# Copyright 2002-2008 Josh Clark and Global Moxie, LLC. This code cannot be
# redistributed without permission from globalmoxie.com.  For more
# information, consult your Big Medium license.
#
# $Id: Archive.pm 3171 2008-07-08 13:12:08Z josh $

package BigMed::Archive;
use strict;
use warnings;
use utf8;
use Carp;
our @EXPORT = qw(
  bma_can_extract
  bma_all_extract_types
  bma_extract
  bma_archive_type
  bma_can_compress
  bma_all_compress_types
  bma_compress
);
use base 'Exporter';
use BigMed::DiskUtil qw(bm_untaint_filepath bm_confirm_dir bm_file_path);
use BigMed::Error;
use File::Spec;
use Cwd;

#requires IPC::Run, but win32 sometimes can't compile it.
#require/eval it in _load_extract_map instead of use here.
# use IPC::Run qw(run);

my $ERR = 'BigMed::Error';
my %BIN_APPS;
my @ORDERED_TYPES = qw(zip tgz tbz tar);

### SUPPORTED ARCHIVE TYPES FOR EXTRACTION
my %EXTRACT_CODEMAP = (
    'tgz' => \&_untar_cmd,
    'tar' => \&_untar_cmd,
    'tbz' => \&_untar_cmd,
    'zip' => \&_unzip_cmd,
);
my %EXTRACT_REQUIRES      = (
    'tar' => ['tar'],
    'tgz' => ['tar', 'gzip'],
    'tbz' => ['tar', 'bunzip2'],
    'zip' => ['unzip'],
);

my %COMPRESS_CODEMAP = (
    'tgz' => \&_tgz_cmd,
    'tar' => \&_tar_cmd,
    'tbz' => \&_tbz_cmd,
    'zip' => \&_zip_cmd,
);
my %COMPRESS_REQUIRES      = (
    'tar' => ['tar'],
    'tgz' => ['tar'],
    'tbz' => ['tar'],
    'zip' => ['zip'],
);


sub _load_bin_apps {    #get locations of various archive apps, if available
    return %BIN_APPS if %BIN_APPS;
    foreach my $app (qw[tar unzip gzip bunzip2 zip bzip2]) {
        $BIN_APPS{$app} = _can_run($app);
    }
    return %BIN_APPS;
}

my %CAN_EXTRACT;
my $GOT_EXTRACT;
my %CAN_COMPRESS;
my $GOT_COMPRESS;

sub _load_extract_map {
    return %CAN_EXTRACT if $GOT_EXTRACT;
    $GOT_EXTRACT = 1;
    return ( %CAN_EXTRACT = _load_generic_map('extract') );
}

sub _load_compress_map {
    return %CAN_COMPRESS if $GOT_COMPRESS;
    $GOT_COMPRESS = 1;
    return ( %CAN_COMPRESS = _load_generic_map('compress') );
}

sub _load_generic_map {
    my $type = shift;
    eval { require IPC::Run; }; #win32 sometimes can't do it
    return if $@;

    my %got_app = _load_bin_apps();

    my %test_type = $type eq 'extract' ? %EXTRACT_REQUIRES : %COMPRESS_REQUIRES;
    my %can_do;
    foreach my $type ( keys %test_type ) {
        my $can = 1;
        foreach my $app ( @{ $test_type{$type} } ) {
            $can = 0, last if !$got_app{$app};
        }
        $can_do{$type} = $can if $can;
    }
    return %can_do;
}

sub bma_all_extract_types {
    my %can = _load_extract_map();
    return grep { $can{$_} } @ORDERED_TYPES;
}

sub bma_can_extract {
    my %can = _load_extract_map();
    return $_[0] ? $can{ $_[0] } : scalar keys %can;
}

sub bma_all_compress_types {
    my %can = _load_compress_map();
    return grep { $can{$_} } @ORDERED_TYPES;
}

sub bma_can_compress{
    my %can = _load_compress_map();
    return $_[0] ? $can{ $_[0] } : scalar keys %can;
}

sub bma_extract {
    my ( $archive, $todir, $type ) = @_;
    croak 'archive and target dir paths required' if !$archive || !$todir;

    $archive = bm_untaint_filepath($archive) or return;
    if ( !-e $archive ) {
        return $ERR->set_error(
            head => 'ARCHIVE_Cannot Extract',
            text => ['ARCHIVE_Archive file not found', $archive],
        );
    }
    $type ||= bma_archive_type($archive);
    if ( !$type || !bma_can_extract($type) ) {
        my $supported = join( ', ', bma_all_extract_types() );
        return $ERR->set_error(
            head => 'ARCHIVE_Cannot Extract',
            text => ['ARCHIVE_Unknown archive type', $supported],
        );
    }

    $todir = bm_untaint_filepath($todir) or return;
    bm_confirm_dir( $todir, { build_path => 1, data => 1 } ) or return;

    my ( $out, $err );
    my $orig_dir = getcwd();
    chdir $todir
      or return $ERR->set_error(
        head => 'ARCHIVE_Cannot Extract',
        text => ['ARCHIVE_Cannot access working directory', $todir, $!],
      );

    my @commands = $EXTRACT_CODEMAP{$type}->( $archive, $type );
    IPC::Run::run( @commands, \$out, \$err )
      or
      return $ERR->set_io_error( undef, 'open', $archive, "(extract) $err" );

    if ($orig_dir) {
        $orig_dir = bm_untaint_filepath($orig_dir) or return;
        chdir $orig_dir
          or return $ERR->set_error(
            head => 'ARCHIVE_Cannot Extract',
            text =>
              ['ARCHIVE_Cannot access working directory', $orig_dir, $!],
          );
    }

    return 1;
}

sub bma_archive_type {
    my $fn   = shift or return q{};
    my $type =
        $fn =~ /.+[.]tgz$/ims ? 'tgz'
      : $fn =~ /.+[.]tar$/ims ? 'tar'
      : $fn =~ /.+[.]zip$/ims ? 'zip'
      : $fn =~ /.+[.]tbz$/ims ? 'tbz'
      : q{};
    return $type;
}

sub bma_compress {
    my ( $orig, $outfile, $type ) = @_;
    croak 'original and output paths required' if !$orig || !$outfile;

    #vet original file or directory and build parent directory path
    $orig = bm_untaint_filepath($orig) or return;
    if ( !-e $orig ) {
        return $ERR->set_error(
            head => 'ARCHIVE_Cannot Compress',
            text => ['ARCHIVE_Original file not found', $orig],
        );
    }
    my ( $vol, $oparent, $ofilename ) = File::Spec->splitpath($orig);
    my $parent_path = File::Spec->catpath( $vol, $oparent, q{});
    $ofilename ||= '.'; #compressing a directory
    
    #check target path and figure the type
    $outfile = bm_untaint_filepath($outfile) or return;
    $type ||= bma_archive_type($outfile) || ( bma_all_compress_types() )[0];
    if ( !$type || !bma_can_extract($type) ) {
        my $supported = join( ', ', bma_all_compress_types() );
        return $ERR->set_error(
            head => 'ARCHIVE_Cannot Compress',
            text => ['ARCHIVE_Unknown archive type', $supported],
        );
    }
    $outfile = bm_file_path($outfile, "archive.$type") if -d $outfile;
    $outfile .= ".$type" if $outfile !~ /[.]$type$/msi;

    #make sure the target directory exists
    my ( $tvol, $tparent ) = File::Spec->splitpath($outfile);
    my $target_dir = File::Spec->catpath( $tvol, $tparent, q{});
    bm_confirm_dir( $target_dir, { build_path => 1, data => 1 } ) or return;    

    my ( $out, $err );
    my $orig_dir = getcwd();
    chdir $parent_path
      or return $ERR->set_error(
        head => 'ARCHIVE_Cannot Compress',
        text => ['ARCHIVE_Cannot access working directory', $parent_path, $!],
      );

    my @commands = $COMPRESS_CODEMAP{$type}->( $ofilename, $outfile, $type );
    IPC::Run::run( @commands, \$out, \$err )
      or
      return $ERR->set_io_error( undef, 'write', $outfile, "(compress) $err" );

    if ($orig_dir) {
        $orig_dir = bm_untaint_filepath($orig_dir) or return;
        chdir $orig_dir
          or return $ERR->set_error(
            head => 'ARCHIVE_Cannot Compress',
            text =>
              ['ARCHIVE_Cannot access working directory', $orig_dir, $!],
          );
    }

    return $outfile;
}


sub _can_run {    #borrowed from Jos Bouman's IPC::Cmd...
    my $command = shift;

    # a lot of VMS executables have a symbol defined
    # check those first
    if ( $^O eq 'VMS' ) {
        require VMS::DCLsym;
        my $syms = VMS::DCLsym->new;
        return $command if scalar $syms->getsym( uc $command );
    }

    require Config;
    require File::Spec;
    require ExtUtils::MakeMaker;

    if ( File::Spec->file_name_is_absolute($command) ) {
        return MM->maybe_command($command);
    }
    else {
        for my $dir ( split /\Q$Config::Config{path_sep}\E/ms, $ENV{PATH} ) {
            my $abs = File::Spec->catfile( $dir, $command );
            return $abs if $abs = MM->maybe_command($abs);
        }
    }
    return;
}

sub _unzip_cmd {
    my ( $archive, $type ) = @_;
    my $in;
    return ( [$BIN_APPS{'unzip'}, '-oqq', $archive], \$in );
}

sub _untar_cmd {
    my ( $archive, $type ) = @_;

    my $in;
    $type ||= bma_archive_type($archive);

    my @cmd;
    if ( $type eq 'tar' ) {
        @cmd = ( [$BIN_APPS{'tar'}, '-xf', $archive], \$in );
    }
    elsif ( $type eq 'tgz' ) {
        @cmd = (
            [$BIN_APPS{'gzip'}, '-cdf', $archive],
            '|', [$BIN_APPS{'tar'}, '-xf', '-'],
        );
    }
    else {    #tbz
        @cmd = (
            [$BIN_APPS{'bunzip2'}, '-kfqc', $archive],
            '|', [$BIN_APPS{'tar'}, '-xf', '-'],
        );
    }
    return @cmd;
}

sub _zip_cmd {
    my ( $orig, $outfile, $type ) = @_;
    my $in;
    return ( [$BIN_APPS{'zip'}, '-r', $outfile, $orig], \$in );
}

sub _tgz_cmd {
    my ( $orig, $outfile, $type ) = @_;
    my $in;
    return ( [$BIN_APPS{'tar'}, '-czf', $outfile, $orig], \$in );
}

sub _tar_cmd {
    my ( $orig, $outfile, $type ) = @_;
    my $in;
    return ( [$BIN_APPS{'tar'}, '-cf', $outfile, $orig], \$in );
}

sub _tbz_cmd {
    my ( $orig, $outfile, $type ) = @_;
    my $in;
    return ( [$BIN_APPS{'tar'}, '-cjf', $outfile, $orig], \$in );
}



1;

__END__

=head1 NAME

BigMed::Archive - Extracts compressed archive files

=head1 SYNOPSIS

    use BigMed::Archive;
    
    my $can_extract = bma_can_extract();
    # True if any archive types are supported
    
    my @types = bma_all_extract_types();
    # Returns all supported file extensions: zip, tgz, tbz and/or tar
    
    my $archive_path = '/path/to/foo.zip';
    my $type = bma_archive_type($archive_path);
    # returns the type of archive: zip, tgz, tbz and/or tar
    # (just uses file extension, nothing fancy, no mime check)
    
    bma_extract( $archive_path, $target_dir ) or BigMed::Error->error_stop;
    #extracts the contents of the archive at $archive_path into the
    #directory at $target_dir

=head1 DESCRIPTION

BigMed::Archive uses command-line programs, if available, to compress
or extract  zip, tgz, tbz or tar files. The module exports all of the
methods listed below.

=head1 COMPRESSION METHODS

=head2 C<bma_compress( $file_or_dir_to_compress, $target_dir_or_file, $type )>

Compresses the contents of the file or directory located at the path
specified in the first argument into an archive file located at the target
directory or file name. The third (optional) argument specifies the
compression type to use:

=over 4

=item * zip

=item * tgz

=item * tbz

=item * tar

=back

If the second argument is a directory, an archive file named C<archive.zip>
(or C<.tgz>, C<.tbz> or C<.tar>) will be generated in that directory.

If the second argument is a filename and the $type argument is not
supplied, the method will attempt to extract the filetype from the filename.

If no filetype can be determined (no $type argument or no valid extension
in the target filename), the compression type will be the first available
type supplied by the C<bma_can_compress> method.

On success, returns the file path of the generated archive file. On
failure returns no value and adds a message to Big Medium's error queue.
The third optional argument may be used to specify the type of archive;
otherwise, the type will be taken from the file name extension.

Returns true on success or false on failure (an error message is also
placed in the BigMed::Error queue).

The method will attempt to build a directory path to the specified target
filepath if it does not yet exist.

=head2 C<bma_can_compress()>

Returns true if the command-line programs required to compress any of the
supported archive types are found.

If an optional argument is supplied, the method returns true if the
module can compress archive types with the file extension in the argument:

    if ( bma_can_compress('zip') ) {
        #do zip compression
    }

=head2 C<bma_all_compress_types()>

Returns an array of archive types that the module can figure out how to
compress. Possible values: zip, tgz, tbz, tar

=head1 EXTRACTION METHODS

=head2 C<bma_extract( $archive_path, $target_dir, $type )>

Extracts the contents of the archive located at the path specified in the
first argument into the directory specified in the second argument.
The third optional argument may be used to specify the type of archive;
otherwise, the type will be taken from the file name extension.

Returns true on success or false on failure (an error message is also
placed in the BigMed::Error queue).

=head2 C<bma_can_extract()>

Returns true if the command-line programs required to extract any of the
supported archive types are found.

If an optional argument is supplied, the method returns true if the
module can extract archive types with the file extension in the argument:

    if ( bma_can_extract('zip') ) {
        #do zip extraction
    }

=head2 C<bma_all_extract_types()>

Returns an array of archive types that the module can figure out how to
extract. Possible values: zip, tgz, tbz, tar

=head1 MISC METHODS

=head2 bma_archive_type( $archive_path )

Returns the type of archive based on the file name. There's no mime detection,
it's just based on the file extension.

=head1 AUTHOR & COPYRIGHTS

This module and all Big Medium modules are copyright Josh Clark
and Global Moxie. All rights reserved.

Use of this module and the Big Medium content
management system are governed by Global Moxie's software licenses
and may not be used outside of the terms and conditions outlined
there.

For more information, visit the Global Moxie website at
L<http://globalmoxie.com/>.

Big Medium and Global Moxie are service marks of Global Moxie
and Josh Clark. All rights reserved.

=cut

