package Archive::Tar; use strict; use Carp; use Cwd; use File::Basename; BEGIN { # This bit is straight from the manpages use Exporter (); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $symlinks $compression $has_getpwuid $has_getgrgid); $VERSION = 0.072; @ISA = qw(Exporter); @EXPORT = qw (); %EXPORT_TAGS = (); @EXPORT_OK = (); # The following bit is not straight from the manpages # Check if symbolic links are available $symlinks = 1; eval { $_ = readlink $0; }; # Pointless assigment to make -w shut up if ($@) { warn "Symbolic links not available.\n"; $symlinks = undef; } # Check if Compress::Zlib is available $compression = 1; eval {require Compress::Zlib;}; if ($@) { warn "Compression not available.\n"; $compression = undef; } # Check for get* (they don't exist on WinNT) eval {$_=getpwuid(0)}; # Pointless assigment to make -w shut up $has_getpwuid = !$@; eval {$_=getgrgid(0)}; # Pointless assigment to make -w shut up $has_getgrgid = !$@; } use vars qw(@EXPORT_OK $tar_unpack_header $tar_header_length $error); $tar_unpack_header ='A100 A8 A8 A8 A12 A12 A8 A1 A100 A6 A2 A32 A32 A8 A8 A155'; $tar_header_length = 512; sub format_tar_entry; sub format_tar_file; ### ### Non-method functions ### sub drat {$error=$!;return undef} sub read_tar { my ($filename, $compressed) = @_; my @tarfile = (); my $i = 0; my $head; if ($compressed) { if ($compression) { $compressed = Compress::Zlib::gzopen($filename,"rb") or drat; # Open compressed $compressed->gzread($head,$tar_header_length); } else { $error = "Compression not available (install Compress::Zlib).\n"; return undef; } } else { open(TAR, $filename) or drat; binmode TAR; read(TAR,$head,$tar_header_length); } READLOOP: while (length($head)==$tar_header_length) { my ($name, # string $mode, # octal number $uid, # octal number $gid, # octal number $size, # octal number $mtime, # octal number $chksum, # octal number $typeflag, # character $linkname, # string $magic, # string $version, # two bytes $uname, # string $gname, # string $devmajor, # octal number $devminor, # octal number $prefix) = unpack($tar_unpack_header,$head); my ($data, $diff, $dummy); $mode = oct $mode; $uid = oct $uid; $gid = oct $gid; $size = oct $size; $mtime = oct $mtime; $chksum = oct $chksum; $devmajor = oct $devmajor; $devminor = oct $devminor; $name = $prefix."/".$name if $prefix; $prefix = ""; # some broken tar-s don't set the typeflag for directories # so we ass_u_me a directory if the name ends in slash $typeflag = 5 if $name =~ m|/$| and not $typeflag; last READLOOP if $head eq "\0" x 512; # End of archive # Apparently this should really be two blocks of 512 zeroes, # but GNU tar sometimes gets it wrong. See comment in the # source code (tar.c) to GNU cpio. substr($head,148,8) = " "; if (unpack("%16C*",$head)!=$chksum) { warn "$name: checksum error.\n"; } if ($compressed) { $compressed->gzread($data,$size); } else { if (read(TAR,$data,$size)!=$size) { $error = "Read error on tarfile."; close TAR; return undef; } } $diff = $size%512; if ($diff!=0) { if ($compressed) { $compressed->gzread($dummy,512-$diff); } else { read(TAR,$dummy,512-$diff); # Padding, throw away } } # Guard against tarfiles with garbage at the end last READLOOP if $name eq ''; $tarfile[$i++]={ name => $name, mode => $mode, uid => $uid, gid => $gid, size => $size, mtime => $mtime, chksum => $chksum, typeflag => $typeflag, linkname => $linkname, magic => $magic, version => $version, uname => $uname, gname => $gname, devmajor => $devmajor, devminor => $devminor, prefix => $prefix, data => $data}; } continue { if ($compressed) { $compressed->gzread($head,$tar_header_length); } else { read(TAR,$head,$tar_header_length); } } $compressed ? $compressed->gzclose() : close(TAR); return @tarfile; } sub format_tar_file { my @tarfile = @_; my $file = ""; foreach (@tarfile) { $file .= format_tar_entry $_; } $file .= "\0" x 1024; return $file; } sub write_tar { my ($filename) = shift; my ($compressed) = shift; my @tarfile = @_; my ($tmp); $tmp = format_tar_file @tarfile; if ($compressed) { if (!$compression) { $error = "Compression not available.\n"; return undef; } $compressed = Compress::Zlib::gzopen($filename,"wb") or drat; $compressed->gzwrite($tmp); $compressed->gzclose; } else { open(TAR, ">".$filename) or drat; binmode TAR; syswrite(TAR,$tmp,length $tmp); close(TAR) or carp "Failed to close $filename, data may be lost: $!\n"; } } sub format_tar_entry { my ($ref) = shift; my ($tmp,$file,$prefix,$pos); $file = $ref->{name}; if (length($file)>99) { $pos = index $file, "/",(length($file) - 100); next if $pos == -1; # Filename longer than 100 chars! $prefix = substr $file,0,$pos; $file = substr $file,$pos+1; substr($prefix,0,-155)="" if length($prefix)>154; } else { $prefix=""; } $tmp = pack("a100a8a8a8a12a12a8a1a100", $file, sprintf("%6o ",$ref->{mode}), sprintf("%6o ",$ref->{uid}), sprintf("%6o ",$ref->{gid}), sprintf("%11o ",$ref->{size}), sprintf("%11o ",$ref->{mtime}), " ", $ref->{typeflag}, $ref->{linkname}); $tmp .= pack("a6", $ref->{magic}); $tmp .= '00'; $tmp .= pack("a32",$ref->{uname}); $tmp .= pack("a32",$ref->{gname}); $tmp .= pack("a8",sprintf("%6o ",$ref->{devmajor})); $tmp .= pack("a8",sprintf("%6o ",$ref->{devminor})); $tmp .= pack("a155",$prefix); substr($tmp,148,6) = sprintf("%6o", unpack("%16C*",$tmp)); substr($tmp,154,1) = "\0"; $tmp .= "\0" x ($tar_header_length-length($tmp)); $tmp .= $ref->{data}; if ($ref->{size}>0) { $tmp .= "\0" x (512 - ($ref->{size}%512)) unless $ref->{size}%512==0; } return $tmp; } ### ### Methods ### # Constructor. Reads tarfile if given an argument that's the name of a # readable file. sub new { my $class = shift; my ($filename,$compressed) = @_; my $self = {}; bless $self, $class; $self->{'_filename'} = undef; if (!defined $filename) { return $self; } if (-r $filename) { $self->{'_data'} = [read_tar $filename,$compressed]; $self->{'_filename'} = $filename; return $self; } if (-e $filename) { carp "File exists but is not readable: $filename\n"; } return $self; } # Return list with references to hashes representing the tar archive's # component files. sub data { my $self = shift; return @{$self->{'_data'}}; } # Read a tarfile. Returns number of component files. sub read { my $self = shift; my ($file, $compressed) = @_; $self->{'_filename'} = undef; if (! -e $file) { carp "$file does not exist.\n"; $self->{'_data'}=[]; return undef; } elsif (! -r $file) { carp "$file is not readable.\n"; $self->{'_data'}=[]; return undef; } else { $self->{'_data'}=[read_tar $file, $compressed]; $self->{'_filename'} = $file; return scalar @{$self->{'_data'}}; } } # Write a tar archive to file sub write { my ($self) = shift @_; my ($file) = shift @_; my ($compressed) = shift @_; unless ($file) { return format_tar_file @{$self->{'_data'}}; } write_tar $file, $compressed, @{$self->{'_data'}}; } # Add files to the archive. Returns number of successfully added files. sub add_files { my ($self) = shift; my (@files) = @_; my $file; my ($mode,$uid,$gid,$rdev,$size,$mtime,$data,$typeflag,$linkname); my $counter = 0; local ($/); undef $/; foreach $file (@files) { if ((undef,undef,$mode,undef,$uid,$gid,$rdev,$size, undef,$mtime,undef,undef,undef) = stat($file)) { $data = ""; $linkname = ""; if (-f $file) { # Plain file $typeflag = 0; unless (open(FILE,$file)) { next; # Can't open file, for some reason. Try next one. } binmode FILE; $data = ; $data = "" unless defined $data; close FILE; } elsif (-l $file) { # Symlink $typeflag = 1; $linkname = readlink $file if $symlinks; } elsif (-d $file) { # Directory $typeflag = 5; } elsif (-p $file) { # Named pipe $typeflag = 6; } elsif (-S $file) { # Socket $typeflag = 8; # Bogus value, POSIX doesn't believe in sockets } elsif (-b $file) { # Block special $typeflag = 4; } elsif (-c $file) { # Character special $typeflag = 3; } else { # Something else (like what?) $typeflag = 9; # Also bogus value. } push(@{$self->{'_data'}},{ name => $file, mode => $mode, uid => $uid, gid => $gid, size => length $data, mtime => $mtime, chksum => " ", typeflag => $typeflag, linkname => $linkname, magic => "ustar\0", version => "00", # WinNT protection uname => $has_getpwuid?(getpwuid($uid))[0]:"unknown", gname => $has_getgrgid?(getgrgid($gid))[0]:"unknown", devmajor => 0, # We don't handle this yet devminor => 0, # We don't handle this yet prefix => "", 'data' => $data, }); $counter++; # Successfully added file } else { next; # stat failed } } return $counter; } sub remove { my ($self) = shift; my (@files) = @_; my $file; foreach $file (@files) { @{$self->{'_data'}} = grep {$_->{name} ne $file} @{$self->{'_data'}}; } return $self; } # Get the content of a file sub get_content { my ($self) = shift; my ($file) = @_; my $entry; ($entry) = grep {$_->{name} eq $file} @{$self->{'_data'}}; return $entry->{'data'}; } # Replace the content of a file sub replace_content { my ($self) = shift; my ($file,$content) = @_; my $entry; ($entry) = grep {$_->{name} eq $file} @{$self->{'_data'}}; if ($entry) { $entry->{'data'} = $content; return 1; } else { return undef; } } # Add data as a file sub add_data { my ($self, $file, $data, $opt) = @_; my $ref = {}; my ($key); $ref->{'data'}=$data; $ref->{name}=$file; $ref->{mode}=0666&(0777-umask); $ref->{uid}=$>; $ref->{gid}=(split(/ /,$)))[0]; # Yuck $ref->{size}=length $data; $ref->{mtime}=time; $ref->{chksum}=" "; # Utterly pointless $ref->{typeflag}=0; # Ordinary file $ref->{linkname}=""; $ref->{magic}="ustar\0"; $ref->{version}="00"; # WinNT protection $ref->{uname}=$has_getpwuid?(getpwuid($>))[0]:"unknown"; $ref->{gname}=$has_getgrgid?(getgrgid($ref->{gid}))[0]:"unknown"; $ref->{devmajor}=0; $ref->{devminor}=0; $ref->{prefix}=""; if ($opt) { foreach $key (keys %$opt) { $ref->{$key} = $opt->{$key} } } push(@{$self->{'_data'}},$ref); return 1; } # Write a single (probably) file from the in-memory archive to disk sub extract { my $self = shift; my (@files) = @_; my ($file, $current, @path); foreach $file (@files) { foreach (@{$self->{'_data'}}) { if ($_->{name} eq $file) { # For the moment, we assume that all paths in tarfiles # are given according to Unix standards. # Which they *are*, according to the tar format spec! (@path) = split(/\//,$file); $file = pop @path; $current = cwd; foreach (@path) { if (-e $_ && ! -d $_) { warn "$_ exists but is not a directory!\n"; next; } mkdir $_,0777 unless -d $_; chdir $_; } if (not $_->{typeflag}) { # Ordinary file open(FILE,">".$file); binmode FILE; print FILE $_->{'data'}; close FILE; } elsif ($_->{typeflag}==5) { # Directory if (-e $file && ! -d $file) { drat; } mkdir $file,0777 unless -d $file; } elsif ($_->{typeflag}==1) { symlink $_->{linkname},$file if $symlinks; } elsif ($_->{typeflag}==6) { warn "Doesn't handle named pipes (yet).\n"; return 1; } elsif ($_->{typeflag}==4) { warn "Doesn't handle device files (yet).\n"; return 1; } elsif ($_->{typeflag}==3) { warn "Doesn't handle device files (yet).\n"; return 1; } else { $error = "unknown file type: $_->{typeflag}"; return undef; } utime time, $_->{mtime}, $file; # We are root, and chown exists if ($>==0 and $^O ne "MacOS" and $^O ne "MSWin32") { chown $_->{uid},$_->{gid},$file; } # chmod is done last, in case it makes file readonly # (this accomodates DOSish OSes) chmod $_->{mode},$file; chdir $current; } } } } # Return a list of all filenames in in-memory archive. sub list_files { my ($self) = shift; return map {$_->{name}} @{$self->{'_data'}}; } ### Standard end of module :-) 1; =head1 NAME Tar - module for manipulation of tar archives. =head1 SYNOPSIS use Archive::Tar; $tar = Archive::Tar->new(); $tar->read("origin.tar.gz",1); $tar->add_files("file/foo.c", "file/bar.c"); $tar->add_data("file/baz.c","This is the file contents"); $tar->write("files.tar"); =head1 DESCRIPTION This is a module for the handling of tar archives. At the moment these methods are implemented: =over 4 =item C Returns a new Tar object. If given a filename as an argument, it will try to load that as a tar file. If given a true value as a second argument, will assume that the tar file is compressed, and will attempt to read it using L. =item C Takes a list of filenames and adds them to the in-memory archive. I suspect that this function will produce bogus tar archives when used under MacOS, but I'm not sure and I have no Mac to test it on. =item C Takes a filename, a scalar full of data and optionally a reference to a hash with specific options. Will add a file to the in-memory archive, with name C<$filename> and content C<$data>. Specific options can be set using C<$opthashref>, which will be documented later. =item C Removes any entries with names matching any of the given filenames from the in-memory archive. String comparisons are done with C. =item C',$compressed)> Try to read the given tarfile into memory. If the second argument is a true value, the tarfile is assumed to be compressed. Will I any previous content in C<$tar>! =item C',$compressed)> Will write the in-memory archive to disk. If no filename is given, returns the entire formatted archive as a string, which should be useful if you'd like to stuff the archive into a socket or a pipe to gzip or something. If the second argument is true, the module will try to write the file compressed. =item C Returns the in-memory archive. This is a list of references to hashes, the internals of which is not currently documented. =item C Write files whose names are equivalent to any of the names in C<@filenames> to disk, creating subdirectories as neccesary. This might not work too well under VMS and MacOS. =item C Returns a list with the names of all files in the in-memory archive. =item C Return the content of the named file. =item C Make the string $content be the content for the file named $file. =back =head1 CHANGES =over 4 =item Version 0.071 Minor release. Arrange to chmod() at the very end in case it makes the file readonly. Win32 is actually picky about that. SunOS 4.x tar makes tarfiles that contain directory entries that don't have typeflag set properly. We use the trailing slash to recognize directories in such tarfiles. =item Version 0.07 Fixed (hopefully) broken portability to MacOS, reported by Paul J. Schinder at Goddard Space Flight Center. Fixed two bugs with symlink handling, reported in excellent detail by an admin at teleport.com called Chris. Primive tar program (called ptar) included with distribution. Useage should be pretty obvious if you've used a normal tar program. Added methods get_content and replace_content. Added support for paths longer than 100 characters, according to POSIX. This is compatible with just about everything except GNU tar. Way to go, GNU tar (use a better tar, or GNU cpio). NOTE: When adding files to an archive, files with basenames longer than 100 characters will be silently ignored. If the prefix part of a path is longer than 155 characters, only the last 155 characters will be stored. =item Version 0.06 Added list_files() method, as requested by Michael Wiedman. Fixed a couple of dysfunctions when run under Windows NT. Michael Wiedmann reported the bugs. Changed the documentation to reflect reality a bit better. Fixed bug in format_tar_entry. Bug reported by Michael Schilli. =item Version 0.05 Quoted lots of barewords to make C stop complaining under perl version 5.003. Ties to L put in. Will warn if it isn't available. $tar->write() with no argument now returns the formatted archive. =item Version 0.04 Made changes to write_tar so that Solaris tar likes the resulting archives better. Protected the calls to readlink() and symlink(). AFAIK this module should now run just fine on Windows NT. Add method to write a single entry to disk (extract) Added method to add entries entirely from scratch (add_data) Changed name of add() to add_file() All calls to croak() removed and replaced with returning undef and setting Tar::error. Better handling of tarfiles with garbage at the end. =cut