#!/usr/bin/perl # # $Id: psync,v 0.67 2004/05/03 14:53:30 dankogai Exp $ # $Id: psync,v 0.69.3 2004/12/18 11:28:00 jacek_s Exp $ # eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if 0; # not running under some shell use strict; use Getopt::Long; use Fcntl; use DB_File; use File::Basename; use MacOSX::File; use MacOSX::File::Copy; use MacOSX::File::Info; use Fcntl qq(:mode); my $Debug = 0; my $Psync_DB = '.psync.db'; my ( $opt_a, $opt_d, $opt_f, $opt_m, $opt_n, $opt_p, $opt_q, $opt_r, $opt_v, ); my @opt_excludes = (); Getopt::Long::Configure("bundling"); GetOptions( "D" => \$Debug, "a" => \$opt_a, "d:n" => \$opt_d, "f" => \$opt_f, "m" => \$opt_m, "n" => \$opt_n, "p" => \$opt_p, "q" => \$opt_q, "r" => \$opt_r, "v:n" => \$opt_v, "exclude=s" => \@opt_excludes, ); defined $opt_d and $opt_d ||= 1; $opt_v ||= 1; $opt_q and $opt_v = 0; $opt_a and $opt_v ||= 1; ###js force -v when -a is set $Debug and print <<"EOT"; \$opt_a = $opt_a, \$opt_d = $opt_d, \$opt_f = $opt_f, \$opt_m = $opt_m, \$opt_n = $opt_n, \$opt_p = $opt_p, \$opt_q = $opt_q, \$opt_r = $opt_r, \$opt_v = $opt_v, \@opt_excludes = @opt_excludes, EOT ###js - changed most patterns to match anywhere (^/ -> .*/ ###js - changed /private/var/run/.* -> .*\.pid ###js - added /Volumes/xxx/dev/, /private/tmp, Library/Caches and Library/Safari/Icons my $IgnorePat = qr[ ^/dev/.* | ^/Volumes/[^/]*/dev/.* | .*/tmp/.* | .*/private/tmp/.* | .*/private/var/tmp/.* | .*/private/var/vm/.* | .*/private/var/run/.*\.pid | .*/Temporary\ Items/.* | .*/Library/Caches/.* | .*/Library/Safari/Icons/.* ]xo; my @ExcludePats = (); push @ExcludePats, $IgnorePat; push @ExcludePats, eval "qr[\\Q$_]o" foreach (@opt_excludes); my %IgnoreFiles = map { $_ => 1 } ( $Psync_DB, # '.DS_Store', ###js I like to keep those '.FBCIndex', '.FBCLockFolder', '.Trashes', 'AppleShare PDS', 'Desktop DB', 'Desktop DF', 'TheFindByContentFolder', 'TheVolumeSettingsFolder', ); my $Del_Ignored = $opt_d > 1 ? 1 : 0; my $Topdev; # Maybe we should tie them to DB_File to save memory my (%Signature, %Attribs, %Action, %Root) = (); select(STDOUT); $|=1; #autoflush ###js check arguments scalar @ARGV >= 2 or help(); for my $arg (@ARGV){ -e $arg or die "$arg does not exist"; } my $Dst = pop @ARGV; -d $Dst or die "$Dst is not a directory"; if ($opt_r){ scalar @ARGV != 1 and help(); if (-f "$ARGV[0]/$Psync_DB"){ # remote restore mode $opt_v and do_log("Using $ARGV[0]/$Psync_DB to retrieve extra attributes."); tie (%Attribs, 'DB_File', "$ARGV[0]/$Psync_DB", O_RDONLY, 0440, $DB_HASH) or die "$Dst/$Psync_DB : $!"; $opt_r = 1; }else{ $opt_r = 2; # remote backup mode } } my $ScanCount = 0; $opt_v and do_log("Scanning Destination Directory $Dst ..."); $Topdev = (lstat($Dst))[0]; scantree($Dst, '', -1); $opt_v and do_log("\n$ScanCount items found."); for my $src (@ARGV){ $ScanCount = 0; $opt_v and do_log("Scanning Source Item $src ..."); $Topdev = (lstat($src))[0]; scantree($src, '' , +1); $opt_v and do_log("\n$ScanCount items found."); } my ($n_del, $n_unchg, $n_copy) = (0,0,0); ###js - global so they can be used later while(my ($k, $v) = each %Action){ $k or $n_unchg++ and next; ###js add empty (root) element to unchanged (so the numbers add up) $v < 0 and $n_del++; $v == 0 and $n_unchg++; $v > 0 and $n_copy++; } if ($opt_v){ do_log(sprintf "%8d items to delete,", $n_del); $opt_v > 1 and do_log(sprintf "%8d items unchanged,", $n_unchg); ###js added $opt_v > 1 (less useful info) do_log(sprintf "%8d items to copy/fix.", $n_copy); } if ($opt_d and $n_del > 0){ $opt_v and $opt_n or $opt_a ? do_log("would delete ...") : do_log("deleting items ..."); ###js added $opt_n or $opt_a # sort must be this order for depth-first traversal for my $k (sort {$b cmp $a} keys %Action){ $k or next; $Action{$k} < 0 or next; my $dpath = $Dst . $k; unless ($opt_n){ ###js - if $dpath is a link with no target, -e will fail -e $dpath or -l $dpath or next; $opt_v and do_log("- $dpath"); ###js - if a folder, we may need to do a recursive delete if (-d $dpath and ! -l $dpath){ $opt_a or rmdir $dpath or rmtree($dpath); } else{ $opt_a or unlink $dpath or unlock($dpath) and unlink($dpath) or warn "$dpath : $!"; ###js added $opt_a and unlock } my $atticf = dirname($dpath) . '/._' . basename($dpath); if (-f $atticf){ $opt_a or unlink $atticf or warn "$atticf : $!"; ###js added $opt_a } } } } if ($n_copy > 0){ $opt_v and $opt_n or $opt_a ? do_log("would copy ...") : do_log("copying items ..."); ###js added $opt_n or $opt_a # sort must be this order for depth-last traversal for my $k (sort keys %Action){ my $spath = $Root{$k} . $k; $spath =~ s,^/+,/,o; my $dpath = $Dst . $k; $Action{$k} == 0 and $opt_v > 1 and do_log("== $spath"); $Action{$k} > 0 or next; unless ($opt_n){ my ($size, $mtime) = unpack("N2", $Signature{$k}); my ($mode, $uid, $gid, $atime) = unpack("N4", $Attribs{$k}); if (S_ISDIR($mode)){ # -d unless (-d $dpath){ ###js - to replace file or link with a folder, delete it first if (-e $dpath or -l $dpath){ $opt_v and do_log("--d $dpath"); $opt_a or unlink $dpath; ###js added $opt_a } $opt_v and do_log("+d $spath"); $opt_a or mkdir $dpath, 0755 or warn "$dpath : $!"; ###js added $opt_a }else{ $opt_v > 1 and do_log("=d $spath"); } }elsif (S_ISREG($mode)){ # -f ###js - to replace link or folder with a file, delete it first if (-l $dpath){ $opt_v and do_log("--l $dpath"); $opt_a or unlink $dpath or warn "$dpath : $!"; } elsif (-d $dpath){ $opt_v and do_log("--d $dpath"); $opt_a or rmdir $dpath or rmtree($dpath); } $opt_v and do_log("+f $spath"); $opt_a or copy($spath, $dpath) or unlock($dpath) and copy($spath, $dpath) ###js added unlock or warn "$spath -> $dpath : ", &MacOSX::File::strerr, " ($MacOSX::File::CopyErr)"; ###js show error name $opt_a or copyattrib($spath, $dpath, $mode, $uid, $gid, $atime, $mtime); ###js added $opt_a }elsif (S_ISLNK($mode)){ # -l my $slink = readlink($spath); my $dlink = readlink($dpath); if ($slink ne $dlink){ $opt_v and do_log("+l $spath"); $opt_v > 3 and do_log("source link: $slink"); $opt_v > 3 and do_log("destin link: $dlink"); #$opt_a or unlink $dpath && symlink($slink, $dpath) or warn "symlink($slink,$dpath) failed"; ###js added $opt_a and warning if (not $opt_a){ unlink $dpath; symlink($slink, $dpath) or warn "symlink($slink,$dpath) failed"; ###js added $opt_a and warning } } #else{ # $opt_v and do_log("++l $spath"); #} #$opt_a or copylinkattr($spath, $dpath, $mode, $uid, $gid, $atime, $mtime); ###js added } } } } if ($n_copy > 0 and not $opt_p){ $opt_v and $opt_n or $opt_a ? do_log("would fix directory attributes ...") : do_log("fixing directory attributes ..."); ###js added $opt_n or $opt-a # sort must be this order for depth-first traversal for my $k (sort {$b cmp $a} keys %Action){ $Action{$k} > 0 or next; my ($size, $mtime) = unpack("N2", $Signature{$k}); my ($mode, $uid, $gid, $atime) = unpack("N4", $Attribs{$k}); S_ISDIR($mode) or next; # -d my $spath = $Root{$k} . $k; $spath =~ s,^/+,/,o; my $dpath = $Dst . $k; unless ($opt_n){ $opt_v and do_log(sprintf "0%04o,%s,%s $dpath", ($mode & 07777), (getpwuid($uid))[0],(getgrgid($gid))[0] ); $opt_a or copyattrib($spath, $dpath, $mode, $uid, $gid, $atime, $mtime); ###js added $opt_a } } } if ($opt_r >= 2){ # these are to make DB operation fast enough my $hashinfo = DB_File::HASHINFO->new; $hashinfo->{nelem} = scalar keys %Action; $hashinfo->{bsize} = 1024; # MAXPATHLEN $hashinfo->{cachesize} = 4 * 1024 * 1024; tie (my %db, 'DB_File', $Psync_DB, O_CREAT|O_RDWR, 0640, $hashinfo) or die "$Psync_DB : $!"; $opt_v and do_log("Using $Dst/$Psync_DB to store extra attributes."); my $count; while ( my ($k, $v) = each %Action){ if ($v >= 0){ $db{$k} = $Attribs{$k}; $count++ % 10000 == 0 and do_log("$count items stored."); } } untie %db; move $Psync_DB, "$Dst/$Psync_DB" or die "Can't move $Psync_DB"; } exit; sub copyattrib{ my ($spath, $dpath, $mode, $uid, $gid, $atime, $mtime) = @_; my $finfo = getfinfo($spath); unless ($opt_r > 1){ chmod $mode & 07777, $dpath; chown $uid, $gid, $dpath; } $finfo and $finfo->set($dpath); utime $atime, $mtime, $dpath; } sub unlock{ ###js added new function to deal with deleting HFS locked files my ($dpath) = @_; my $finfo = getfinfo($dpath); $finfo and $finfo->unlock($dpath); setfinfo($finfo, $dpath); } sub rmtree{ ###js created new function to deal with folders in $IgnorePat my ($path) = @_; opendir my $d, $path or warn "$path:$!"; my @f = grep !/^(?:\.$|\.\.$)/o, readdir $d; # skip . and .. closedir $d; for my $f (@f){ my $fpath = "$path/$f"; if (-d $fpath and ! -l $fpath){ rmtree($fpath) or warn "$fpath : $!"; }else{ $opt_v and do_log("-- $fpath"); unlink $fpath or unlock($fpath) and unlink $fpath or warn "$fpath : $!"; } } $opt_v and do_log("-- $path"); rmdir $path; } sub do_log{ print shift, "\n"; } sub sig2txt{ return sprintf("0x%08x,0x%08x",unpack("N2",shift)); } sub addsig{ my ($path,$mode,$uid,$gid,$size,$atime,$mtime,$action) = @_; my $sig = pack("N2", (S_ISREG($mode) ? $size : 0), $mtime); my $atr = $Attribs{$path}; ###js save old attributes for comparison tied %Attribs or $Attribs{$path} = pack("N4", $mode, $uid, $gid, $atime); if ($opt_v > 3 and $action > 0){ do_log qq(was: ) . sig2txt($Signature{$path}); do_log qq(now: ) . sig2txt($sig); } if ($Signature{$path}){ my ($oldsize, $oldmtime) = unpack("N2", $Signature{$path}); S_ISREG($mode) or $size = 0; my $dif = abs($oldmtime - $mtime); if (($oldsize == $size) && ($dif < 2 || ###js SMB shares use 2 second resolution on mod time abs($dif - 3600) < 2)) { ###js after change from/to daylight time there may be 1 hour difference if ($opt_m){ $opt_f or $action = 0; # same file }else{ ###js check for change in mode and ownership my ($oldmode, $olduid, $oldgid, $oldatime) = unpack("N4", $atr); if ($opt_v > 3 and $action > 0){ do_log qq(old mode/uid/gid: ) . sprintf("%o, %d, %d", $oldmode, $olduid, $oldgid); do_log qq(new mode/uid/gid: ) . sprintf("%o, %d, %d", $mode, $uid, $gid); } if ($mode == $oldmode && $uid == $olduid && $gid == $oldgid){ $opt_f or $action = 0; # same file } } }else{ $Signature{$path} = $sig; # different } }else{ $Signature{$path} = $sig; # first time seen } $Action{$path} = $action; $opt_v > 2 and do_log(join("," => $Action{$path}, sprintf("0x%08x,0x%08x", unpack("N2",$Signature{$path})), $path)); $opt_v > 3 and $action >= 0 and do_log; } # File::Find is too general purpose thus slow. # we implement our own traversal routine sub scantree { my ($root, $path, $action) = @_; if ($opt_v < 3){###js - when -v >= 3 this interferes with other output if ($ScanCount == 0){ ###js - prettified - removed empty line printf "%10d:", $ScanCount; } else{ $ScanCount % 8192 == 0 and printf "\n%10d:", $ScanCount; } $ScanCount % 128 == 0 and print "."; } $ScanCount++; my $fpath = $root . $path; ###js - use full path (instead of just $path) if (grep { $fpath =~ $_} @ExcludePats){ if ($action < 0 and $Del_Ignored){ $opt_v > 2 and do_log(sprintf "%s in ExcludePats & Del_Ignored - skip, but delete destination", $path); addsig($path, 0, 0, 0, 0, 0, 0, -1); } else{ $opt_v > 2 and do_log(sprintf "%s in ExcludePats - skip", $path); } return; } $action > 0 and $Root{$path} = $root; my ($dev, $mode, $nlink, $uid, $gid, $size, $atime, $mtime) = (lstat($fpath))[0,2,3,4,5,7,8,9] or warn "can't stat $fpath"; addsig($path, $mode, $uid, $gid, $size, $atime, $mtime, $action); if (-d _){ $dev != $Topdev and return; opendir my $d, $fpath or warn "$fpath:$!"; my @f = grep !/^\.(?:\.?$|_)/o, readdir $d; # skip . .. and ._* closedir $d; for my $f (@f){ my $spath = "$path/$f"; if ($IgnoreFiles{$f} and $action < 0 and $f ne $Psync_DB and $Del_Ignored){ $opt_v > 2 and do_log(sprintf "%s in IgnoreFiles & Del_Ignored - skip, but delete destination", $f); addsig($spath, $mode, $uid, $gid, $size, $mtime, $atime, -1); }else{ scantree($root, $spath, $action); } } } } sub help{ print <<"EOT"; psync 0.69.3 - usage: psync [-a][-d][-f][-m][-n][-p][-q|-v][--exclude=pat]* source_items ... target_directory psync -r[-a][-d][-f][-m][-n][-p][-q|-v][--exclude=pat]* source_directory target_directory for more type man psync EOT exit; } 1; __END__ =head1 NAME psync -- update copy =head1 SYNOPSIS psync [-a] [-d] [-f] [-m] [-n] [-p] [-r] [-q|-vI] [--exclude=pat]* source_items ... target_directory =head1 DESCRIPTION psync does an update copy. It compares source directory and target directory at first, then erases items that are nonexistent on source directory if specified and finally copies everything on source directory. Items with the same modification date and (data fork) size remain untouched, saving time on operation. Currently psync supports options below =over 4 =item -a "Full Simulation mode". It prints (on standard output) details of what it would do but does nothing. =item -dI Delete nonexistent files before starting copy. -d2 = also delete files matching "Ignore Pattern": (.*/tmp/.*,.*/dev/.*,.*/private/tmp/.*,.*/private/var/tmp/.*, .*/private/var/vm/.*,/ .*/private/var/run/.*\.pid, .*/Temporary\ Items/.*, .*/Library/Caches/.*,.*/Library/Safari/Icons/.*) and "Ignored Files": (.FBCIndex,.FBCLockFolder,.Trashes,AppleShare PDS,Desktop DB, Desktop DF,TheFindByContentFolder,TheVolumeSettingsFolder) CAVEAT: Prior to 0.50 this option was default. =item -f Force copy. Copy files even when the file remains unchanged. =item -m Don't use mode and ownership when comparing files - use only file size and modifications date (to speed it up). =item -n "Simulation mode". It prints (on standard output) a summary of what it would do but does nothing. =item -p Do not fix directory attributes (permissions). Useful when source is on a volume that does not support permissions or has a completely different idea of them (like Windows NT) =item -q Quiet mode. Sets verbose level to 0. =item -r Remote backup/restore mode. Ownership and permissions are stored/retrieved via C<.psync.db> If the source directory contains a file C<.psync.db>, psync turns into remote restore mode. It uses .psync.db on source directory to restore ownership and permissions. If not, psync turns into remote backup mode. After the backup it stores ownership and permissions to C<.psync.db> As the name suggests, this option is imperative when the backup directory is on remote volume such as AFP, NFS, and Samba. =item -vI Sets verbose level. Default verbose level is 1; It prints only items that are changed. Level 2 prints unchanged files also. Level 3 and above are practically debugging mode. =item --exclude Add an exclude pattern. This may be repeated as many times as you wish. This will be treated as a fixed string, not as a regular expression. =back =head1 EXAMPLE To backup everything in startup volume, all you have to say is sudo psync -d / /Volumes/I And the resulting I volume is fully-bootable copy thereof. Note C or root privilege is necessary to restore file ownership. =head1 PERFORMANCE On PowerBook G3 (pismo) with G3/400, 384MB Memory, I tested with C. The boot volume contains no more than vanilla OS X 10.1.2 and Developer kit. It had a little over 10000 items and 1.8 GB of used space. Here is the result; HFS+ on Pismo's Expansion Bay 2539.48 real 121.97 user 290.78 sys 452.98 real 47.29 user 39.38 sys UFS on Pismo's Expansion Bay 9278.25 real 775.60 user 667.82 sys 1086.35 real 69.19 user 53.68 sys HFS+ Disk Image on AFP Volume 3127.60 real 217.51 user 445.04 sys 1059.37 real 69.80 user 52.00 sys DVD-RAM formatted as HFS+ 12258.39 real 210.52 user 441.67 sys 564.49 real 62.47 user 46.65 sys NFS 13227.76 real 429.44 user 583.40 sys 2348.72 real 83.87 user 88.10 sys Note screensaver was on with some other background programs. I used this program happily with my PowerBook G4 (Ti) while I am surfing the web and listening to iTunes at the same time letting SETI@Home search for cosmic programmers :) With MacOS X, background backup is no problem =head1 FILES =over 4 =item .psync.db Berkeley DB Hash file used to store ownership and permission information when -r option is set. =back =head1 BUGS Backing up to AFP volume may lose some files with Unicode names other than the language you specified when you mount the volume. That is, When you mount the volume with "Japanese" support, You may fail to backup files with Korean and Chinese names. AFP prior to MacOSX ( including Netatalk 1.5.x) is also vulnerable to file names that are longer than 31 bytes. Old AFP also suffers the problem of 2GB file size limit. This may stand in your way when you try to backup on disk image on AFP volume. AFP on MacOS X (that is, AFP server is MacOS X) does not have this problem. In theory the backup also works on WebDAV and SMB but they remain untested. =head1 DISCLAIMER The author of this utility will be held no responsibility for any damages and losses of data and/or files that may be caused by the use thereof. B =head1 AUTHOR Dan Kogai =head1 SEE ALSO L hfstar F hfspax F C F =head1 COPYRIGHT Copyright 2002 Dan Kogai This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.