#!/usr/bin/perl
#
# $Id: psync,v 0.67 2004/05/03 14:53:30 dankogai Exp $
# $Id: psync,v 0.69.3 2004/12/18 11:28:00 jacek_s Exp $
#

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

use strict;
use Getopt::Long;
use Fcntl;
use DB_File;
use File::Basename;
use MacOSX::File;
use MacOSX::File::Copy;
use MacOSX::File::Info;
use Fcntl qq(:mode);

my $Debug = 0;
my $Psync_DB = '.psync.db';

my (
    $opt_a,
    $opt_d,
    $opt_f,
    $opt_m,
    $opt_n,
    $opt_p, 
    $opt_q, 
    $opt_r,
    $opt_v, 
    );
my @opt_excludes = ();

Getopt::Long::Configure("bundling");
GetOptions(
           "D"   => \$Debug,
           "a"   => \$opt_a,
           "d:n" => \$opt_d,
           "f"   => \$opt_f,
           "m"   => \$opt_m,
           "n"   => \$opt_n,
           "p"   => \$opt_p,
           "q"   => \$opt_q,
           "r"   => \$opt_r,
           "v:n" => \$opt_v,
           "exclude=s" => \@opt_excludes,
            );

defined $opt_d and $opt_d ||= 1;
$opt_v ||= 1;
$opt_q and $opt_v = 0;
$opt_a and $opt_v ||= 1; ###js force -v when -a is set

$Debug and print <<"EOT";
\$opt_a = $opt_a,
\$opt_d = $opt_d,
\$opt_f = $opt_f,
\$opt_m = $opt_m,
\$opt_n = $opt_n,
\$opt_p = $opt_p,
\$opt_q = $opt_q,
\$opt_r = $opt_r,
\$opt_v = $opt_v,
\@opt_excludes = @opt_excludes,
EOT

###js - changed most patterns to match anywhere (^/ -> .*/
###js - changed /private/var/run/.* -> .*\.pid 
###js - added /Volumes/xxx/dev/, /private/tmp, Library/Caches and Library/Safari/Icons
my $IgnorePat =
    qr[
            ^/dev/.*
          | ^/Volumes/[^/]*/dev/.*
          | .*/tmp/.*
          | .*/private/tmp/.*
          | .*/private/var/tmp/.*
          | .*/private/var/vm/.* 
          | .*/private/var/run/.*\.pid 
          | .*/Temporary\ Items/.*
          | .*/Library/Caches/.*
          | .*/Library/Safari/Icons/.*
       ]xo;

my @ExcludePats = ();
push @ExcludePats, $IgnorePat;
push @ExcludePats, eval "qr[\\Q$_]o" foreach (@opt_excludes);

my %IgnoreFiles = map { $_ => 1 }
(
 $Psync_DB,
# '.DS_Store', ###js I like to keep those
 '.FBCIndex',
 '.FBCLockFolder',
 '.Trashes',
 'AppleShare PDS',
 'Desktop DB',
 'Desktop DF',
 'TheFindByContentFolder',
 'TheVolumeSettingsFolder',
 );

my $Del_Ignored = $opt_d > 1 ? 1 : 0;

my $Topdev;
# Maybe we should tie them to DB_File to save memory
my (%Signature, %Attribs, %Action, %Root) = ();

select(STDOUT);
$|=1; #autoflush

###js check arguments
scalar @ARGV >= 2 or help();
for my $arg (@ARGV){
    -e $arg or die "$arg does not exist";
}

my $Dst = pop @ARGV;
-d $Dst or die "$Dst is not a directory";
if ($opt_r){
    scalar @ARGV != 1 and help();
    if (-f "$ARGV[0]/$Psync_DB"){ # remote restore mode
        $opt_v and 
            do_log("Using $ARGV[0]/$Psync_DB to retrieve extra attributes.");
        tie (%Attribs, 'DB_File', "$ARGV[0]/$Psync_DB",
             O_RDONLY, 0440, $DB_HASH) or die "$Dst/$Psync_DB : $!";
        $opt_r = 1;
    }else{
        $opt_r = 2;               # remote backup mode
    }
}

my $ScanCount = 0;
$opt_v and do_log("Scanning Destination Directory $Dst ...");
$Topdev = (lstat($Dst))[0];
scantree($Dst, '', -1);
$opt_v and do_log("\n$ScanCount items found.");

for my $src (@ARGV){
    $ScanCount = 0;
    $opt_v and do_log("Scanning Source Item $src ...");
    $Topdev = (lstat($src))[0];
    scantree($src, '' , +1);
    $opt_v and do_log("\n$ScanCount items found.");
}

my ($n_del, $n_unchg, $n_copy) = (0,0,0); ###js - global so they can be used later
while(my ($k, $v) = each %Action){
    $k or $n_unchg++ and next; ###js add empty (root) element to unchanged (so the numbers add up)
    $v <  0 and $n_del++;
    $v == 0 and $n_unchg++;
    $v >  0 and $n_copy++;
}
if ($opt_v){
    do_log(sprintf "%8d items to delete,", $n_del);
    $opt_v > 1 and do_log(sprintf "%8d items unchanged,", $n_unchg); ###js added $opt_v > 1 (less useful info)
    do_log(sprintf "%8d items to copy/fix.", $n_copy);
}

if ($opt_d and $n_del > 0){
    $opt_v and $opt_n or $opt_a ? do_log("would delete ...") 
                                : do_log("deleting items ..."); ###js added $opt_n or $opt_a
    # sort must be this order for depth-first traversal
    for my $k (sort {$b cmp $a} keys %Action){
        $k or next; $Action{$k} <  0 or next;
        my $dpath = $Dst . $k;
        unless ($opt_n){
            ###js - if $dpath is a link with no target, -e will fail
            -e $dpath or -l $dpath or next;

            $opt_v and do_log("- $dpath");
            
            ###js - if a folder, we may need to do a recursive delete
            if (-d $dpath and ! -l $dpath){
                $opt_a or rmdir $dpath or rmtree($dpath);
            }
            else{
                $opt_a or unlink $dpath or unlock($dpath) and unlink($dpath) or warn "$dpath : $!"; ###js added $opt_a and unlock
            }
            my $atticf = dirname($dpath) . '/._' . basename($dpath);
            if (-f $atticf){
                $opt_a or unlink $atticf or warn "$atticf : $!"; ###js added $opt_a
            }
        }
    }
}

if ($n_copy > 0){
    $opt_v and $opt_n or $opt_a ? do_log("would copy ...") 
                                : do_log("copying items ..."); ###js added $opt_n or $opt_a
    # sort must be this order for depth-last traversal
    for my $k (sort keys %Action){
        my $spath = $Root{$k} . $k; $spath =~ s,^/+,/,o;
        my $dpath = $Dst . $k;
        $Action{$k} == 0 and $opt_v > 1 and do_log("== $spath");
        $Action{$k} >  0 or next;
        unless ($opt_n){
            my ($size, $mtime)                = unpack("N2", $Signature{$k});
            my ($mode, $uid,  $gid,  $atime)  = unpack("N4", $Attribs{$k});
            if     (S_ISDIR($mode)){ # -d
                unless (-d $dpath){
                    
                    ###js - to replace file or link with a folder, delete it first
                    if (-e $dpath or -l $dpath){
                        $opt_v and do_log("--d $dpath");
                        $opt_a or unlink $dpath; ###js added $opt_a
                    }
                    
                    $opt_v and do_log("+d $spath");
                    $opt_a or mkdir $dpath, 0755 or warn "$dpath : $!"; ###js added $opt_a
                }else{
                    $opt_v > 1 and do_log("=d $spath");
                }
            }elsif (S_ISREG($mode)){ # -f
                ###js - to replace link or folder with a file, delete it first
                if (-l $dpath){
                    $opt_v and do_log("--l $dpath");
                    $opt_a or unlink $dpath or warn "$dpath : $!";
                }
                elsif (-d $dpath){
                    $opt_v and do_log("--d $dpath");
                    $opt_a or rmdir $dpath or rmtree($dpath);
                }
                
                $opt_v and do_log("+f $spath");
                $opt_a or copy($spath, $dpath) or unlock($dpath) and copy($spath, $dpath) ###js added unlock
                    or warn "$spath -> $dpath : ", &MacOSX::File::strerr, " ($MacOSX::File::CopyErr)"; ###js show error name
                $opt_a or copyattrib($spath, $dpath, $mode, $uid, $gid, $atime, $mtime); ###js added $opt_a
            }elsif (S_ISLNK($mode)){ # -l
                my $slink = readlink($spath);
                my $dlink = readlink($dpath);
                if ($slink ne $dlink){
                    $opt_v and do_log("+l $spath");
                    $opt_v > 3 and do_log("source link: $slink");
                    $opt_v > 3 and do_log("destin link: $dlink");
                    #$opt_a or unlink $dpath && symlink($slink, $dpath) or warn "symlink($slink,$dpath) failed"; ###js added $opt_a and warning
                    if (not $opt_a){
                    	unlink $dpath;
                    	symlink($slink, $dpath) or warn "symlink($slink,$dpath) failed"; ###js added $opt_a and warning
                    }
                }
                #else{
                #    $opt_v and do_log("++l $spath");
                #}
                #$opt_a or copylinkattr($spath, $dpath, $mode, $uid, $gid, $atime, $mtime); ###js added
            }
        }
    }
}

if ($n_copy > 0 and not $opt_p){
    $opt_v and $opt_n or $opt_a ? do_log("would fix directory attributes ...") 
                                : do_log("fixing directory attributes ..."); ###js added $opt_n or $opt-a
    # sort must be this order for depth-first traversal
    for my $k (sort {$b cmp $a} keys %Action){
        $Action{$k} > 0 or next;
        my ($size, $mtime)                = unpack("N2", $Signature{$k});
        my ($mode, $uid,  $gid,  $atime)  = unpack("N4", $Attribs{$k});
        S_ISDIR($mode) or next;  # -d
        my $spath = $Root{$k} . $k; $spath =~ s,^/+,/,o;
        my $dpath = $Dst . $k;
        unless ($opt_n){
            $opt_v and do_log(sprintf "0%04o,%s,%s $dpath", ($mode & 07777), 
                              (getpwuid($uid))[0],(getgrgid($gid))[0] );
            $opt_a or copyattrib($spath, $dpath, $mode, $uid, $gid, $atime, $mtime); ###js added $opt_a
        }
    }
}

if ($opt_r >= 2){
    # these are to make DB operation fast enough
    my $hashinfo = DB_File::HASHINFO->new;
    $hashinfo->{nelem} = scalar keys %Action;
    $hashinfo->{bsize} = 1024; # MAXPATHLEN
    $hashinfo->{cachesize} = 4 * 1024 * 1024;
    tie (my %db, 'DB_File', $Psync_DB,  O_CREAT|O_RDWR, 0640, $hashinfo)
        or die "$Psync_DB : $!";
    $opt_v and do_log("Using $Dst/$Psync_DB to store extra attributes.");
    my $count;
    while ( my ($k, $v) = each %Action){
        if ($v >= 0){
            $db{$k} = $Attribs{$k};
            $count++ % 10000 == 0 and do_log("$count items stored.");
        }
    }
    untie %db;
    move $Psync_DB, "$Dst/$Psync_DB" or die "Can't move $Psync_DB";
}

exit;

sub copyattrib{
    my ($spath, $dpath, $mode, $uid, $gid, $atime, $mtime) = @_;
    my $finfo = getfinfo($spath);
    unless ($opt_r > 1){
        chmod $mode & 07777, $dpath;
        chown $uid,   $gid,  $dpath;
    }
    $finfo and $finfo->set($dpath);
    utime $atime, $mtime,  $dpath;
}

sub unlock{ ###js added new function to deal with deleting HFS locked files
    my ($dpath) = @_;
    my $finfo = getfinfo($dpath);
    $finfo and $finfo->unlock($dpath);
    setfinfo($finfo, $dpath);
}

sub rmtree{ ###js created new function to deal with folders in $IgnorePat
    my ($path) = @_;
    opendir my $d, $path or warn "$path:$!";
    my @f = grep !/^(?:\.$|\.\.$)/o, readdir $d; # skip . and ..
    closedir $d;
    for my $f (@f){
        my $fpath = "$path/$f";
        if (-d $fpath and ! -l $fpath){
            rmtree($fpath) or warn "$fpath : $!";
        }else{
            $opt_v and do_log("-- $fpath");
            unlink $fpath or unlock($fpath) and unlink $fpath or warn "$fpath : $!";
        }
    }
    $opt_v and do_log("-- $path");
    rmdir $path;
}

sub do_log{
    print shift, "\n";
}

sub sig2txt{
    return sprintf("0x%08x,0x%08x",unpack("N2",shift));
}

sub addsig{
    my ($path,$mode,$uid,$gid,$size,$atime,$mtime,$action) = @_;
    my $sig = pack("N2", (S_ISREG($mode) ? $size : 0), $mtime);
    my $atr = $Attribs{$path}; ###js save old attributes for comparison
    tied %Attribs or $Attribs{$path} = pack("N4", $mode, $uid, $gid, $atime);
    if ($opt_v > 3 and $action > 0){
        do_log qq(was: ) . sig2txt($Signature{$path});
        do_log qq(now: ) . sig2txt($sig);
    }
    if ($Signature{$path}){
        my ($oldsize, $oldmtime) = unpack("N2", $Signature{$path});
        S_ISREG($mode) or $size = 0;
        my $dif = abs($oldmtime - $mtime);
        if (($oldsize == $size) && 
                ($dif < 2 ||         ###js SMB shares use 2 second resolution on mod time
                 abs($dif - 3600) < 2)) { ###js after change from/to daylight time there may be 1 hour difference
            if ($opt_m){
                $opt_f or $action = 0;     # same file
            }else{ ###js check for change in mode and ownership
                my ($oldmode, $olduid, $oldgid, $oldatime) = unpack("N4", $atr);
                if ($opt_v > 3 and $action > 0){
                    do_log qq(old mode/uid/gid: ) . sprintf("%o, %d, %d", $oldmode, $olduid, $oldgid);
                    do_log qq(new mode/uid/gid: ) . sprintf("%o, %d, %d", $mode, $uid, $gid);
                }
                if ($mode == $oldmode && $uid == $olduid && $gid == $oldgid){
                    $opt_f or $action = 0;     # same file
                }
            }
        }else{
            $Signature{$path} =  $sig; # different
        }
    }else{
        $Signature{$path} =  $sig; # first time seen
    }
    $Action{$path} = $action;
    $opt_v > 2 and 
        do_log(join("," => $Action{$path},
            sprintf("0x%08x,0x%08x", unpack("N2",$Signature{$path})),
            $path));
    $opt_v > 3 and $action >= 0 and do_log;
}

# File::Find is too general purpose thus slow.
# we implement our own traversal routine

sub scantree {
    my ($root, $path, $action) = @_;
    if ($opt_v < 3){###js - when -v >= 3 this interferes with other output
        if ($ScanCount == 0){ ###js - prettified - removed empty line
            printf "%10d:", $ScanCount;
        }
        else{
            $ScanCount % 8192 == 0 and printf "\n%10d:", $ScanCount;
        }
        $ScanCount % 128  == 0 and print  ".";
    }
    $ScanCount++;

    my $fpath = $root . $path; ###js - use full path (instead of just $path)
    if (grep { $fpath =~ $_} @ExcludePats){
    	if ($action < 0 and $Del_Ignored){
	    	$opt_v > 2 and do_log(sprintf "%s in ExcludePats & Del_Ignored - skip, but delete destination", $path);
	        addsig($path, 0, 0, 0, 0, 0, 0, -1);
	    }
	    else{
	    	$opt_v > 2 and do_log(sprintf "%s in ExcludePats - skip", $path);
	    }
        return;
    }
    
    $action > 0 and $Root{$path} = $root;
    my ($dev, $mode, $nlink, $uid, $gid, $size, $atime, $mtime) = 
        (lstat($fpath))[0,2,3,4,5,7,8,9] or warn "can't stat $fpath";
    addsig($path, $mode, $uid, $gid, $size, $atime, $mtime, $action);

    if (-d _){
        $dev != $Topdev and return;
        opendir my $d, $fpath or warn "$fpath:$!";
        my @f = grep !/^\.(?:\.?$|_)/o, readdir $d; # skip . .. and ._*
        closedir $d;
        for my $f (@f){
            my $spath = "$path/$f";
            if ($IgnoreFiles{$f} and $action < 0 and $f ne $Psync_DB and $Del_Ignored){
		    	$opt_v > 2 and do_log(sprintf "%s in IgnoreFiles & Del_Ignored - skip, but delete destination", $f);
                addsig($spath, $mode, $uid, $gid, $size, $mtime, $atime, -1);
            }else{
                scantree($root, $spath, $action);
            }
        }
    }
}

sub help{
    print <<"EOT";
psync 0.69.3 - usage:
psync   [-a][-d][-f][-m][-n][-p][-q|-v][--exclude=pat]* source_items ... target_directory
psync -r[-a][-d][-f][-m][-n][-p][-q|-v][--exclude=pat]* source_directory target_directory
for more type man psync
EOT
exit;
}
1;
__END__
=head1 NAME

psync -- update copy

=head1 SYNOPSIS

 psync [-a] [-d] [-f] [-m] [-n] [-p] [-r] [-q|-vI<n>] [--exclude=pat]* source_items ... target_directory

=head1 DESCRIPTION

psync does an update copy.  It compares source directory and target
directory at first, then erases items that are nonexistent on source
directory if specified and finally copies everything on source directory.
Items with the same modification date and (data fork) size remain
untouched, saving time on operation.

Currently psync supports options below

=over 4

=item -a

"Full Simulation mode".  It prints (on standard output) details
of what it would do but does nothing.

=item -dI<n>

Delete nonexistent files before starting copy.
-d2 = also delete files matching "Ignore Pattern":
(.*/tmp/.*,.*/dev/.*,.*/private/tmp/.*,.*/private/var/tmp/.*,
 .*/private/var/vm/.*,/ .*/private/var/run/.*\.pid,
 .*/Temporary\ Items/.*,
 .*/Library/Caches/.*,.*/Library/Safari/Icons/.*)
and "Ignored Files":
(.FBCIndex,.FBCLockFolder,.Trashes,AppleShare PDS,Desktop DB, 
 Desktop DF,TheFindByContentFolder,TheVolumeSettingsFolder)

CAVEAT:  Prior to 0.50 this option was default.

=item -f

Force copy.  Copy files even when the file remains unchanged.

=item -m

Don't use mode and ownership when comparing files - use only 
file size and modifications date (to speed it up).

=item -n

"Simulation mode".  It prints (on standard output) a summary
of what it would do but does nothing.

=item -p

Do not fix directory attributes (permissions). 
Useful when source is on a volume that does not support permissions
or has a completely different idea of them (like Windows NT)

=item -q

Quiet mode.  Sets verbose level to 0.

=item -r

Remote backup/restore mode.  Ownership and permissions are
stored/retrieved via C<.psync.db>

If the source directory contains a file C<.psync.db>, psync
turns into remote restore mode.  It uses .psync.db on source
directory to restore ownership and permissions.

If not, psync turns into remote backup mode.  After the backup
it stores ownership and permissions to C<.psync.db>

As the name suggests, this option is imperative when the backup
directory is on remote volume such as AFP, NFS, and Samba.

=item -vI<n>

Sets verbose level.  Default verbose level is 1;  It prints only items
that are changed.  Level 2 prints unchanged files also.  Level 3 and
above are practically debugging mode.

=item --exclude

Add an exclude pattern.  This may be repeated as many times as you wish.
This will be treated as a fixed string, not as a regular expression.

=back

=head1 EXAMPLE

To backup everything in startup volume, all you have to say is

  sudo psync -d / /Volumes/I<backup>

And the resulting I<backup> volume is fully-bootable copy thereof.
Note C<sudo> or root privilege is necessary to restore file
ownership. 

=head1 PERFORMANCE

On PowerBook G3 (pismo) with G3/400, 384MB Memory,  I tested with
C</usr/bin/time -l sudo psync -d / /Volumes/backup>.  The boot volume
contains no more than vanilla OS X 10.1.2 and Developer kit.  It
had a little over 10000 items and 1.8 GB of used space.  Here is
the result;

  HFS+ on Pismo's Expansion Bay
     2539.48 real       121.97 user       290.78 sys
      452.98 real        47.29 user        39.38 sys

  UFS on Pismo's Expansion Bay
     9278.25 real       775.60 user       667.82 sys
     1086.35 real        69.19 user        53.68 sys

  HFS+ Disk Image on AFP Volume
     3127.60 real       217.51 user       445.04 sys
     1059.37 real        69.80 user        52.00 sys

  DVD-RAM formatted as HFS+
    12258.39 real       210.52 user       441.67 sys
      564.49 real        62.47 user        46.65 sys

  NFS
    13227.76 real       429.44 user       583.40 sys
     2348.72 real        83.87 user        88.10 sys

Note screensaver was on with some other background programs.  I used
this program happily with my PowerBook G4 (Ti) while I am surfing the web
and listening to iTunes at the same time letting SETI@Home search for
cosmic programmers :)  With MacOS X, background backup is no problem

=head1 FILES

=over 4

=item .psync.db

Berkeley DB Hash file used to store ownership and permission
information when -r option is set.

=back

=head1 BUGS

Backing up to AFP volume may lose some files with Unicode names other
than the language you specified when you mount the volume.  That is,
When you mount the volume with "Japanese" support,  You may fail to 
backup files with Korean and Chinese names.  AFP prior to MacOSX (
including Netatalk 1.5.x) is also vulnerable to file names that are
longer than 31 bytes.  Old AFP also suffers the problem of 2GB file
size limit.  This may stand in your way when you try to backup on
disk image on AFP volume.

AFP on MacOS X (that is, AFP server is MacOS X) does not have this
problem.

In theory the backup also works on WebDAV and SMB but they remain
untested.

=head1 DISCLAIMER

The author of this utility will be held no responsibility for any
damages and losses of data and/or files that may be caused by the use
thereof.

B<Use me at your own risk!>

=head1 AUTHOR

Dan Kogai <dankogai@dan.co.jp>

=head1 SEE ALSO

L<pcpmac/1>

hfstar F<http://www.geocities.com/paulotex/tar/>

hfspax F<http://homepage.mac.com/howardoakley/>

C<The Finder and File Operations> F<http://developer.apple.com/techpubs/macosx/Essentials/SystemOverview/Finder/The_Finder___Operations.html>

=head1 COPYRIGHT

Copyright 2002 Dan Kogai <dankogai@dan.co.jp>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
