#!/usr/bin/perl -l
#
# This file causes a list of directories to be removed or moved off
# the users home directory into a given other directory. Usually this
# is used to relief NFS home directories of the burden of caches and
# other performance needing directories.
#
# Copyright (C) 2010 by Axel Beckert <beckert@phys.ethz.ch>,
# Department of Physics, ETH Zurich.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.
#

use strict;
use warnings;

our $VERSION = '0.0.20101217';

# Configuration variables to be used in configuration files
my $CONFIG = {
    TARGETDIR  => '/tmp',
    FILELAYOUT => '.unburden-%u/%s',
};

# Just show what would be done
my $DRYRUN = undef;

# Undo feature
my $REVERT = 0;

# Defaul base name
my $BASENAME = 'unburden_home_dir';

# Load Modules
use Config::File;
use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION = 1;
use File::Path qw(mkpath rmtree);
use File::Basename;
use File::Touch;
use File::Rsync;
use IO::Handle;
use Data::Dumper;

# Declare and initialise some variables
my %OPTIONS = ();
my $FILTER = undef;
my $UID = getpwuid($<);

# Some messages for Getopt::Std
sub VERSION_MESSAGE {
    my ($fh, $getoptpkg, $getoptversion, $cmdlineargs) = @_;

    print $fh "Unburden Home Directory $VERSION\n";
}

sub HELP_MESSAGE {
    my ($fh, $getoptpkg, $getoptversion, $cmdlineargs) = @_;

    print $fh "Usage: $0 [ -n | -u | -b basename | ( -c | -C ) configfile | -f filter ]
       $0 ( -h | --help | --version )

  -b  use the given string as basename instead of $BASENAME.

  -c  read an additional configuration file

  -C  read only the given configuration file

  -f  just unburden those directory matched by the given filter (a perl
      regular expression) -- matches the already unburdened
      directories if used together with -u.

  -l  read an additional list file

  -L  read only the given list file

  -n  dry run (show what would be done)

  -u  undo (reverse the functionality and put stuff back into the home
      directory)

  -h, --help show this help

  --version  show the program's version
";
}

# Parse command line options
getopts('hnuf:b:c:C:l:L:', \%OPTIONS);

if (exists($OPTIONS{h})) {
    my $fh = IO::Handle->new_from_fd(fileno(STDERR),'w');
    VERSION_MESSAGE($fh);
    HELP_MESSAGE($fh);
    exit 0;
}

if (exists($OPTIONS{b})) {
    $BASENAME = $OPTIONS{b};
}

# By default check for a system wide and a user configuration and list file
my @CONFFILES = ("/etc/$BASENAME", "$ENV{HOME}/.$BASENAME");
my @LISTFILES = ("/etc/${BASENAME}_list", "$ENV{HOME}/.${BASENAME}_list");

if (exists($OPTIONS{C})) {
    @CONFFILES = ($OPTIONS{C});
}

if (exists($OPTIONS{c})) {
    push(@CONFFILES, $OPTIONS{c});
}

if (exists($OPTIONS{L})) {
    @LISTFILES = ($OPTIONS{L});
}

if (exists($OPTIONS{l})) {
    push(@LISTFILES, $OPTIONS{l});
}

if (exists($OPTIONS{n})) {
    $DRYRUN = 1;
}

if (exists($OPTIONS{u})) {
    $REVERT = 1;
}

if (exists($OPTIONS{f})) {
    eval { $FILTER = qr/$OPTIONS{f}/; };
    if ($@) {
	&report_serious_problem("parameter to -f", "\n$@");
	exit 2;
    }
}

# Check for configuration files and read them
foreach my $configfile (@CONFFILES) {
    if ( -e $configfile ) {
	$CONFIG = { %$CONFIG,
		    %{Config::File::read_config_file($configfile)} };
    }
}

# Fix some values
$UID =~ s/\s+//gs;

# Remove quotes and line-feeds from values
foreach my $key (keys %$CONFIG) {
    chomp($CONFIG->{$key});
    $CONFIG->{$key} =~ s/^([\'\"])(.*)\1$/$2/;
}

# Set proper umask when creating files or directories. Save current
# umask before.
my $OLDUMASK = umask();
umask(077);

# Initialize rsync object
my $rsync = File::Rsync->new({
    archive => 1,
    verbose => 1,
    outfun => sub { my $_ = shift; chomp; print unless m(^sent |^total size|^\s*$); },
    errfun => sub { chomp; warn "$_[0]\n"; },
});

sub report_problem {
    warn "WARNING: Can't handle $_[0]: $_[1]";
}

sub report_serious_problem {
    warn "ERROR: Can't handle $_[0]: $_[1]";
}

sub move ($$) {
    my ($from, $to) = @_;
    print "Moving $from -> $to";
    unless ($DRYRUN) {
	if (-d $from) {
	    $from .= '/' unless $from =~ m(/$);
	    $to .= '/' unless $to =~ m(/$);

	    my $rc = $rsync->exec({
		src => $from,
		dst => $to,
	    });
	    rmtree($from);
	} else {
	    my $rc = system(qw(mv -vi), $from, $to);
	    return !($? >> 8);
	}
    }
    return 1;
}

sub create_symlink_and_parents {
    my ($old, $new) = @_;
    create_parent_directories($new);
    print "Symlinking $new -> $old";
    symlink($old, $new) || die "Couldn't symlink $new -> $old: $!"
	unless $DRYRUN;
}

sub create_parent_directories {
    my $file = shift;
    my $parent_dir = dirname($file);
    unless (-d $parent_dir) {
	print "Create parent directories for $file";
	mkpath($parent_dir, { verbose => 1 }) unless $DRYRUN;
    }
}

sub possibly_create_non_existing_stuff {
    my ($type, $item, $target) = @_;

    # Shall we create not yet existing directories or files as symlink?
    # Case 1: directory
    if ( $type eq "D" ) {
	# TODO: Refactor create_symlink_and_parents so that its
	# create_parent_directories call isn't redundant in this case.
	print "Create directory $target and parents";
	mkpath($target, { verbose => 1 }) unless $DRYRUN;
	create_symlink_and_parents($target, $item);
    }

    # Case 2: file
    elsif ( $type eq "F" ) {
	create_parent_directories($target);
	print "Touching $target";
	touch($_[2]) unless $DRYRUN;
	create_symlink_and_parents($target, $item)
    }
    return 0
}

sub fix_dangling_links {
    my $link = readlink($_[1]);
    # Check if link target is wanted target
    if ( $link ne $_[2] ) {
	report_problem($_[1], "$link not equal $_[2]");
	return 1
    }

    # Check if target exists and is same type
    if ( -e $_[2] ) {
	# Case 1: directory
	if ( $_[0] eq "d" or $_[0] eq 'D' ) {
	    # Does target exist?
	    if ( ! -d $_[2] ) {
		report_problem($_[1], "Unexpected type (not a directory)");
		return 1
	    }
	}
	# Case 2: file
	elsif ( $_[0] eq "f" or $_[0] eq 'F' ) {
	    # Does target exist?
	    if ( ! -f $_[2] ) {
		report_problem($_[1], "Unexpected type (not a regular file)");
		return 1
	    }
	}
    }
    # Symlink is there, but file or directory not
    else {
	# Case 1: directory
	if ( $_[0] eq "d" or $_[0] eq 'D' ) {
	    print "Create directory $_[2]";
	    mkpath($_[2], { verbose => 1 }) unless $DRYRUN;
	}
	# Case 2: file
	elsif ( $_[0] eq "f" or "$_[0]" eq 'F' ) {
	    create_parent_directories($_[2]);
	    print "Touching $_[2]";
	    touch($_[2]) unless $DRYRUN;
	}
    }
    return 0
}

sub do_it {
    if ( $_[0] eq "d" or $_[0] eq 'D' ) {
	if ( -d $_[1] ) {
	    if ( $_[3] eq "r" or $_[3] eq "d" ) {
		print "Delete directory $_[1]";
		rmtree($_[1], { verbose => 1 }) unless $DRYRUN;
		print "Create directory $_[2]";
		mkpath($_[2], { verbose => 1 }) unless $DRYRUN;
	    }
	    elsif ( $_[3] eq "m" ) {
		&create_parent_directories($_[2]);
		&move(@_[1,2]) || die "Couldn't move $_[1] -> $_[2]: $!"
		    unless $DRYRUN
	    }
	    else {
		warn "Unknown action '$_[3]'. This should never happen.";
		return 255
	    }
	    print "Symlinking $_[2] ->  $_[1]";
	    symlink($_[2], $_[1]) || die "Couldn't symlink $_[2] ->  $_[1]: $!"
		unless $DRYRUN;
	}
	else {
	    report_serious_problem($_[1], "Unexpected type (not a directory)");
	    return 1
	}
    }
    elsif ( $_[0] eq "f" or $_[0] eq 'F' ) {
	if ( -f $_[1] ) {
	    if ( $_[3] eq "r" or $_[3] eq "d" ) {
		print "Deleting $_[1]";
		unlink($_[1]) || die "Couldn't delete $_[1]: $!" unless $DRYRUN;
		&create_parent_directories($_[2]);
		print "Touching $_[2]";
		touch($_[2]) unless $DRYRUN;
	    }
	    elsif ( $_[3] eq "m" ) {
		&create_parent_directories($_[2]);
		print "Moving $_[1] -> $_[2]";
		&move(@_[1,2]) || die "Couldn't move $_[1] -> $_[2]: $!"
		    unless $DRYRUN;
	    }
	    else {
		warn "Unknown action '$_[3]'. This should never happen.";
		return 255
	    }
	    print "$_[2] ->  $_[1]";
	    symlink($_[2], $_[1]) || die "Couldn't symlink $_[2] ->  $_[1]: $!"
		unless $DRYRUN;
	}
	else {
	    report_serious_problem($_[1], "Unexpected type (not a file)");
	    return 1
	}
    }
    else {
	warn "This should never happen.";
	return 255
    }
    return 0
}

sub calculate_target {
    my $replacement = shift;
    my $target = $CONFIG->{FILELAYOUT};

    $target =~ s|%u|$UID|ge;
    $target =~ s|%s|$replacement|g;

    return $CONFIG->{TARGETDIR}."/$target";
}

sub fill_in_wildcard_matches {
    my ($itemglob, $itemexpanded, $target) = @_;

    # Replace %<n> (e.g. %1) with the n-th wildcard match. Uses perl
    # here as it would be too complicated and way less readable if
    # written as (bourne) shell script.

    # Change from globbing to regexp
    $itemglob =~ s/\?/(.)/g;
    $itemglob =~ s/\*/(.*)/g;

    my @result = $itemexpanded =~ m($itemglob)g;

    $target =~ s/\%(\d+)/$result[$1-1]/eg;

    return $target;
}

# Check if the path to something to unburden already contains a symlink
sub symlink_in_path {
    my $path = shift;
    # Remove home directory, i.e. check just from below the home directory
    if ($path =~ s($ENV{HOME}/?)()) {
	# Split up into components, but remove the last one (which we
	# are requested to handle, so we shouldn't check that now)
	my @path_elements = split(m(/), $path);
	pop(@path_elements);

	foreach my $i (0..$#path_elements) {
	    my $path_to_check = $ENV{HOME}.'/'.join("/", @path_elements[0..$i]);
	    #print "Check if $path_to_check is a symlink";
	    return $path_to_check if -l $path_to_check;
	}
	return 0;
    } else {
	report_serious_problem("Can't find home directory ($ENV{HOME}) in $path!");
    }
}

sub replace {
    # replace $type $i $item $replacement
    my ($type, $itemexpanded, $itemglob, $replacement, $action) = @_;

    # Skip entries where wildcard where passed
    if ($itemexpanded =~ /[][*?]/) {
	warn "Skipping '$itemexpanded' due to unmatched wildcard.\n";
	return 0
    }

    if (my $symlink = &symlink_in_path($itemexpanded)) {
	warn "Skipping '$itemexpanded' due to symlink in path: $symlink\n";
	return 0;
    }

    my $target = &fill_in_wildcard_matches($itemglob, $itemexpanded,
					   &calculate_target($replacement));

    # Check if the source exists
    if ( ! -e $itemexpanded and ! -l $itemexpanded ) {
	possibly_create_non_existing_stuff($type, $itemexpanded, $target);
    }
    # Check if source is already a symlink
    elsif ( -l $itemexpanded ) {
	fix_dangling_links($type, $itemexpanded, $target);
    }

    # TODO: Check available disk space
    # Should use report_serious_problem

    # No symlink yet, then actually move or remove!
    else {
	do_it($type, $itemexpanded, $target, $action);
    }
}

sub revert {
    my ($itemexpanded, $item_in_home, $target_glob) = @_;

    # Skip entries where wildcard where passed
    if ($itemexpanded =~ /[][*?]/) {
	warn "Skipping '$target_glob' due to unmatched wildcard.\n";
	return 0
    }

    $item_in_home = "$ENV{HOME}/" .
	&fill_in_wildcard_matches($target_glob, $itemexpanded, $item_in_home);
    print "Trying to revert $itemexpanded to $item_in_home";

    if (-l $item_in_home) {
	my $link_target = readlink($item_in_home);
	if ($itemexpanded eq $link_target) {
	    print "Removing symlink $item_in_home";
	    unlink($item_in_home) unless $DRYRUN;
	    &move($itemexpanded, $item_in_home);
	} else {
	    warn "Ignoring symlink $item_in_home as it points to $link_target ".
		 "and not to $itemexpanded as expected.\n";
	}
    }
}

sub exchange_wildcards_and_replacements {
    my ($wildcard, $replacement) = @_;
    my $i = 1;
    while ($replacement =~ /\%(\d+)/) {
	my $number = $1;
	my $prev = $number-1;
	$wildcard =~ s/^(([^*]*[*?]){$prev}[^*]*)([?*])/"$1\%".$i++/e;
	my $wildcardtype = $3;
	$replacement =~ s/\%(\d+)/$wildcardtype/;
    }
    return ($wildcard, $replacement);
}

for my $list (@LISTFILES) {
    next unless -r $list;

    # Clean up this and that
    open(LIST, '<', $list) or die "Can't open $list: $!";
    while (<LIST>) {
	next if /^#/;

	chomp;
	my ($action, $type, $item, $replacement) = split;

	next unless defined $action;
	#next if $action eq '';

	if (!defined($item) or !defined($replacement)) {
	    warn "Can't parse '$_', skipping...";
	    next
	}
	if ( $type ne "d" and  $type ne "f" and
	     $type ne "D" and  $type ne "F" ) {
	    warn "Can't parse type '$type', must be 'd', 'D', 'f' or 'F', skipping...";
	    next
	}
	if ( $action ne "d" and $action ne "r" and $action ne "m"  ) {
	    warn "Can't parse action '$action', must be 'd', 'r' or 'm', skipping...";
	    next
	}

	if ( $item =~ m(^(\.\.)?/) ) {
	    warn "$item would be outside of the home directory, skipping...\n";
	    next
	}

	if ($REVERT) {
	    ($item, $replacement) = &exchange_wildcards_and_replacements($item, $replacement);

	    my $replacement_path = &calculate_target($replacement);
	    for my $i (glob($replacement_path)) {
		if (defined($FILTER)) {
		    next unless ($i =~ $FILTER);
		}
		revert($i, $item, $replacement);
	    }
	} else {
	    for my $i (glob("$ENV{HOME}/$item")) {
		if (defined($FILTER)) {
		    next unless ($i =~ $FILTER);
		}
		replace($type, $i, $item, $replacement, $action);
	    }
	}
    }
}

# Restore original umask
umask($OLDUMASK);
