#!/usr/bin/perl

# This is a program that recursively generates Captrap's pages and graphs.

# Copyright 2009 Corey Hickey


# This file is part of Captrap.
#
# Captrap is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Captrap is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Captrap.  If not, see <http://www.gnu.org/licenses/>.


use strict;
use warnings FATAL => 'all';

use File::Basename;

# for development using a different Captrap module
# use lib "lib";
use Captrap qw(:cgi :misc :actions :args :config :db);
use Captrap::Main qw(:mainpage);
use Captrap::View qw(:view);
use Captrap::Graph qw(:graph);


# -----------------------------------------------------------------------------
# printing
# -----------------------------------------------------------------------------

# Print the main help text to STDOUT: synopsis, the action descriptions
# generated by describe_actions() from the mk_actions() table, exit statuses,
# and a few usage examples.
sub usage {
  my ($common) = @_; # unused
  my $actions_text = describe_actions(mk_actions());
  # The blank first line of the heredoc is intentional: the help text starts
  # with an empty line.
  print <<"END_USAGE";

This is a script for recursively generating Captrap's pages and graphs.

captrap_recurse [ACTION] [[ACTION-PARAMETERS]] ...

ACTIONS

$actions_text

EXIT STATUS

0              Everything is ok.

1              No arguments given; usage information was shown.

2              Invalid argument.

3              There was a problem executing an action.


EXAMPLES

Verbosely write the pages to a directory:
captrap_recurse -v-write page_dir ""

Write pages for a specific time to a directory:
captrap_recurse -write page_dir "now=2008-08-22T08:00:00"

Sample hourly crontab entry (should be placed in a user's crontab file):
PATH = /usr/local/bin:/usr/bin/:/bin
# m h  dom mon dow   command
5 * * * * captrap_recurse -cron-write /www/captrap/ /www/captrap/static hour
END_USAGE
}


# List every parameter the main page accepts.
# The parameter info is built from the loaded configuration and handed to
# arg_list_params(), whose result is passed through to the caller.
sub list_params {
  my ($common) = @_;
  my $config = $common->{config};
  my $info = Captrap::Main::mk_param_info($config);
  return arg_list_params($info);
}

# -----------------------------------------------------------------------------
# recursive page writing
# -----------------------------------------------------------------------------

# Build the initial state hash used while recursing.
#   v     - verbose flag (off by default)
#   dir   - output directory, filled in later by do_recur()
#   num   - counter, starts at 0
#   tree  - empty hash to begin with
#   progs - per-program table: parameter info plus the sub that renders it
sub mk_recur {
  my ($config) = @_;
  my %progs = (
    "viewer.pl" => {
      info => Captrap::View::mk_param_info(),
      func => \&Captrap::View::mk_views,
    },
    "grapher.pl" => {
      info => Captrap::Graph::mk_param_info($config),
      func => \&Captrap::Graph::mk_graph,
    },
  );
  return {
    v => 0,
    dir => undef,
    num => 0,
    tree => {},
    progs => \%progs,
  };
}


# Recursively write the pages into an existing directory.
#   $common - shared state hash; its {recur}{dir} is set to the target
#   $dir    - target directory, which must already exist
#   $params - parameter string in URL form
# Returns 1 if the directory is missing, 0 otherwise.
sub do_recur {
  my ($common, $dir, $params) = @_;
  if (! -d "$dir") {
    print STDERR "target directory \"$dir\" does not exist\n";
    return 1;
  }
  $common->{recur}{dir} = $dir;
  my $info = Captrap::Main::mk_param_info($common->{config});
  my $index = "$dir/index.html";
  arg_handle_params($common, $info, \&arg_mk_mainpage, $params, $index, 0);
  # Reaching this point means the write succeeded (a failure would already
  # have exited), so report success to the caller.
  return 0;
}


# Verbose variant of do_recur(): turn on the recursion state's verbose flag,
# then delegate. Returns whatever do_recur() returns.
sub do_recur_v {
  my ($common, $dir, $params) = @_;
  $common->{recur}{v} = 1;
  return do_recur($common, $dir, $params);
}


# Callback for arg_handle_params(): render the main page with the given
# parameters into $file.
#   $common - shared state (hash ref)
#   $params - parsed parameters (hash ref)
#   $file   - output file path
sub arg_mk_mainpage {
  my ($common, $params, $file) = @_;
  return write_output_to_file($common, $params, $file, \&mk_mainpage, 0);
}

# -----------------------------------------------------------------------------
# cron mode
# -----------------------------------------------------------------------------

# Cron-friendly wrapper for do_recur.
#   $common - shared state hash
#   $dir    - base directory; a new subdirectory named after the rounded-down
#             current time is created inside it
#   $link   - path of the symlink that is switched to the new subdirectory
#   $unit   - unit passed to floor_unit() for rounding the current time down
# Once the link has been switched, the directory it previously pointed to is
# removed, provided get_olddir() accepted it.
# Returns 0 on success and 3 on any failure.
sub do_recur_cron {
  my $common = shift;
  my $dir = shift;
  my $link = shift;
  my $unit = shift;
  my $linkdir = dirname($link);
  unless (check_dir($linkdir)) {
    return 3;
  }
  unless (check_dir($dir)) {
    return 3;
  }
  # Look at the old link target before the link is replaced below.
  my $olddir = get_olddir($link); # may return undef
  my $times = get_times($common, undef);
  my $now = floor_unit($unit, $times->{now});
  my $newdir = "$dir/$now";
  unless (mkdir($newdir)) {
    print STDERR "can't make new directory '$newdir'\n";
    return 3;
  }
  if (do_recur($common, $newdir, "now=$now")) {
    print STDERR "recursive fetch failed\n";
    return 3;
  }
  # -e follows symlinks, so a dangling leftover link is only seen by -l;
  # check both, otherwise symlink() below would fail on the stale entry.
  my $tmplink = "$link.tmp";
  if ((-e $tmplink || -l $tmplink) && ! unlink($tmplink)) {
    print STDERR "can't unlink stale temporary symlink '$link.tmp'\n";
    return 3;
  }
  unless (symlink($newdir, $tmplink)) {
    print STDERR "can't create temporary symlink '$link.tmp'\n";
    return 3;
  }
  # should be atomic
  unless (rename($tmplink, $link)) {
    print STDERR "can't rename temporary symlink to '$link'\n";
    return 3;
  }
  # do we need to remove old files?
  return 0 unless defined($olddir);
  unless (rm_olddir($olddir)) {
    return 3;
  }
  # Explicit success status: without it the sub would return the (true)
  # value of the rm_olddir() test above instead of 0.
  return 0;
}


# Remove an old Captrap download directory.
#   $dir - directory to delete
# Every entry must pass file_ok() before it is unlinked; the first entry that
# fails the check or cannot be removed aborts the operation (entries removed
# before that point stay removed).
# Returns 1 if the directory and its contents were removed, 0 otherwise.
sub rm_olddir {
  my $dir = shift;
  my $dh;
  unless (opendir($dh, $dir)) {
    print STDERR "can't open dir '$dir'\n";
    return 0;
  }
  # Read the whole listing and close the handle before deleting anything, so
  # the handle is never leaked and entries are not unlinked mid-iteration.
  my @names = grep { $_ ne '.' && $_ ne '..' } readdir($dh);
  closedir($dh);
  foreach my $name (@names) {
    my $file = "$dir/$name";
    return 0 unless (file_ok($file));
    unless (unlink($file)) {
      print STDERR "can't remove file: '$file'\n";
      return 0;
    }
  }
  # should be empty by now
  unless (rmdir($dir)) {
    print STDERR "can't remove old dir '$dir'\n";
    return 0;
  }
  return 1;
}


# Check whether a file looks like something Captrap generated.
#   $file - path to inspect
# The file must be a regular file (not a symlink) and its name must end in
# ".html" or ".png". Prints the reason to STDERR when the check fails.
# Returns 1 if the file is acceptable, 0 otherwise.
sub file_ok {
  my $file = shift;
  unless (-f $file && ! -l $file) {
    print STDERR "not a regular file: '$file'\n";
    return 0;
  }
  # \z rather than $: "$" would also accept a name with a trailing newline,
  # and this check gates file deletion.
  unless ($file =~ /\.(?:html|png)\z/) {
    print STDERR "unrecognized file: '$file'\n";
    return 0;
  }
  return 1;
}


# Verify that $dir exists, is a real directory (not a symlink to one), and
# passes check_dir_perms(). Prints the reason to STDERR on failure.
# Returns 0 for a missing or non-directory path, otherwise the result of
# check_dir_perms().
sub check_dir {
  my ($dir) = @_;
  if (! -e $dir) {
    print STDERR "directory '$dir' does not exist.\n";
    return 0;
  }
  if (!(-d $dir) || -l $dir) {
    print STDERR "'$dir' is not a directory\n";
    return 0;
  }
  return check_dir_perms($dir);
}


# Check ownership and permissions of the specified directory.
#   $dir - directory to inspect
# The directory must be owned by the effective user and must not be writable
# by "other" (mode bit 0002); group write permission is not examined.
# Prints the reason to STDERR when a check fails.
# Returns 1 if the directory is acceptable, 0 otherwise.
sub check_dir_perms {
  my $dir = shift;
  my @st = stat($dir);
  # stat() returns an empty list on failure (e.g. the directory vanished
  # after the caller's existence check); report it instead of going on with
  # undefined values.
  unless (@st) {
    print STDERR "can't stat directory '$dir': $!\n";
    return 0;
  }
  my ($mode, $uid) = @st[2, 4];
  # uids are numbers, so compare numerically
  if ($uid != $>) {
    print STDERR "I don't own directory '$dir'\n";
    return 0;
  }
  if ($mode & 0002) {
    print STDERR "Other users can write to directory '$dir'\n";
    return 0;
  }
  return 1;
}


# Resolve the directory that the existing symlink $link points to and make
# sure it looks like an old Captrap download (every entry passes file_ok()).
# A relative link target is resolved against the directory holding the link.
# Returns the directory path, or undef if there is no usable old directory.
sub get_olddir {
  my ($link) = @_;
  return undef unless -e $link;
  if (! -l $link) {
    print STDERR "Warning: file '$link' is not a symlink.\n";
    return undef;
  }
  if (! -d $link) {
    print STDERR "Warning: symlink '$link' does not point to a directory.\n";
    return undef;
  }
  my $target = readlink($link);
  # absolute targets are used as-is; relative ones are anchored at the
  # link's own directory
  my $olddir = (substr($target, 0, 1) eq '/')
             ? $target
             : dirname($link) . "/$target";
  # now see if the contents of $olddir look like a Captrap download
  my $dh;
  unless (opendir($dh, $olddir)) {
    print STDERR "Warning: can't open dir '$olddir'\n";
    return undef;
  }
  while (my $entry = readdir($dh)) {
    next if $entry =~ /^\.{1,2}$/; # skip . and ..
    my $path = "$olddir/$entry";
    unless (file_ok($path)) {
      print STDERR "Unrecognized file in old directory: '$path'\n";
      return undef;
    }
  }
  return $olddir; # looks ok
}

# -----------------------------------------------------------------------------
# actions info
# -----------------------------------------------------------------------------

# Return the table of supported command-line actions.
# Each key is an action name as typed on the command line; each value holds:
#   func - reference to the sub that implements the action
#   args - names of the arguments the action takes
#   desc - help text for the action
# The container comes from mk_ixhash() (presumably an insertion-ordered
# hash -- confirm against the Captrap module).
sub mk_actions {
  my $actions = mk_ixhash();
  %$actions = (
    "-help" => {
      func => \&usage,
      args => [],
      desc => "
          Print this usage text.
      ",
    },
    "-list-parameters" => {
      func => \&list_params,
      args => [],
      desc => "
          Print a list of valid parameters.
      ",
    },
    "-write" => {
      func => \&do_recur,
      args => [ qw(DIRECTORY PARAMETERS) ],
      desc => "
          Write the pages to a directory, as long as the directory already
          exists. The directory must be followed by a list of parameters in the
          usual URL form (as in param1=value1&param2=value2). Be sure to
          surround the parameters with quotes if necessary, to avoid having the
          shell interpret the ampersands.
      ",
    },
    "-v-write" => {
      func => \&do_recur_v,
      args => [ qw(DIRECTORY PARAMETERS) ],
      desc => "
          Same as \"-write\", but verbose.
      ",
    },
    "-cron-write" => {
      func => \&do_recur_cron,
      args => [ qw(BASE LINK ROUNDOFF) ],
      desc => "
          This is a wrapper for the \"-write\" action intended to be run as a
          cron job. The first argument specifies a base directory; the current
          time will be appended to BASE, and files will be written there. Next,
          a symlink to the new directory will be created (possibly replacing an
          old link) as specified by LINK. The directory to which LINK
          previously pointed will be removed; some checks are in place to make
          sure the old directory contains an old set of Captrap files, but, to
          be safe, make sure the directory which contains the link is not
          writable by normal users--otherwise a malicious user may make the
          link point somewhere else and cause files to be removed. The final
          parameter to this action, ROUNDOFF, specifies the unit by which the
          current time is rounded down; for example, if ROUNDOFF is 'hour',
          pages will be generated as if this script were being run exactly at
          the beginning of the current hour.
      ",
    },
  );
  return $actions;
}

# ----------------------------------------------------------------------------
# main: validate the command line, build the shared state, run the actions

# no arguments at all: show the help text and exit with status 1
unless (@ARGV) {
  usage();
  exit(1);
}

# validate the arguments against the action table
my $actions = mk_actions();
check_args(\@ARGV, $actions);

# shared state handed to the action handlers
my $config = parse_config();
my $common = {
  cgi    => mk_cgi(),
  config => $config,
  dbh    => mk_dbh($config),
  recur  => mk_recur($config),
};

do_args($common, \@ARGV, $actions);

# clean shutdown
$common->{dbh}->disconnect;
exit(0);
