#!/usr/bin/perl

# This is a program that recursively generates Captrap's pages and graphs.

# Copyright 2009 Corey Hickey


# This file is part of Captrap.
#
# Captrap is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Captrap is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Captrap.  If not, see <http://www.gnu.org/licenses/>.


=head1 NAME

captrap_recurse - a command-line program for recursively generating Captrap
pages and graphs

=head1 SYNOPSIS

captrap_recurse [OPTION] [OPTION-PARAMETERS]

=head1 DESCRIPTION

This program will recursively generate all of Captrap's pages. There is also a
"cron mode" suited for running periodically as a cron job.

=head1 OPTIONS

=over

=item -help

Print brief usage text.

=item -list-parameters

Print a list of valid parameters.

=item -write DIRECTORY PARAMETERS

Write the pages and graphs to a directory; the directory must already exist.
The directory name must be followed by a list of parameters in the usual URL
form (as in param1=value1&param2=value2); the list may be empty (""). Be sure
to surround the parameters with single-quotes if necessary, to avoid having the
shell interpret the ampersands.

=item -v-write DIRECTORY PARAMETERS

Same as "-write", but verbose.

=item -cron-write BASE LINK ROUNDOFF

This is a wrapper for the "-write" action intended to be run as a cron job. The
first argument specifies a base directory; the current time will be appended to
BASE, and files will be written there. Next, a symlink to the new directory
will be created (possibly replacing an old link) as specified by LINK. The
directory to which LINK previously pointed will be removed; some checks are in
place to make sure the old directory contains an old set of Captrap files, but,
to be safe, make sure the directory which contains the link is not writable by
normal users--otherwise a malicious user may make the link point somewhere else
and cause files to be removed. The final parameter to this action, ROUNDOFF,
specifies the unit by which the current time is rounded down; for example, if
ROUNDOFF is 'hour', pages will be generated as if this script were being run
exactly at the beginning of the current hour.

=back

=head1 EXIT STATUS

=over

=item 0

Everything is ok.

=item 1

No arguments given; usage information was shown.

=item 2

Invalid argument.

=item 3

There was a problem executing an action.

=back

=head1 FILES

=over

=item /etc/captrap/captrap.conf

The main Captrap configuration file.

=back

=head1 EXAMPLES

=over

=item Verbosely write the pages to a directory:

captrap_recurse -v-write page_dir ""

=item Write pages for a specific time to a directory:

captrap_recurse -write page_dir "now=2008-08-22T08:00:00"

=item Sample hourly crontab entry (should be placed in a user's crontab file):

 PATH = /usr/local/bin:/usr/bin/:/bin
 # m h  dom mon dow   command
 5 * * * * captrap_recurse -cron-write /www/captrap/ /www/captrap/static hour

=back

=head1 AUTHOR

Corey Hickey <bugfood-c@fatooh.org>

This program is free software; you may redistribute and/or modify it under the
terms of the GNU General Public License, version 3. See the source file for the
usual GPL preamble and the COPYING file for a copy of the GPL.

=head1 SEE ALSO

captrap_graph, captrap_view, captrap_main, crontab

The documentation included with the Captrap source code has more information on
setup and general usage.

=cut

use strict;
use warnings FATAL => 'all';

use File::Basename;

# for development using a different Captrap module
# use lib "lib";
use Captrap qw(:cgi :misc :actions :args :config :db);
use Captrap::Main qw(:mainpage);
use Captrap::View qw(:view);
use Captrap::Graph qw(:graph);


# -----------------------------------------------------------------------------
# printing
# -----------------------------------------------------------------------------

# print main help info
sub usage {
  my $common = shift; # unused
  my $actions = mk_actions();
  my $actions_text = describe_actions($actions);
  print "
This is a script for recursively generating Captrap's pages and graphs. For
full usage information, see the man page and/or documentation provided in the
Captrap source archive.

captrap_recurse [ACTION] [[ACTION-PARAMETERS]] ...

ACTIONS

$actions_text
"
}


# list all parameters
sub list_params {
  my $common = shift;
  my $param_info = Captrap::Main::mk_param_info($common->{config});
  arg_list_params($param_info);
}

# -----------------------------------------------------------------------------
# recursive page writing
# -----------------------------------------------------------------------------

# make recur hash
sub mk_recur {
  my $config = shift;
  return {
    v => 0,
    dir => undef,
    num => 0,
    tree => {},
    progs => {
      "viewer.pl" => {
        info => Captrap::View::mk_param_info(),
        func => \&Captrap::View::mk_views,
      },
      "grapher.pl" => {
        info => Captrap::Graph::mk_param_info($config),
        func => \&Captrap::Graph::mk_graph,
      },
    },
  };
}


# recursively write files to directory
sub do_recur {
  my $common = shift;
  my $dir = shift;
  my $params = shift;
  unless (-d "$dir") {
    print STDERR "target directory \"$dir\" does not exist\n";
    return 1;
  }
  $common->{recur}->{dir} = $dir;
  my $param_info = Captrap::Main::mk_param_info($common->{config});
  my $file = "$dir/index.html";
  arg_handle_params($common, $param_info, \&arg_mk_mainpage, $params, $file, 0);
  # If we get here, it worked; otherwise, we've already exited.
  # So, return 0 to signify an action that ran ok.
  return 0;
}


# recursively write files to directory (verbose wrapper)
sub do_recur_v {
  my $common = shift;
  my $dir = shift;
  my $params = shift;
  $common->{recur}->{v} = 1;
  return do_recur($common, $dir, $params);
}


# use provided cgi parameters to call the mainpage subroutine
sub arg_mk_mainpage {
  my $common = shift; # hash ref
  my $params = shift; # hash ref
  my $file = shift;
  return write_output_to_file($common, $params, $file, \&mk_mainpage, 0);
}

# -----------------------------------------------------------------------------
# cron mode
# -----------------------------------------------------------------------------

# wrapper for do_recur
sub do_recur_cron {
  my $common = shift;
  my $dir = shift;
  my $link = shift;
  my $unit = shift;
  my $linkdir = dirname($link);
  unless (check_dir($linkdir)) {
    return 3;
  }
  unless (check_dir($dir)) {
    return 3;
  }
  my $olddir = get_olddir($link); # may return undef
  my $times = get_times($common, undef);
  my $now = floor_unit($unit, $times->{now});
  my $newdir = "$dir/$now";
  unless (mkdir($newdir)) {
    print STDERR "can't make new directory '$newdir'\n";
    return 3;
  }
  if (do_recur($common, $newdir, "now=$now")) {
    print STDERR "recursive fetch failed\n";
    return 3;
  }
  if (-e "$link.tmp" && ! unlink("$link.tmp")) {
    print STDERR "can't unlink stale temporary symlink '$link.tmp'\n";
    return 3;
  }
  unless (symlink($newdir, "$link.tmp")) {
    print STDERR "can't create temporary symlink '$link.tmp'\n";
    return 3;
  }
  # should be atomic
  unless (rename("$link.tmp", $link)) {
    print STDERR "can't rename temporary symlink to '$link'\n";
    return 3;
  }
  # do we need to remove old files?
  return 0 unless defined($olddir);
  unless (rm_olddir($olddir)) {
    return 3;
  }
}


# remove an old Captrap download directory
sub rm_olddir {
  my $dir = shift;
  unless (opendir(DIR, $dir)) {
    print STDERR "can't open dir '$dir'\n";
    return 0;
  }
  while (my $file = readdir(DIR)) {
    next if $file =~ /^\.{1,2}$/; # skip . and ..
    $file = "$dir/$file";
    return 0 unless (file_ok($file));
    unless (unlink($file)) {
      print STDERR "can't remove file: '$file'\n";
      return 0;
    }
  }
  # should be empty by now
  unless (rmdir($dir)) {
    print STDERR "can't remove old dir '$dir'\n";
    return 0;
  }
  return 1;
}


# check if file looks like it was generated by Captrap
sub file_ok {
  my $file = shift;
  unless (-f $file && ! -l $file) {
    print STDERR "not a regular file: '$file'\n";
    return 0;
  }
  unless ($file =~ /\.(html|png|txt|csv)$/) {
    print STDERR "unrecognized file: '$file'\n";
    return 0;
  }
  return 1;
}


# check if directory exists
sub check_dir {
  my $dir = shift;
  unless (-e $dir) {
    print STDERR "directory '$dir' does not exist.\n";
    return 0;
  }
  unless (-d $dir && ! -l $dir) {
    print STDERR "'$dir' is not a directory\n";
    return 0;
  }
  return check_dir_perms($dir);
}


# check permissions/ownership of specified directory
sub check_dir_perms {
  my $dir = shift;
  my ($mode, $uid) = (stat($dir))[2, 4];
  if ($uid ne $>) {
    print STDERR "I don't own directory '$dir'\n";
    return 0;
  }
  if ($mode & 00002) {
    print STDERR "Other users can write to directory '$dir'\n";
    return 0;
  }
  return 1;
}


# look at the symlink to the old directory and check if it's ok
sub get_olddir {
  my $link = shift;
  unless (-e $link) {
    return undef;
  }
  unless (-l $link) {
    print STDERR "Warning: file '$link' is not a symlink.\n";
    return undef;
  }
  unless (-d $link) {
    print STDERR "Warning: symlink '$link' does not point to a directory.\n";
    return undef;
  }
  my $target = readlink($link);
  my $olddir;
  if (substr($target, 0, 1) eq '/') {
    # absolute link
    $olddir = $target;
  } else {
    # relative link
    $olddir = dirname($link) . "/$target";
  }
  # now see if the contents of $olddir look like a Captrap download
  local *DIR;
  unless (opendir(DIR, $olddir)) {
    print STDERR "Warning: can't open dir '$olddir'\n";
    return undef;
  }
  while (my $file = readdir(DIR)) {
    next if $file =~ /^\.{1,2}$/; # skip . and ..
    $file = "$olddir/$file";
    next if file_ok($file);
    print STDERR "Unrecognized file in old directory: '$file'\n";
    return undef;
  }
  return $olddir; # looks ok
}

# -----------------------------------------------------------------------------
# actions info
# -----------------------------------------------------------------------------

# return a hash of action info
sub mk_actions {
  my $actions = mk_ixhash();
  %$actions = (
    "-help" => {
      func => \&usage,
      args => [],
      desc => "
          Print this usage text.
      ",
    },
    "-list-parameters" => {
      func => \&list_params,
      args => [],
      desc => "
          Print a list of valid parameters.
      ",
    },
    "-write" => {
      func => \&do_recur,
      args => [ qw(DIRECTORY PARAMETERS) ],
      desc => "
          Write the pages to a directory, as long as the directory already
          exists.
      ",
    },
    "-v-write" => {
      func => \&do_recur_v,
      args => [ qw(DIRECTORY PARAMETERS) ],
      desc => "
          Same as \"-write\", but verbose.
      ",
    },
    "-cron-write" => {
      func => \&do_recur_cron,
      args => [ qw(BASE LINK ROUNDOFF) ],
      desc => "
          This is a wrapper for the \"-write\" action intended to be run as a
          cron job.
      ",
    },
  );
  return $actions;
}

# ----------------------------------------------------------------------------
# parse the arguments and take actions
if (! @ARGV) {
  usage();
  exit(1);
}
my $actions = mk_actions();
check_args(\@ARGV, $actions);

my $config = parse_config();
my $common = {
  cgi => mk_cgi(),
  config => $config,
  dbh    => mk_dbh($config),
  recur  => mk_recur($config),
};

do_args($common, \@ARGV, $actions);

$common->{dbh}->disconnect();
exit(0);
