#!/usr/bin/perl

# This is a library of functions used by Captrap's graphing programs.

# Copyright 2009 Corey Hickey


# This file is part of Captrap.
#
# Captrap is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Captrap is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Captrap.  If not, see <http://www.gnu.org/licenses/>.

=head1 NAME

Captrap::Graph - a library of functions used by Captrap's graphing programs.

=head1 SYNOPSIS

use Captrap::Graph qw(:graph :param);

=head1 DESCRIPTION

As of yet, this library is not intended to be used outside of Captrap; few of
the included functions can be used in a self-contained manner. This text is
intended as a quick reference for use in Captrap development.

No functions are exported by default.

=head2 Tags

The following groups of functions are available; contents may change in future
versions.

=over

=item :graph

mk_graph

=item :param

mk_param_info, params_ok

=back

=head2 Functions

=over

=cut


use 5.010; # we need Perl >= 5.10 for given/when
use strict;
use warnings FATAL => 'all';
no warnings "experimental::smartmatch";
use feature 'switch';

use Text::CSV;

package Captrap::Graph;
require Exporter;

our @ISA = ('Exporter');
our @EXPORT = ();
our %EXPORT_TAGS = (
  graph => [ qw(
    mk_graph
  ) ],
  param => [ qw(
    mk_param_info
    params_ok
  ) ],
);
# all the tagged functions
our @EXPORT_OK = map({ @$_ } values(%EXPORT_TAGS));


use Date::Calc qw(Add_Delta_YM Add_Delta_YMD Add_Delta_YMDHMS check_date
     check_time Delta_YMDHMS);
use Scalar::Util qw(tainted);
use File::Basename;

# for development using a different Captrap module
use lib "lib";
use Captrap qw(:db :config :cgi :misc);

# -----------------------------------------------------------------------------
# date/time conversion, parsing, etc.
# -----------------------------------------------------------------------------

# convert month to monthly, etc
sub timely {
  my $unit = shift;
  return substr($unit, 0, -1) . "ily" if $unit =~ /^day$/i;
  return $unit . "ly";
}


# trim a time string to specified unit, eg:
# 2009-05-26T13:22:21  -->  2009-05
sub trim_time {
  my $unit = shift;
  my $time = shift;
  #print STDERR "trim_time  unit: $unit time: $time\n";
  my ($year, $month, $day, $hour, $minute, $second) = parse_time($unit, $time);
  return "$year"                                   if $unit =~ /^year$/i;
  return "$year-$month"                            if $unit =~ /^month$/i;
  return "$year-$month-$day"                       if $unit =~ /^day$/i;
  return "$year-$month-$day $hour"                 if $unit =~ /^hour$/i;
  return "$year-$month-$day $hour:$minute"         if $unit =~ /^minute$/i;
  return "$year-$month-$day $hour:$minute:$second" if $unit =~ /^second$/i;
  die "invalid time unit $unit";
}


# add a delta to a time base
# the time string is specified according to $unit
# delta is in increments of $unit
sub base_add_time {
  my $unit = shift;
  my $base = shift;
  my $delta = shift;
  #print STDERR "add_time  unit: $unit base: $base delta: $delta\n";
  # some of these may be undef, depending on $unit
  my ($year, $month, $day, $hour, $minute, $second) = parse_time($unit, $base);
  # many of these time functions have arguments and return values we don't use
  if ($unit =~ /^year$/i) {
    return sprintf("%04d", $base + $delta); # easy
  }
  if ($unit =~ /^month$/i) {
    ($year, $month) = Add_Delta_YM($year, $month, 1, 0, $delta);
    return sprintf("%04d-%02d", $year, $month);
  }
  if ($unit =~ /^day$/i) {
    ($year, $month, $day) = Add_Delta_YMD($year, $month, $day, 0, 0, $delta);
    return sprintf("%04d-%02d-%02d", $year, $month, $day);
  }
  if ($unit =~ /^hour$/i) {
    ($year, $month, $day, $hour) = Add_Delta_YMDHMS(
        $year, $month, $day, $hour, 0, 0, # base
        0, 0, 0, $delta, 0, 0); # deltas
    return sprintf("%04d-%02d-%02d %02d", $year, $month, $day, $hour);
  }
  if ($unit =~ /^minute$/i) {
    ($year, $month, $day, $hour, $minute) = Add_Delta_YMDHMS(
        $year, $month, $day, $hour, $minute, 0, # base
        0, 0, 0, 0, $delta, 0); # deltas
    return sprintf("%04d-%02d-%02d %02d:%02d",
        $year, $month, $day, $hour, $minute);
  }
  if ($unit =~ /^second$/i) {
    ($year, $month, $day, $hour, $minute, $second) = Add_Delta_YMDHMS(
        $year, $month, $day, $hour, $minute, $second, # base
        0, 0, 0, 0, 0, $delta); # deltas
    return sprintf("%04d-%02d-%02d %02d:%02d:%02d",
        $year, $month, $day, $hour, $minute, $second);
  }
}


# set up an array of times from start to end
sub init_times {
  my $array = shift; # array ref
  my $start = shift;
  my $end = shift;
  my $unit = shift;
  # Parameters causing too many steps should be caught earlier, but we don't
  # really want an infinite loop here, so set a local $max_steps much higher
  # than anyone would ever realistically set $config->{max_steps}.
  my $max_steps = 100000;
  $start = trim_time($unit, $start);
  $end   = trim_time($unit, $end);
  for (my $i = 0; $i < $max_steps; ++$i) {
    $array->[$i] = $start;
    #print STDERR "init_times: $i  $start\n";
    return if $start eq $end;
    $start = base_add_time($unit, $start, 1);
  }
 die "too many time steps  start: $start end: $end unit: $unit";
}


# check if time is valid
sub valid_time {
  my $time = shift;
  my ($year, $month, $day, $hour, $minute, $second) =
    parse_time("second", $time);
  return 0 unless check_date($year, $month, $day);
  return 0 unless check_time($hour, $minute, $second);
  return 1;
}


# check if time1 is before time2
sub time_before {
  my $time1 = shift;
  my $time2 = shift;
  my @deltas = Delta_YMDHMS((parse_time("second", $time1),
      parse_time("second", $time2)));
  foreach my $delta (@deltas) {
    return 0 if $delta < 0; # time1 is later for this unit
    return 1 if $delta > 0; # time2 is later for this unit
    # otherwise, we don't know, so go to the next smaller unit
  }
  # wow, time1 and time2 must be the same
  return 0;
}

# -----------------------------------------------------------------------------
# misc.
# -----------------------------------------------------------------------------

# check if all arguments are defined
# return 1 on success, 0 on failure
sub all_defined {
  foreach my $a (@_) {
    return 0 unless defined($a);
  }
  return 1;
}


# choose a nice-looking vertical scale with a little space at the top
sub vertical_scale {
  my $n = shift;
  return 2 if $n <= 1.75;
  return 4 if $n <= 3.50;
  return 6 if $n <= 5.25;
  return 8 if $n <= 7.00;
  # next highest even number + 2
  return 2 * int($n/2) + 4 if $n <= 16;
  # fudge factor based on size of $n
  my $f = 4 * (int($n/80) + 1);
  # next highest multiple of 4 + fudge factor
  return 4 * int($n/4) + $f;
}


# choose the file extension based on the specified output
sub pick_extension {
  my $output = shift;
  # note: don't forget to update the list of extensions in captrap_recurse
  return "html" if ! defined($output); # usage page
  return "png"  if $output eq "png";
  return "svg"  if $output eq "svg";
  return "txt" if $output eq "gnuplot";
  return "txt" if $output eq "text";
  return "csv" if $output eq "csv";
  return "csv" if $output eq "csvraw";
  die "unknown extension for output $output\n";
}


# choose a gnuplot pattern
sub choose_pattern {
  my $common = shift;
  my $enable = shift;
  my $type = shift;
  my $config = $common->{config};
  if (defined($enable)) {
    return undef unless $enable;
  } else {
    return undef unless $config->{patterns_default};
  }
  if ($type eq 'mtpred') {
    return $config->{mtpred_patterns};
  }
  return $config->{state_patterns};
}

# -----------------------------------------------------------------------------
# database fetching
# -----------------------------------------------------------------------------

# make and execute an sth for a "Data / Time" graph
sub mk_sth_per {
  my $common = shift;
  my $item = shift;
  my $unit = shift;
  my $start = shift;
  my $end = shift;
  my $states = shift; # array ref
  my $fmt = unit_to_fmt($unit);
  my @list;
  my ($acct, $macs) = @{$common->{config}}{"db_table_acct","db_table_macs"};
  # mess with the query a bit depending on whether we want specific states or
  # all but ignored states
  my $state_col = "'total'";
  my $not = "NOT ";
  my $by_state = "";
  my $null_ok = "$macs.state <=> NULL OR";
  if (@{$states}) {
    $state_col = "$macs.state";
    $not = "";
    $by_state = ", state";
    $null_ok = "";
  } else {
    # just override $states with ignored states
    $states = $common->{config}->{states_ignore};
  }
  my $ph = placeholder_list($states);
  # This statement sums up per mac, then joins to the mac table, then sums per
  # state. This is quite a bit faster than joining before summing anything as
  # long as there is more than 1 state enabled, and only slightly slower
  # otherwise.
  my $sel = "
    SELECT
      SUM(t_sums.sum) AS sum,
      t_sums.time AS time,
      $state_col AS state
    FROM
      (
        SELECT
          SUM($acct.$item) AS sum,
          DATE_FORMAT($acct.stamp_inserted, ?) AS time,
          $acct.mac_dst AS mac_dst
        FROM $acct
        WHERE
          $acct.stamp_inserted >= ? AND
          $acct.stamp_inserted < ?
        GROUP BY time, mac_dst
      ) AS t_sums LEFT JOIN
      $macs ON t_sums.mac_dst = $macs.mac
    WHERE
      ($null_ok $macs.state $not IN ($ph))
    GROUP BY time$by_state
    ORDER BY time$by_state
  ";
  my $sth = $common->{dbh}->prepare($sel);
  $sth->dbcache_execute($fmt, $start, $end, @{$states});
  return $sth;
}


# fetch a table of data from db
# see the 'my $data' lines below for structure of returned value
sub fetch_db_table {
  my $sth = shift;
  my $max = shift;
  my $opts = shift; # hash ref
  my $sets = shift; # array ref
  ${$max} = 0;
  my $data = {
    "times"  => [],
    "sets"   => @$sets ? $sets : [ "total" ],
    "values" => {}, # indexed by set
  };
  # these are needed for filling in missing times
  if (! all_defined($opts->{x_unit}, $opts->{start}, $opts->{end})) {
    die 'need x_unit, start, and end in $opts';
  }
  # set up an array of times from start to end
  init_times($data->{"times"}, $opts->{start}, $opts->{end}, $opts->{x_unit});
  # Make an array ref for each set. We do this because if a set has
  # no data (none transferred) it won't get any rows returned from the db.
  # An empty array for a set gets treated as containing all zeroes.
  foreach my $set (@{$data->{sets}}) {
    $data->{"values"}->{$set} = [];
  }
  # sth must already be executed
  my $i = 0;
  while (defined(my $a = $sth->fetchrow_arrayref())) {
    my ($datum, $time, $set) = @$a;
    die "NULL time value in results" unless defined($time);
    $datum = 0 unless defined($datum);
    $set = "total" unless defined($set);
    # make the index match the time
    while ($time ne $data->{"times"}->[$i]) {
      ++$i;
      die "can't find $time in times array" if $i > $#{$data->{"times"}};
    }
    $data->{"values"}->{$set}->[$i] = $datum;
    ${$max} = $datum if defined($max) && $datum > ${$max};
    # print STDERR "i: $i  datum: $datum  time: $time  set: $set\n";
  }
  $sth->finish();
  return $data;
}


# make a long select ... union select ... string
sub mk_stamp_string {
  my $start = shift;
  my $end = shift;
  my $unit = shift;
  my $label = shift;
  my $list = [];
  init_times($list, $start, $end, $unit);
  # mysql gives warnings about "Incorrect datetime value"
  my $fix = "";
  $fix = "-01"   if $unit eq "month";
  $fix = "01-01" if $unit eq "year";
  foreach my $item (@$list) {
    $item = "SELECT '$item$fix'";
  }
  $list->[0] .= " AS $label";
  return join(" UNION ", @$list);
}


# -----------------------------------------------------------------------------
# graph preparation/generation
# -----------------------------------------------------------------------------

=item mk_graph($common, $params, $extension)

Generate a graph and return the contents. $extension is a scalar reference; the
referent will be set to the suggested filename extension if the output were to
be written to a file.

=cut

sub mk_graph {
  my $common = shift; # hash ref
  my $params = shift; # hash ref
  my $extension = shift; # hash ref
  $$extension = pick_extension($params->{output});
  # break out early if there are no parameters
  if (! keys(%$params)) {
    $$extension = "html";
    my $cgi = $common->{cgi};
    my $me = "Graph";
    my $text;
    $text .= cgi_start_xhtml($cgi, "$me usage");
    $text .= $cgi->h1("Captrap Grapher Help");
    $text .= cgi_list_params($cgi, $me, mk_param_info());
    $text .= $cgi->end_html();
    return $text;
  }
  # common graph options
  my $font     = basename($common->{config}->{graph_font});
  my $font_path = dirname($common->{config}->{graph_font});
  my $caps = $params->{item} eq 'bytes' ? [ l2a($params->{caps}) ] : [];
  my $opts = {
    data_style        => "boxes",
    item              => $params->{item},
    start             => $params->{start},
    end               => $params->{end},
    w                 => $params->{w},
    h                 => $params->{h},
    output            => $params->{output},
    caps              => $caps,
    font              => $font,
    font_path         => $font_path,
  };
  my $states = [ l2a($params->{states}) ];
  if ($params->{graph} eq "per") {
    return mk_graph_per($common, $opts, $params, $states);
  }
  if ($params->{graph} eq "mt") {
    return mk_graph_mt($common, $opts, $params, $states);
  }
  if ($params->{graph} eq "mtpred") {
    return mk_graph_mtpred($common, $opts, $params, $states);
  }
}


# make a graph directly from data in an sth
sub mk_graph_from_sth {
  my $common = shift;
  my $sth = shift;
  my $opts = shift;
  my $sets = shift;
  my $max;
  my $data = fetch_db_table($sth, \$max, $opts, $sets);
  return mk_graph_choose($common, $data, $max, $opts);
}


# make a "Data / Time" graph
sub mk_graph_per {
  my $common = shift;
  my $opts = shift; # hash ref
  my $params = shift; # hash ref
  my $states = shift; # array ref
  my $item = $params->{item};
  my $unit = $params->{per};
  my $start = print_time($params->{start});
  my $end = print_time($params->{end});
  my $sth = mk_sth_per($common, $item, $unit, $start, $end, $states);
  $opts = {
    %$opts,
    x_label           => $unit,
    x_unit            => $unit,
    key               => @$states ? "outside" : "off",
    title             => "$item / $unit   from $start to $end",
    set_colors        => $common->{config}->{state_colors},
    set_patterns      => choose_pattern($common, $params->{patterns}, 'per'),
  };
  return mk_graph_from_sth($common, $sth, $opts, $states);
}


# make a moving-sum graph
sub mk_graph_mt {
  my $common = shift;
  my $opts = shift; # hash ref
  my $params = shift; # hash ref
  my $states = shift; # array ref
  my $item = $params->{item};
  my $sum_interval = $params->{sum_interval};
  my $sum_unit = $params->{sum_unit};
  my $per_unit = $params->{per};
  my $start = $params->{start};
  my $end = $params->{end};
  my $date_format = unit_to_fmt($per_unit);
  die "invalid sum unit: $sum_unit" unless check_interval_unit($sum_unit);
  my $stamps = mk_stamp_string($start, $end, $per_unit, "stamp");
  my $sel;
  my ($acct, $macs) = @{$common->{config}}{"db_table_acct","db_table_macs"};
  # mess with the query a bit depending on whether we want specific states or
  # all but ignored states
  my $state_col = "'total'";
  my $not = "NOT ";
  my $states_sel = $common->{config}->{states_ignore};
  my $null_ok = "OR $macs.state is NULL";
  if (@{$states}) {
    $state_col = "$macs.state";
    $not = "";
    $states_sel = $states;
    $null_ok = "";
  }
  my $ph = placeholder_list($states_sel);
  # Correlated subqueries and joins... black magic. This should finally be
  # correct. The big long "union" string seems unclean, but I don't think
  # there's any harm in using it.
  $sel = "
    SELECT
      SUM(per_mac.msum) AS msum,
      per_mac.time AS time,
      $state_col AS tstate
    FROM
      (
        SELECT
          (
            SELECT SUM($item) 
            FROM $acct
            WHERE
              stamp_inserted < t_ms.stamp_plus AND
              stamp_inserted >= t_ms.stamp_minus AND
              mac_dst = t_ms.mac
          ) AS msum,
          t_ms.time,
          t_ms.mac AS mac
        FROM
          (
            SELECT
              t_macs.mac AS mac,
              DATE_FORMAT(t_stamps.stamp, ?) AS time,
              t_stamps.stamp + INTERVAL 1 $per_unit AS stamp_plus,
              t_stamps.stamp + INTERVAL 1 $per_unit - INTERVAL ? $sum_unit
                AS stamp_minus
            FROM
              (
                SELECT DISTINCT $acct.mac_dst AS mac
                FROM $acct LEFT JOIN $macs ON $acct.mac_dst = $macs.mac
                WHERE $macs.state $not IN ($ph) $null_ok
              ) AS t_macs JOIN
              (
                $stamps
              ) AS t_stamps
          ) AS t_ms
      ) AS per_mac LEFT JOIN $macs ON per_mac.mac = $macs.mac
    GROUP by time, tstate
  ";
  my @list = ($date_format, $sum_interval, @$states_sel);
  my $sth = $common->{dbh}->prepare($sel);
  $sth->dbcache_execute((@list));
  my $sumly = timely($sum_unit);
  $opts = {
    %$opts,
    x_label           => $per_unit,
    x_unit            => $per_unit,
    start             => $start,
    end               => $end,
    data_style        => "boxes",
    key               => @$states ? "outside" : "off",
    title             => "Moving $sumly Total Data / $per_unit",
    set_colors        => $common->{config}->{state_colors},
    set_patterns      => choose_pattern($common, $params->{patterns}, 'mt'),
  };
  return mk_graph_from_sth($common, $sth, $opts, $states);
}


# make a predicted moving-total graph
sub mk_graph_mtpred {
  my $common = shift;
  my $opts = shift; # hash ref
  my $params = shift; # hash ref
  my $states = shift; # array ref
  my $item = $params->{item};
  my $unit = $params->{per};
  my $start = $params->{start};
  my $end = $params->{end};
  my $pred = $params->{pred};
  # cgi parameters use T to separate date and time
  $start = print_time($start);
  $end   = print_time($end);
  my $state_name;
  # only support the first state listed (would be too crowded)
  if (@$states) {
    $#{$states} = 0;
    $state_name = $states->[0];
  } else {
    # undefined state must be "total"
    $state_name = "total";
  }
  $opts = {
    %$opts,
    x_label           => $unit,
    x_unit            => $unit,
    cap_style         => "lt -1 title \"cap\"",
    key               => "outside",
    title             => "Predicted Total $item for State \\\"$state_name\\\"" .
        " Based on Data\\nfrom $start to $end\\nand Adding Average $item/" .
        "$unit Over Labelled Intervals",
    set_colors        => $common->{config}->{mtpred_colors},
    set_patterns      => choose_pattern($common, $params->{patterns}, 'mtpred'),
  };
  my $sth = mk_sth_per($common, $item, $unit, $start, $end, $states);
  my $data = fetch_db_table($sth, undef, $opts, $states);
  # cut data_in off the data hash
  my $set = (keys(%{$data->{"values"}}))[0]; # should be only one
  my $data_in = $data->{"values"}->{$set};
  $data->{"values"} = {};
  # first find the initial total data and make an output table of future times
  my $past_total = 0;
  my $future = $end; # the beginning of the future is the end of the past
  my @future_out;
  # we can't just index based on @$data_in because some of them are undef
  for (my $i = 0; $i < @{$data->{"times"}}; ++$i) {
    my $datum = $data_in->[$i];
    $past_total += $datum if defined($datum);
    $future = base_add_time($unit, $future, 1);
    push(@future_out, $future);
  }
  # just replace the times in the data hash
  $data->{"times"} = \@future_out;
  # now fill in the predicted data
  my $max = 0;
  $data->{sets} = [];
  # parse string like "123456789:last_month,123456:last_day"
  foreach my $pred_pair (l2a($pred)) {
    my ($pred_add, $pred_title) = split(/:/, $pred_pair);
    my @data_out;
    my $total = $past_total;
    # as above, indexing based on @$data_in is incorrect
    for (my $i = 0; $i < @{$data->{"times"}}; ++$i) {
      my $datum = $data_in->[$i];
      $datum = 0 unless defined($datum);
      $total = $total - $datum + $pred_add;
      $max = $total if $total > $max;
      push(@data_out, $total);
    }
    push(@{$data->{sets}}, $pred_title);
    $data->{"values"}->{$pred_title} = \@data_out;
  }
  return mk_graph_choose($common, $data, $max, $opts);
}


# choose which graph/table/etc. function to call
sub mk_graph_choose {
  my $common = shift;
  my $data = shift; # hash ref
  my $max = shift;
  my $opts = shift;
  # todo: handle this nicer (return a placeholder image?)
  die "No data!" unless @{$data->{"times"}};
  my $next;
  given ($opts->{output}) {
    when ('gnuplot') { $next = \&mk_graph_gnuplot; }
    when ('png')     { $next = \&mk_graph_gnuplot; }
    when ('svg')     { $next = \&mk_graph_gnuplot; }
    when ('text')    { $next = \&mk_graph_text; }
    when ('csv')     { $next = \&mk_graph_csv; }
    when ('csvraw')  { $next = \&mk_graph_csvraw; }
  }
  return &$next($common, $data, $max, $opts);
}


# -----------------------------------------------------------------------------
# gnuplot graph generation
# -----------------------------------------------------------------------------

# arrange data for gnuplot
sub arrange_gnuplot_data {
  my $data = shift;
  my $factor = shift;
  # Unfortunately, I can't seem to make gnuplot like a table from stdin
  # with > 2 columns where columns > 1 are different Y values. So make
  # >= 1 tables where the first columns of each are all time.
  my $num_sets = @{$data->{sets}};
  my $set_num = 0;
  my $text;
  foreach my $set (@{$data->{sets}}) {
    my $values = $data->{"values"}->{$set};
    # run the loop based on how many times there are;
    # data entries could be undef
    for (my $i = 0; $i <= $#{$data->{"times"}}; ++$i) {
      my $value = $values->[$i];
      # undef values should be 0
      $value = 0 unless defined($value);
      my $pos = $set_num / $num_sets - 0.5 + 1/(2*$num_sets) + 1 + $i;
      my $line = $pos . "\t" . $value / $factor;
      $text .= "$line\n";
    }
    $text .= "e\n";
    ++$set_num;
  }
  return $text;
}


# make a gnuplot graph out of supplied data
sub mk_graph_gnuplot {
  my $common = shift;
  my $data = shift; # hash ref
  my $y_max = shift;
  my $opts = shift;

  # adjust max to fit caps, if we want to
  my $s_max = $y_max;
  foreach my $cap (@{$opts->{caps}}) {
    next unless $y_max >= $cap * $common->{config}->{cap_show_percent} / 100;
    $s_max = $cap if $cap > $s_max;
  }
  $y_max = $s_max;
  my ($factor, $base) = choose_factor($y_max, $opts->{item});
  $y_max /= $factor;
  $y_max = vertical_scale($y_max);
  my $ytics = 0 . "," . $y_max/4 . "," . $y_max;

  my $xtics = "nomirror out (";
  for (my $i = 0; $i <= $#{$data->{"times"}} ; ++$i) {
    $xtics .= '"' . $data->{"times"}->[$i] . '" ' . (1 + $i) . ', ';
  }
  substr($xtics, -2) = ')'; # replace last ", " with a ")"

  # Gnuplot makes bars centered on the X coordinate; when a value has an X
  # coordinate on the border of the graph, only half of the bar is shown. We
  # need to make the xrange large enough to fit the whole bar.
  my $x_min = 0;
  my $x_max = $#{$data->{"times"}} + 2;
  # printf STDERR "xmin=$x_min  xmax=$x_max  unit=$opts->{x_unit}  ";
  # build up a plot command
  my @plots;
  # first draw lines for all the caps
  for (my $i = 0; $i < @{$opts->{caps}} && $i < @{$data->{sets}}; ++$i) {
    my $cap = $opts->{caps}->[$i] / $factor; # adjusted
    next if $cap < 0; # no cap
    next if $cap >= $y_max; # too big
    my $set = $data->{sets}->[$i];
    my $style;
    if (defined($opts->{cap_style})) {
      $style = $opts->{cap_style};
    } else {
      # choose color based on position
      my $color = $opts->{set_colors}->{$set};
      $style = defined($color) ? " lt rgb \"#$color\"" : "";
      $style .= " title \"cap: $set\"";
    }
    push(@plots, $cap . $style);
  }
  # next draw boxes for all the data
  my $num_sets = 0;
  foreach my $set (@{$data->{sets}}) {
    my $plot = "'-' using 1:2";
    my $color = $opts->{set_colors}->{$set};
    if (defined($color)) {
      $plot .= " lt rgb \"#$color\""
    }
    my $pattern = $opts->{set_patterns}->{$set};
    if (defined($pattern)) {
      $plot .= " fillstyle pattern $pattern";
    }
    $plot .= " title \"$set\"";
    push(@plots, $plot);
    ++$num_sets;
  }
  my $plot_cmd = "plot " . join(", ", @plots);
  # Enforce a maximum box width. This is imprecise because the calculated box
  # width in pixels ($abs_boxwidth) doesn't take into account margins, etc.,
  # but it's good enough for not making huge ugly boxes.
  my $boxwidth = 1 / $num_sets; # units along X axis
  my $abs_boxwidth = $opts->{w} / (@{$data->{"times"}} * $num_sets); # pixels
  my $max_boxwidth = 150; # pixels
  $boxwidth *= $max_boxwidth / $abs_boxwidth if $abs_boxwidth > $max_boxwidth;
  my $terminal;
  given ($opts->{output}) {
    when (/^(gnuplot|png)$/) {
      $terminal = "png size $opts->{w},$opts->{h} font \"$opts->{font}\" 10";
    }
    when ('svg') {
      $terminal =
          "svg size $opts->{w},$opts->{h} font \"$opts->{font},10\" dynamic";
    }
    default { die "unknown output: $opts->{output}"; }
  }
  # this is all string-interpolated,
  # so make sure to double escape with \\ when necessary
  my $gpc = <<EOF
set terminal $terminal
set output
set title "$opts->{title}"
set xlabel "$opts->{x_label}"
set ylabel "$base"
set xrange ["$x_min":"$x_max"]
set yrange [0:$y_max]
set xtics rotate $xtics
set ytics $ytics
set mytics 2
set style data $opts->{data_style}
set boxwidth $boxwidth
set style fill solid 1 border -1
set key $opts->{key}
set grid ytics mytics noxtics
set datafile separator "\\t"
$plot_cmd
EOF
  ;
  # now add the arranged data
  $gpc .= arrange_gnuplot_data($data, $factor);
  # arbitrary lines passed to gnuplot would be hazardous, so make sure
  # none of it is tainted
  die "gnuplot commands got tainted!" if tainted($gpc);
  if ($opts->{output} eq 'gnuplot') {
    # just return the gnuplot commands
    return $gpc;
  }
  return gnuplot($common->{config}->{gnuplot}, $gpc, $opts->{font_path});
}


# run gnuplot via a pair of pipes
# data is a hash ref from fetch_db_table()
sub gnuplot {
  my $bin = shift;
  my $gpc = shift;
  my $font_path = shift;
  local (*C_OUT, *C_IN, *P_OUT, *P_IN);
  pipe(C_IN, P_OUT); # parent --> child
  pipe(P_IN, C_OUT); # child  --> parent
  my $childpid = fork;
  die "couldn't fork: $!" unless defined($childpid);
  if (! $childpid) {
    # I am the child
    close(P_OUT);
    close(P_IN);
    open(STDOUT, ">&=C_OUT") or die "child: could not reopen STDOUT: $!";
    open(STDIN,  "<&=C_IN")  or die "child: could not reopen STDIN: $!";
    # See Debian bug 524962
    $ENV{GDFONTPATH} = $font_path;
    $ENV{PATH} = ""; # for taint check
    exec($bin)     or die "child: could not exec gnuplot: $!";
  }
  # I am the parent
  close(C_OUT);
  close(C_IN);
  #open(P_OUT, "> /tmp/debug");
  # I think this doesn't need any flushing on the FD;
  # it should be flushed on close()
  print P_OUT $gpc or die "can't print gnuplot commands to child: $!";
  close(P_OUT) or die "close P_OUT failed: $!";
  my $graph;
  my $max_len = 2**20; # 1 MB should be enough
  my $bytes_read = read(P_IN, $graph, $max_len);
  die "gnuplot returned 0 bytes" unless $bytes_read;
  die "too many bytes returned from gnuplot" if $bytes_read >= $max_len;
  close(P_IN) or die "close P_IN failed: $!";
  # don't want zombies
  waitpid($childpid, 0);
  my ($status, $signal) = ($? >> 8, $? & 127);
  if ($status) {
    die "gnuplot failed. exit status $status, signal $signal";
  }
  return $graph;
}


# -----------------------------------------------------------------------------
# output of other types
# -----------------------------------------------------------------------------

# make a text table out of supplied data
sub mk_graph_text {
  my $common = shift;
  my $data = shift; # hash ref
  my $max = shift;
  my $opts = shift;
  my ($factor, $base) = choose_factor($max, $opts->{item});
  # First scan through and translate data into rows for use with sprintf().
  # run the loop based on how many times there are;
  # data entries could be undef
  my @table;
  for (my $i = 0; $i <= $#{$data->{"times"}}; ++$i) {
    my @row;
    push(@row, $data->{"times"}->[$i]);
    foreach my $set (@{$data->{sets}}) {
      my $value = $data->{"values"}->{$set}->[$i];
      $value = defined($value) ? $value / $factor : 0;
      push(@row, $value);
    }
    push(@table, \@row);
  }
  # make a format string
  my $timewidth = length($data->{"times"}->[0]);
  my $numwidth = length(int($max/$factor)) + 4; # 1 for '.' + 3 for precision
  foreach my $set (@{$data->{sets}}) {
    my $l = length($set);
    # make sure set titles aren't too wide
    $numwidth = $l if $l > $numwidth;
  }
  # print STDERR "max: $max  factor: $factor  nw: $numwidth\n";
  my $num_sets = @{$data->{sets}};
  my $fmt   = "%${timewidth}s  |" . "  %$numwidth.3f" x $num_sets;
  my $fmt_t = "%${timewidth}s  |" . "  %${numwidth}s" x $num_sets . "\n";
  # print STDERR "$fmt\n";
  # return a table
  my $sep = '-' x ($timewidth + 2) . '+' . '-' x ($num_sets * ($numwidth + 2));
  my $head = "$opts->{title}\n\nall values are listed as: $base\n\n";
  $head .= sprintf($fmt_t, "time", @{$data->{sets}});
  $head .= "$sep\n";
  return $head . join("\n", map { sprintf($fmt, @$_) } @table);
}

sub mk_graph_csv {
  my $common = shift;
  my $data = shift; # hash ref
  my $max = shift;
  my $opts = shift;
  my $csv = Text::CSV->new();
  my @head = ('time', @{$data->{sets}});
  if (! $csv->combine(@head)) {
    die "csv error inputting: ", $csv->error_input();
  }
  return $csv->string() . "\r\n" .
      mk_graph_csvraw($common, $data, $max, $opts);
}
  

# make csv output from supplied data
sub mk_graph_csvraw {
  my $common = shift;
  my $data = shift; # hash ref
  my $max = shift;
  my $opts = shift;
  my $csv = Text::CSV->new();
  # First scan through and translate data into CSV rows
  # run the loop based on how many times there are;
  # data entries could be undef
  my @rows;
  for (my $i = 0; $i <= $#{$data->{"times"}}; ++$i) {
    my @row;
    push(@row, $data->{"times"}->[$i]);
    foreach my $set (@{$data->{sets}}) {
      my $value = $data->{"values"}->{$set}->[$i];
      $value = defined($value) ? $value : 0;
      push(@row, $value);
    }
    if (! $csv->combine(@row)) {
      die "csv error inputting: ", $csv->error_input();
    }
    push(@rows, $csv->string());
  }
  return join("\r\n", @rows);
}


# -----------------------------------------------------------------------------
# parameters
# -----------------------------------------------------------------------------

=item mk_param_info($config)

Set up a hash of all parameters the grapher accepts, along with their defaults.

=cut

# each parameter MUST have a key here
sub mk_param_info {
  my $config = shift;
  # don't forget to start regexes with a ^ and end with a $
  my $units_re = qr/^(year|month|day|hour)$/;
  my $unit_txt = "Available units are 'year', 'month', 'day', and 'hour'.";
  my $res_re = qr/^[1-9]\d{0,4}$/; # from 1-99999
  my $info = mk_ixhash();
  %$info = (
    graph => {
      def => "per",
      reg => qr/^(per|mt|mtpred)$/,
      txt => "Graph type. Available types are 'per' (data per interval; see\n" .
          "'per' parameter to specify interval), 'mt' (moving total data\n" .
          "per interval), and 'mtpred' (predicted moving total data per\n" .
          "interval).",
      var => 's',
    },
    item  => {
      def => "bytes",
      reg => qr/^(bytes|packets)$/,
      txt => "Item to be graphed ('bytes' or 'packets').",
      var => 's',
    },
    per   => {
      def => "month",
      reg => $units_re,
      txt => "Graph interval unit (the unit by which data is grouped).\n" .
          "$unit_txt",
      var => 's',
    },
    start => {
      def => "0001-01-01T00:00:00",
      reg => time_re("second", "T"),
      txt => "Graph start time in ISO 8601 combined date format (example:\n" .
          "'start=2009-06-23T15:21:44').",
      var => 's',
    },
    end   => {
      def => "9999-01-01T00:00:00",
      reg => time_re("second", "T"),
      txt => "Graph end time (see 'start' parameter).",
      var => 's',
    },
    pred => {
      def => undef,
      # 1-10 comma-separated pairs of (1-20 digit value : title)
      reg => qr/^(\d{1,20}:.{1,20}?,){0,9}\d{1,20}:.{1,20}$/,
      txt => "List of \"number:name\" pairs for the 'mtpred' graph type.\n" .
          "Multiple pairs are separated by a ',' (example:\n" .
          "'bytes=8689718561:last_hour,10038542471:last_day')",
      var => 's',
    },
    states => {
      def => undef, # all states
      # comma-separated list of 1-10 1-20 character values
      reg => qr/^(\w{1,20},){0,9}\w{1,20}$/,
      txt => "Traffic states ('up', 'down', etc.) for the 'per' and 'mt'\n" .
         "graph types. Any alphanumeric name is acceptable, but will not\n" .
         "match any data unless the state is defined in the macs table.\n" .
         "Multiple states are separated by a ',' (example:\n" .
         "'states=up,down').",
      var => 's',
    },
    sum_interval => {
      def => 1,
      reg => qr/^\d{1,10}$/,
      txt => "Summing interval for the 'mt' graph. See also 'sum_unit'.",
      var => 's',
    },
    sum_unit => {
      def => "month",
      reg => $units_re,
      txt => "Summing unit for the 'mt' graph. See also 'sum_interval'.\n" .
          "$unit_txt",
      var => 's',
    },
    w => {
      def => $config->{default_graph_w},
      reg => $res_re,
      txt => "Width of the graph image in pixels. The default is controlled\n" .
          "by the configuration parameter \"default_graph_w\".",
      var => 's',
    },
    h => {
      def => $config->{default_graph_h},
      reg => $res_re,
      txt => "Height of the graph image in pixels. The default is\n" .
          "controlled by the configuration parameter \"default_graph_h\".",
      var => 's',
    },
    output => {
      def => "png",
      reg => qr/^(png|svg|gnuplot|text|csv|csvraw)$/,
      txt => "Output data format. Use 'png' or 'svg' to make PNG or SVG\n" .
          "images, 'gnuplot' to output a set of gnuplot commands, 'text'\n" .
          "to output a plaintext table of values, 'csv' to output csv with\n" .
          "a header line, or 'csvraw' to output csv without a header.",
      var => 's',
    },
    caps => {
      def => undef,
      reg => qr/^(-{0,1}\d{1,20},){0,9}-{0,1}\d{1,20}$/,
      txt => "Comma-separated list of transfer cap sizes in bytes.\n" .
          "Horizontal lines will be drawn with colors corresponding to\n" .
          "the specified list of states. If you need to specify an\n" .
          "unlimited cap, use -1.",
      var => 's',
    },
    patterns => {
      def => undef,
      reg => qr/^[01]$/,
      txt => "Set to 1 to draw graph with patterned bars, or 0 to disable.\n" .
          "See the \"patterns_default\" configuration parameter.",
      var => 's',
    },
  );
  return $info;
}


=item params_ok($common, $params)

Do some additional parameter validation beyond simple syntax checking. Returns
a reference to an array of errors found (the array will be empty if everything
is ok).

=cut

sub params_ok {
  my $common = shift; # hash ref
  my $params = shift; # hash ref
  # Break out early if there are no params; this will just result in parameters
  # getting listed.
  if (! keys(%$params)) {
    return [];
  }
  my $config = $common->{config};
  my @err;
  # check for valid dates
  foreach my $param (qw(start end)) {
    my $value = $params->{$param};
    unless (valid_time($value)) {
      push(@err, "not a valid time: $value");
    }
  }
  # make sure start is before end (if they're both valid)
  unless (@err) {
    my $start = $params->{start};
    my $end   = $params->{end};
    unless (time_before($start, $end)) {
      push(@err, "start time \"$start\" is not before end time \"$end\"");
    }
  }
  # check for too many time steps
  unless (@err) {
    my $num_steps = 1 +
        time_diff($params->{start}, $params->{end}, $params->{per});
    if ($num_steps > $config->{max_steps}) {
      push(@err,
          "Too many time steps: $num_steps $params->{per}s between start\n" .
          "and end times (max $config->{max_steps}). Either set start and\n" .
          "end closer together, use a larger \"per\" unit, or change\n" .
          "\"max_steps\" in the Captrap config file."
      );
    }
  }
  # mtpred without pred
  if ($params->{graph} eq "mtpred" && !defined($params->{pred})) {
    push(@err,
        "Graph type is set to 'mtpred' but the 'pred' parameter is\n" .
        "undefined. There is no suitable default for 'pred', so you must\n" .
        "specify a value."
    );
  }
  return \@err;
}



1;

=back

=head1 AUTHOR

Corey Hickey <bugfood-c@fatooh.org>

This library is free software; you may redistribute and/or modify it under the
terms of the GNU General Public License, version 3. See the source file for the
usual GPL preamble and the COPYING file for a copy of the GPL.

=head1 SEE ALSO

Captrap, Captrap::View, Captrap::Main, captrap_graph, grapher.pl

=cut
