From blair at orcaware.com  Sun May  2 22:00:58 2004
From: blair at orcaware.com (Blair Zajac)
Date: Sun, 2 May 2004 22:00:58 -0700
Subject: [Orca-checkins] r309 - trunk/orca/lib/Orca
Message-ID: <200405030500.i4350wlP018648@orcaware.com>

Author: blair
Date: Sun May  2 21:59:03 2004
New Revision: 309

Modified:
   trunk/orca/lib/Orca/Config.pm
Log:
Remove unused variables.

* lib/Orca/Config.pm
  (load_config):
    Remove the unused %options, %groups and @plots variables.


Modified: trunk/orca/lib/Orca/Config.pm
==============================================================================
--- trunk/orca/lib/Orca/Config.pm	(original)
+++ trunk/orca/lib/Orca/Config.pm	Sun May  2 21:59:03 2004
@@ -1056,11 +1056,6 @@
     return $number_errors;
   }
 
-  # These values hold the information from the config file.
-  my %options;
-  my %groups;
-  my @plots;
-
   # Load in all lines in the file and then process them.  If a line
   # begins with whitespace, then append it to the previously read line
   # and do not process it.


From blair at orcaware.com  Sun May  2 22:05:06 2004
From: blair at orcaware.com (Blair Zajac)
Date: Sun, 2 May 2004 22:05:06 -0700
Subject: [Orca-checkins] r310 - trunk/orca/lib/Orca
Message-ID: <200405030505.i43556sX018921@orcaware.com>

Author: blair
Date: Sun May  2 22:03:27 2004
New Revision: 310

Modified:
   trunk/orca/lib/Orca/SourceFile.pm
Log:
Move duplicated code that makes a deep copy of a plot into a utility function.

* lib/Orca/SourceFile.pm
  (get_date_column):
    Fix indentation.
  (deep_clone_plot):
    New function to make a deep clone of a plot.
  (add_plots):
    Call deep_clone_plot() instead of cloning the plot.


Modified: trunk/orca/lib/Orca/SourceFile.pm
==============================================================================
--- trunk/orca/lib/Orca/SourceFile.pm	(original)
+++ trunk/orca/lib/Orca/SourceFile.pm	Sun May  2 22:03:27 2004
@@ -245,8 +245,9 @@
   }
 
   unless ($found > -1) {
-    warn "$0: warning: cannot find date '$date_column_name' in '$sfile_fids[$fid]'.\n";
-warn "@{$self->[I_COLUMN_DESCRIPTION]}\n";
+    warn "$0: warning: cannot find date '$date_column_name' in ",
+         "'$sfile_fids[$fid]'.\n";
+    warn "@{$self->[I_COLUMN_DESCRIPTION]}\n";
     return;
   }
   $self->[I_DATE_COLUMN_INDEX] = $found;
@@ -254,6 +255,23 @@
   $self;
 }
 
+# XXX
+# Utility function to make a deep clone of one of the plots in the
+# config_plots array, except for the 'creates' hash key.  This should
+# really be a method for a single plot, but the plot is not an object
+# right now, so it doesn't have any methods that can be given to it.
+sub deep_clone_plot {
+  # Be careful not to make a deep copy of the 'creates' reference,
+  # since it can cause recursion.
+  my $plot             = shift;
+  my $creates          = delete $plot->{creates};
+  my $new_plot         = dclone($plot);
+  $plot->{creates}     = $creates;
+  $new_plot->{creates} = $creates;
+
+  $new_plot;
+}
+
 sub add_plots {
   # Make sure that the user has called the add_groups method and
   # inserted at least one key.
@@ -370,15 +388,10 @@
       }
       $regexp_pos[$i] = @column_description;
 
-      # Start by making a deep copy of the plot.  Be careful not to make
-      # a deep copy of the 'creates' reference, since it can cause
-      # recursion.  Replace the regular expression in the first data
-      # with the name of the column that caused the match.
-      my $creates          = delete $plot->{creates};
-      my $new_plot         = dclone($plot);
-      $plot->{creates}     = $creates;
-      $new_plot->{creates} = $creates;
-      $plot                = $new_plot;
+      # Start by making a deep copy of the plot.  Replace the regular
+      # expression in the first data with the name of the column that
+      # caused the match.
+      $plot = deep_clone_plot($plot);
 
       # At this point we have a copy of plot.  Now go through looking
       # for all the columns that match and create an additional data
@@ -487,20 +500,14 @@
       }
       ++$regexp_pos[$i];
 
-      # Start by making a deep copy of the plot.  Be careful not to make
-      # a deep copy of the 'creates' reference, since it can cause
-      # recursion.  Replace the regular expression in the first data
-      # with the name of the column that caused the match.  Then create
-      # string form of the plot object using Data::Dumper::Dumper and
-      # replace all of the $1, $2, ... with what was matched in the
-      # first data source.
-      my $creates          = delete $plot->{creates};
-      my $new_plot         = dclone($plot);
-      $plot->{creates}     = $creates;
-      $plot                = $new_plot;
+      # Start by making a deep copy of the plot.  Replace the regular
+      # expression in the first data with the name of the column that
+      # caused the match.  Then create string form of the plot object
+      # using Data::Dumper::Dumper and replace all of the $1, $2,
+      # ... with what was matched in the first data source.
+      $plot                = deep_clone_plot($plot);
       $plot->{data}[0][$regexp_element_index] = $column_description;
       my $d                = Data::Dumper->Dump([$plot], [qw(plot)]);
-      $plot->{creates}     = $creates;
       my $count            = 1;
       foreach my $match (@matches) {
         $d =~ s/\$$count/$match/mge;


From blair at orcaware.com  Wed May  5 21:06:26 2004
From: blair at orcaware.com (Blair Zajac)
Date: Wed, 5 May 2004 21:06:26 -0700
Subject: [Orca-checkins] r311 - trunk/orca/orca
Message-ID: <200405060406.i4646QFA005165@orcaware.com>

Author: blair
Date: Wed May  5 21:03:57 2004
New Revision: 311

Modified:
   trunk/orca/orca/orca.pl.in
Log:
* orca/orca.pl.in
  (pod):
    No need to mention other packages that are our competition!


Modified: trunk/orca/orca/orca.pl.in
==============================================================================
--- trunk/orca/orca/orca.pl.in	(original)
+++ trunk/orca/orca/orca.pl.in	Wed May  5 21:03:57 2004
@@ -1180,16 +1180,6 @@
   * Can be run under cron or it can sleep itself waiting for file
     updates based on when the file was last updated.
 
-Orca is similar to but substantially different from other tools that
-record and display hourly, daily, monthly, and yearly data, such as
-MRTG and Cricket.  To see these other tools, examine
-
-  http://people.ee.ethz.ch/~oetiker/webtools/mrtg/mrtg.html
-
-and
-
-  http://cricket.sourceforge.net/
-
 =head1 INDEX
 
 This is an index of this manual:


From blair at orcaware.com  Wed May 12 21:04:08 2004
From: blair at orcaware.com (Blair Zajac)
Date: Wed, 12 May 2004 21:04:08 -0700
Subject: [Orca-checkins] r312 - trunk/orca/orca
Message-ID: <200405130404.i4D448sd022222@orcaware.com>

Author: blair
Date: Wed May 12 21:02:14 2004
New Revision: 312

Modified:
   trunk/orca/orca/orca.pl.in
Log:
* orca/orca.pl.in
  (pod):
    Cleanup Orca's POD.


Modified: trunk/orca/orca/orca.pl.in
==============================================================================
--- trunk/orca/orca/orca.pl.in	(original)
+++ trunk/orca/orca/orca.pl.in	Wed May 12 21:02:14 2004
@@ -1156,12 +1156,9 @@
 Orca is a tool useful for plotting arbitrary data from text files onto
 a directory on Web server.  It has the following features:
 
-  * Configuration file based.
   * Reads white space separated data files.
   * Watches data files for updates and sleeps between reads.
   * Finds new files at specified times.
-  * Remembers the last modification times for files so they do not
-    have to be reread continuously.
   * Can plot the same type of data from different files into different
     or the same PNGs.
   * Different plots can be created based on the filename.
@@ -1177,6 +1174,8 @@
   * Creates an index of URL links listing all available targets.
   * Creates an index of URL links listing all different plot types.
   * No separate CGI set up required.
+  * Remembers the last modification times for files so they do not
+    have to be reread continuously.
   * Can be run under cron or it can sleep itself waiting for file
     updates based on when the file was last updated.
 
@@ -1221,19 +1220,19 @@
 
 Orca has only five command line options.  They are:
 
-B<-gifs>: Generate GIFs instead of PNGs.  Tell Orca to generate GIFs
-instead of PNGs.  You may not want to generate GIFs since PNGs are 1/3
-the size of GIFs and take less time to generate.  The only reason to
-do this is if you are using a browser that does not support PNGs and
-only supports GIFs.
+B<-gifs>: Tell Orca to generate GIFs instead of PNGs.  Most likely,
+you will not want to generate GIFs since PNGs are 1/3 the size of GIFs
+and take less time to generate.  The only reason to do this is if you
+are using a browser that does not support PNGs and only supports GIFs.
 
 B<-no-html>: Do not generate any HTML files and only update the
 images.
 
 B<-o>: Once.  This tells Orca to go through the steps of finding
 files, updating the RRDs, updating the PNGs, and creating the HTML
-files once.  Normally, Orca loops continuously looking for new and
-updated files.
+files once.  Normally, Orca continuously loops reading new data from
+updated data files and sleeping waiting for the data files to be
+updated.
 
 B<-r>: RRD only.  Have Orca only update its RRD files.  Do not
 generate any HTML or PNG files.  This is useful if you are loading in
@@ -1246,9 +1245,9 @@
 more than three B<-v>'s are not used by Orca.
 
 After the command line options are listed, Orca takes one more
-argument which is the name of the configuration file to use.  Sample
-configuration files can be found in the sample_configs directory with
-the distribution of this tool.
+argument which is the name of the configuration file to use.  Actual
+configuration files can be found in the data_gatherers directory
+distributed with Orca.
 
 =head1 MAILING LISTS
 
@@ -1256,7 +1255,7 @@
 lists, please visit the URL listed below.  You have the option of
 choosing a digest form of the mailing list when you subscribe to the
 mailing list or anytime thereafter.  To send email to any of these
-lists you must subscribe to the list.
+lists you must first subscribe to the list.
 
 B<orca-announce at orcaware.com>
 
@@ -1276,10 +1275,13 @@
 
 B<orca-users at orcaware.com>
 
-    The orca-users at orcaware.com is the mailing list for Orca users.
-    Problems relating to downloading, configuring, compiling the
-    necessary Perl modules, installing and configuring Orca belong
-    here.
+    The orca-users at orcaware.com is a general discussion mailing list
+    for Orca users.  This mailing list is appropriate for almost any
+    Orca discussion except for the development of Orca, as those
+    discussions belong on the orca-dev at orcaware.com mailing list.  So
+    issues relating to the installation of, configuring of, and
+    understanding of Orca are welcome.  So are questions relating to
+    the Perl modules that Orca requires.
 
     Home Page
         http://www.orcaware.com/mailman/listinfo/orca-users
@@ -1328,7 +1330,7 @@
 
 =head1 PLOT PREFIXES
 
-RRDtool generates the actual GIF or PNG plots and sometimes will need
+RRDtool generates the actual PNG or GIF plots and sometimes will need
 to scale the Y axis of the plot to have normal looking like numbers,
 such as 1 M instead of 1,000,000.  If you see a letter following the
 numbers in the bottom of the plot, then use that letter to scale the Y
@@ -1384,7 +1386,7 @@
 There are three main groups of parameters in a Orca configuration
 file: global parameters, file specific parameters, and plot specific
 parameters.  Global parameters may be used by the group and plot
-specific parameters.  If an parameter is required, then it is only
+specific parameters.  If a parameter is required, then it is only
 placed one time into the configuration file.
 
 Global parameters break down into two main groups, required and
@@ -1394,6 +1396,13 @@
 
 =over 4
 
+=item B<base_dir> I<directory>
+
+If B<base_dir> is set, then it is used to prepend to any file or
+directory names that do not begin with /.  These are currently
+B<state_file>, B<html_dir>, B<rrd_dir>, and the B<find_files>
+parameter in the B<group> section.
+
 =item B<state_file> I<filename>
 
 For Orca to work efficiently, it saves the last modification time of
@@ -1435,13 +1444,6 @@
 B<rrd_dir>.  Orca will quit with an error if both B<rrd_dir> and
 B<base_dir> are not set.
 
-=item B<base_dir> I<directory>
-
-If B<base_dir> is set, then it is used to prepend to any file or
-directory based names that do not begin with /.  These are currently
-B<state_file>, B<html_dir>, B<rrd_dir>, and the B<find_files>
-parameter in the B<group> section.
-
 =back
 
 =head2 Optional Global Parameters
@@ -1450,36 +1452,13 @@
 
 =item B<require> I<package name> I<version number>
 
-B<require> allows the configuration file to specify the minimum
-required version of a package to run as in the same way that B<use>
-and B<require> are used in Perl programs.  Here, both I<package name>
+The B<require> parameter allows the configuration file to specify the
+minimum required version of a package to run.  Both I<package name>
 and I<version number> are required and I<version number> must be a
 number, not a general Perl expression.
 
-Currently, only the version of Orca can be specified and I<package
-name> must be set to Orca.
-
-=item B<late_interval> I<Perl expression>
-
-B<late_interval> is used to calculate the time interval between a
-file's last modification time and the time when that file is
-considered to be late for an update.  In this case, an email message
-may be sent out using the B<warn_email> addresses.  Because different
-input files may be updated at different rates, B<late_interval> takes
-an arbitrary Perl expression, including mathematical expressions, as
-its argument.  If the word I<interval> occurs in the mathematical
-expression it is replaced with the sampling interval of the input data
-file in question.
-
-This is useful for allowing the data files to update somewhat later
-than they would in an ideal world.  For example, to add a 10% overhead
-to the sampling_interval before an input file is considered late, this
-would be used
-
-  late_interval 1.1 * interval
-
-By default, the input file's sampling interval is used as the
-late_interval.
+Currently, only the minimum required version of Orca can be specified
+and I<package name> must be set to Orca.
 
 =item B<warn_email> I<email_address> [I<email_address> ...]
 
@@ -1499,12 +1478,34 @@
 
   warn_email
 
+=item B<late_interval> I<Perl expression>
+
+B<late_interval> is used to calculate the time interval between a
+file's last modification time and the time when that file is
+considered to be late for an update.  When this happens, an email
+message will be sent out using the B<warn_email> addresses.  Because
+different input files may be updated at different rates,
+B<late_interval> takes an arbitrary Perl expression, including
+mathematical expressions, as its argument.  If the word I<interval>
+occurs in the mathematical expression it is replaced with the sampling
+interval of the input data file in question.
+
+This is useful for allowing the data files to update somewhat later
+than they would in an ideal world.  For example, to add a 10% overhead
+to the sampling_interval before an input file is considered late, this
+would be used
+
+  late_interval 1.1*interval
+
+By default, the input file's sampling interval is used as the
+late_interval.
+
 =item B<expire_images> 1
 
 If B<expire_images> is set then .meta files will be created for all
 generated PNG files.  If the Apache web server 1.3.2 or greater is
-being used, then the following modifications must added to srm.conf or
-httpd.conf.
+being used, then the following modifications must be added to Apache's
+httpd.conf file.
 
   <
   < #MetaDir .web
@@ -1521,9 +1522,9 @@
 
 =item B<find_times> I<hours:minutes> [I<hours:minutes> ...]
 
-The B<find_times> parameter is used to tell Orca when to go and find
-new files.  This particularly useful when new input data files are
-created at midnight.  In this case, something like this may work:
+The B<find_times> parameter is used to tell Orca when to find new
+files.  This is particularly useful when new input data files are created
+at midnight.  In this case, something like this may work:
 
   # Find files at the following times:
   #    0:10 to pick up new orcallator files for the new day.
@@ -1542,21 +1543,21 @@
 =item B<html_top_title> I<text> ...
 
 This sets the text, that should not be HTML markup, that is used only
-in the main index.html file.  It is used in the <title></title>
+in the main index.html file.  It is placed in the <title></title>
 element and also placed in the HTML body after the html_page_header in
-a <h1></h1> element index.html file.  By default, no additional text
-is placed at the top of the main index.html.
+a <h1></h1> element.  By default, this text is empty.
 
 =item B<html_page_header> I<html> ...
 
-The I<html> is placed at the top of each HTML file that Orca creates.
-By default, no additional text is placed at the top of each HTML file.
+The I<html> is placed at the top of every HTML file that Orca creates.
+It can be HTML markup.  By default, I<html> is empty so no
+additional HTML markup is added to the file.
 
 =item B<html_page_footer> I<html> ...
 
-The I<html> is placed at the bottom of each HTML file that Orca
-creates.  By default, no additional text is placed at the bottom of
-each HTML file.
+The I<html> is placed at the bottom of every HTML file that Orca
+creates.  It can be HTML markup.  By default, I<html> is empty so
+no additional HTML markup is added to the file.
 
 =item B<generate_hourly_plot> I<value>
 

From blair at orcaware.com  Thu May 13 21:40:16 2004
From: blair at orcaware.com (Blair Zajac)
Date: Thu, 13 May 2004 21:40:16 -0700
Subject: [Orca-checkins] r313 - trunk/orca/lib/Orca
Message-ID: <200405140440.i4E4eGcb010031@orcaware.com>

Author: blair
Date: Thu May 13 21:38:31 2004
New Revision: 313

Modified:
   trunk/orca/lib/Orca/SourceFile.pm
Log:
* lib/Orca/SourceFile.pm
  (add_plots):
    Fix some comments to represent what the code actually does.


Modified: trunk/orca/lib/Orca/SourceFile.pm
==============================================================================
--- trunk/orca/lib/Orca/SourceFile.pm	(original)
+++ trunk/orca/lib/Orca/SourceFile.pm	Thu May 13 21:38:31 2004
@@ -352,9 +352,9 @@
     }
 
     # There are three cases to handle:
-    # 1) Regular expression match in the first data with additional datas.
-    # 2) Regular expression match in the first data with no additional datas.
-    # 3) All others.
+    # 1) Regular expression match in the first data with no additional datas.
+    # 2) Regular expression match in the first data with additional datas.
+    # 3) All other cases.
     # The first is a single data source that has a regular expression.  In
     # this case, all of the columns are searched to match the regular
     # expression.  This generates a single plot with all of the different
@@ -374,7 +374,7 @@
       }
     }
 
-    # 1) Regular expression match in the first data with additional datas.
+    # 1) Regular expression match in the first data with no additional datas.
     if ($number_datas == 1 and $regexp_element_index != -1) {
 
       # If we've gone up to the last column to match, then go on.
@@ -463,7 +463,7 @@
       next unless $new_data_index;
     }
 
-    # 2) Regular expression match in the first data with no additional datas.
+    # 2) Regular expression match in the first data with additional datas.
     elsif ($number_datas > 1 and $regexp_element_index != -1) {
       $handle_regexps = 1;
 
@@ -527,7 +527,7 @@
       $i = $plot->{flush_regexps} ? $oldest_regexp_index : $i + 1;
     }
 
-    # 3) All others.
+    # 3) All other cases.
     else {
       $old_i = $i++;
       ++$oldest_regexp_index unless $handle_regexps;


From blair at orcaware.com  Sun May 16 07:53:54 2004
From: blair at orcaware.com (Blair Zajac)
Date: Sun, 16 May 2004 07:53:54 -0700
Subject: [Orca-checkins] r314 - trunk/orca/lib/Orca
Message-ID: <200405161453.i4GErsBh022524@orcaware.com>

Author: blair
Date: Sun May 16 07:51:48 2004
New Revision: 314

Modified:
   trunk/orca/lib/Orca/RRDFile.pm
Log:
* lib/Orca/RRDFile.pm:
  Whitespace fixes and fit lines to 80 characters.


Modified: trunk/orca/lib/Orca/RRDFile.pm
==============================================================================
--- trunk/orca/lib/Orca/RRDFile.pm	(original)
+++ trunk/orca/lib/Orca/RRDFile.pm	Sun May 16 07:51:48 2004
@@ -110,10 +110,12 @@
             $self->[I_RRD_UPDATE_TIME] = $update_time;
             $self->[I_RRD_VERSION]     = $version;
           } else {
-            warn "$0: old version $version RRD '$rrd_filename' found: will create new version $ORCA_RRD_VERSION file.\n";
+            warn "$0: old version $version RRD '$rrd_filename' found: will ",
+                  "create new version $ORCA_RRD_VERSION file.\n";
           }
         } else {
-          warn "$0: unknown version RRD '$rrd_filename' found: will create new version $ORCA_RRD_VERSION file.\n";
+          warn "$0: unknown version RRD '$rrd_filename' found: will create ",
+               "new version $ORCA_RRD_VERSION file.\n";
         }
       }
     }
@@ -150,7 +152,7 @@
 
 # Queue a list of (time, value) data pairs.  Return the number of data
 # pairs sucessfully queued.
-# Call:   $self->(unix_epoch_time1, value1, unix_epoch_time2, value2, ...);
+# Call: $self->(unix_epoch_time1, value1, unix_epoch_time2, value2, ...);
 sub queue_data {
   my $self = shift;
 

From blair at orcaware.com  Sun May 16 08:01:30 2004
From: blair at orcaware.com (Blair Zajac)
Date: Sun, 16 May 2004 08:01:30 -0700
Subject: [Orca-checkins] r315 - trunk/orca/lib/Orca
Message-ID: <200405161501.i4GF1UlM022809@orcaware.com>

Author: blair
Date: Sun May 16 08:00:11 2004
New Revision: 315

Modified:
   trunk/orca/lib/Orca/RRDFile.pm
Log:
* lib/Orca/RRDFile.pm
  (flush_data):
    When RRDs::create fails and a warning is printed to standard
    error, also print all of RRDs::create's arguments to make remote
    support and debugging easier.


Modified: trunk/orca/lib/Orca/RRDFile.pm
==============================================================================
--- trunk/orca/lib/Orca/RRDFile.pm	(original)
+++ trunk/orca/lib/Orca/RRDFile.pm	Sun May 16 08:00:11 2004
@@ -224,7 +224,7 @@
     RRDs::create @options;
 
     if (my $error = RRDs::error) {
-      warn "$0: RRDs::create error: '$rrd_filename' $error\n";
+      warn "$0: RRDs::create(", join(', ', @options), ") failed: $error\n";
       return;
     }
   }


From blair at orcaware.com  Wed May 26 22:57:40 2004
From: blair at orcaware.com (Blair Zajac)
Date: Wed, 26 May 2004 22:57:40 -0700
Subject: [Orca-checkins] r325 - trunk/orca/orca
Message-ID: <200405270557.i4R5veff001432@orcaware.com>

Author: blair
Date: Wed May 26 22:56:02 2004
New Revision: 325

Modified:
   trunk/orca/orca/orca.pl.in
Log:
* orca/orca.pl.in
  (pod):
    Continue cleaning up Orca's POD.
    Describe subgroups and how they are generated from the find_files
      configuration parameter.
    Describe that multiple () matches in find_files are joined
      together using _'s.


Modified: trunk/orca/orca/orca.pl.in
==============================================================================
--- trunk/orca/orca/orca.pl.in	(original)
+++ trunk/orca/orca/orca.pl.in	Wed May 26 22:56:02 2004
@@ -1636,12 +1636,12 @@
   .
   }
 
-The key for a group, in this example GROUP_NAME1 and GROUP_NAME2, is a
-descriptive name that is unique for all files and is used later when
-the plots to create are defined.  Files that share the same global
-format of column data may be grouped together.  The parameters for a
-particular group must be enclosed in the curly brackets {}'s.  An
-unlimited number of groups may be listed.
+The key for a group, in this example groups GROUP_NAME1 and
+GROUP_NAME2, is a descriptive name that is unique for all groups and
+is used later when the plots are defined.  Files that share the same
+format, i.e. the same column names, may be grouped together.  The
+parameters for a particular group must be enclosed in the curly
+brackets {}'s.  An unlimited number of groups may be listed.
 
 =head2 Required Group Parameters
 
@@ -1649,37 +1649,50 @@
 
 =item B<find_files> I<path|regexp> [I<path|regexp> ...]
 
-The B<find_files> parameter tells Orca what data files to use as its
+The B<find_files> parameter tells Orca what data files to use for its
 input.  The arguments to B<find_files> may be a simple filename, a
-complete path to a filename, or a regular expression to find files.
-The regular expression match is not the normal shell globing that the
-Bourne shell, C shell or other shells use.  Rather, Orca uses the Perl
-regular expressions to find files.  For example:
+complete path to a filename, or a regular expression to match multiple
+files.  The regular expression match is not the normal shell globbing
+that the Bourne shell, C shell or other shells use.  Rather, Orca uses
+Perl regular expressions to find files.  For example:
 
-  find_files /data/source1 /data/source2
+  find_files /data/source10 /data/source20
 
-will have Orca use /data/source1 and /data/source2 as the inputs to
+will have Orca use /data/source10 and /data/source20 as the inputs to
 Orca.  This could have also been written as
 
-  find_files /data/source\d
+  find_files /data/source\d+
 
 and both data files will be used.
 
 In the two above examples, Orca will assume that both data files
-represent data from the same source.  If this is not the case, such as
-source1 is data from one place and source2 is data from another place,
-then Orca needs to be told to treat the data from each file as
-distinct data sources.  This be accomplished in two ways.  The first
-is by creating another group { ... } set.  However, this requires
-copying all of the text and makes maintenance of the configuration
-file complex.  The second and recommend approach is to place ()'s
-around parts of the regular expression to tell Orca how to distinguish
-the two data files:
+represent data from the same source and are in the same 'subgroup'.
+If this is not the case, such as source10 is data from one source and
+source20 is data from another source, then Orca needs to be told to
+treat the data from each file as a distinct data source.  This can be
+accomplished in two ways.  The first is by creating another group {
+... } set.  However, this requires copying all of the text in the
+configuration file and makes its maintenance harder.  The second and
+recommended approach is to place ()'s around parts of the regular
+expression to tell Orca how to distinguish the two data files:
 
-  find_files /data/(source\d)
+  find_files /data/(source\d+)
 
-This creates two groups, one named source1 and the other named source2
-which will be plotted separately.  One more example:
+This creates two subgroups, one named source10 and the other named
+source20 which will be plotted separately.  If there are multiple
+()'s, then the subgroup name is the joining of each matched string
+with _'s.  So if
+
+  find_files /data/os_(.*)/(.*)/orcallator.data
+
+matches
+
+  /data/os_linux/host1/orcallator.data
+  /data/os_macosx/host2/orcallator.data
+
+then there are two subgroups, linux_host1 and macosx_host2.
+
+One more example:
 
   find_files /data/solaris.*/(.*)/percol-\d{4}-\d{2}-\d{2}(?:\.(?:Z|gz|bz2))?
 
@@ -1696,10 +1709,9 @@
 
 You'll notice that all but the first () has the form (?:...).  This
 tells Perl to match the expression but not save the matched text in
-the $1, $2, variables.  Orca uses the matched text to generate a
-subgroup name, which is used to place files into different subgroups.
-Here, only the hostname should be used to generate a subgroup name,
-hence all the (?:...) for matching anything else.
+Perl's $1, $2, variables.  Here, only the hostname should be used to
+generate a subgroup name, hence all the (?:...) for grouping anything
+else.
 
 If any of the paths or regular expressions given to B<find_files> do
 not begin with a / and the B<base_dir> parameter was set, then the
@@ -1708,16 +1720,23 @@
 
 =item B<interval> I<seconds>
 
-The B<interval> parameters takes the number of seconds between updates
+The B<interval> parameter is the number of seconds between updates
 for the input data files listed in this group.
 
+This value is very important, because the generated RRD data files are
+created with this value.  If the interval is incorrect, then you may
+find empty plots, even though Orca did read the data.  If the interval
+needs to be changed, then the RRD data files will either need to be
+deleted so that Orca can recreate them or they will need to be modified
+by an external tool.
+
 =item B<column_description> I<column_name> [I<column_name> ...]
 
 =item B<column_description> first_line
 
 For Orca to plot the data, it needs to be told what each column of
-data holds.  This is accomplished by creating a text description for
-each column.  There are two ways this may be loaded into Orca.  If the
+data means.  This is done by creating a text description for each
+column.  There are two ways this may be configured into Orca.  If the
 input data files for a group do not change, then the column names can
 be listed after B<column_description>:
 
@@ -1729,8 +1748,8 @@
   column_description first_line
 
 This informs Orca that it should read the first line of all the input
-data files for the column description.  Orca can handle different
-files in the same group that have different number of columns and
+data files in this group for the column description.  Orca can handle
+different files in the same group that have different columns and
 column descriptions.  The only limitation here is that column
 descriptions are white space separated and therefore, no spaces are
 allowed in the column descriptions.
@@ -1739,12 +1758,14 @@
 
 =item B<date_source> file_mtime
 
-The B<date_source> parameter tells Orca where time and date of the
-measurement is located.  The first form of the B<date_source>
-parameters lists the column name as given to B<column_description>
-that contains the Unix epoch time.  The second form with the
-file_mtime argument tells Orca that the date and time for any new data
-in the file is the last modification time of the file.
+The B<date_source> parameter tells Orca where to get the time in
+seconds since the Unix epoch when the measurement was taken.
+
+The first form of the B<date_source> parameter lists the column name
+as given to B<column_description> that contains the Unix epoch time.
+The second form with the file_mtime argument tells Orca that the date
+and time for any new data in the file is the last modification time of
+the file.
 
 =item B<date_parse> I<Perl subroutine>
 
@@ -1756,7 +1777,7 @@
 from the 'date_source' column that contains some time information.
 The subroutine should return the Unix epoch time.  If this parameter
 is not specified, then Orca assumes that the string holds the Unix
-epoch time.
+epoch time in integer seconds form.
 
 This Perl subroutine is only used if the file's date source is not
 specified to be the file's last modified time as indicated to Orca by
@@ -1770,20 +1791,24 @@
 
 =item B<filename_compare> I<Perl subroutine>
 
-The B<filename_compare> parameter is used to sort the found filenames
-in a particular group.  This function must be written as though it
-were being passed to the Perl sort() function, which takes the two
-items to compare in the package global $a and $b variables instead of
-the @_ array.
+The B<filename_compare> parameter is used to sort the filenames found
+from the B<find_files> parameter in a particular group.  This function
+must be written as though it were being passed to the Perl sort()
+function, which takes the two items to compare in the package global
+$a and $b variables instead of the @_ array.
 
 Use of this parameter has an additional effect on letting Orca know
-when it can flush data to the RRD files.  It determines this when it
-compares the previously loaded filename to the filename about to be
-loaded using the B<filename_compare> function.  If the result of the
-comparison is greater than 1, then the data is flushed.  If the
-comparison is equal to or less than 1, then the data is not flushed.
-Orca uses a value of 1 instead of 0 since there are cases when the
-filenames should still be ordered but not flushed.
+when it can flush data to the RRD files.  This is very important when
+a large amount of data is being loaded into Orca, so that data is
+flushed continuously to disk instead of increasing Orca's memory usage.
+
+Orca determines when to flush data to disk by comparing the
+previously loaded filename to the filename about to be loaded using
+the B<filename_compare> function.  If the result of the comparison is
+greater than 1, then the data is flushed.  If the comparison is equal
+to or less than 1, then the data is not flushed.  Orca uses a value of
+1 instead of 0 since there are cases when the filenames should still
+be ordered but not flushed.
 
 For example, the orcallator.cfg file uses the following subroutine for
 filenames of the form "orcallator-2000-02-14":
@@ -1809,7 +1834,8 @@
 
 If the B<filename_compare> parameter is not used, then the filenames
 are sorted using the Perl <=> operator and data is not flushed until
-all of it is loaded.
+all input data files are loaded, which could consume a large amount of
+memory.
 
 =item B<late_interval> I<Perl expression>
 
@@ -1821,15 +1847,15 @@
 
 Using the B<reopen> parameter for a group instructs Orca to close and
 reopen any input data files when there is new data to be read.  This
-is of most use when an input data file is erased and rewritten by some
-other process.
+is used when an input data file is erased and rewritten by some other
+process and Orca needs to reread the file from the beginning.
 
 =back
 
 =head2 Plot Parameters
 
-The final step is to tell Orca what plots to create and how to create
-them.  The general format for creating a plot is:
+The final step to configure Orca is to configure the plots.  The
+general format for creating a plot is:
 
   plot {
   title         Plot title


From blair at orcaware.com  Thu May 27 20:51:49 2004
From: blair at orcaware.com (Blair Zajac)
Date: Thu, 27 May 2004 20:51:49 -0700
Subject: [Orca-checkins] r326 - trunk/orca/orca
Message-ID: <200405280351.i4S3pnpm002422@orcaware.com>

Author: blair
Date: Thu May 27 20:49:51 2004
New Revision: 326

Modified:
   trunk/orca/orca/orca.pl.in
Log:
* orca/orca.pl.in
  (pod):
    Improve the plot configuration file parameter documentation.


Modified: trunk/orca/orca/orca.pl.in
==============================================================================
--- trunk/orca/orca/orca.pl.in	(original)
+++ trunk/orca/orca/orca.pl.in	Thu May 27 20:49:51 2004
@@ -1871,20 +1871,23 @@
   .
   }
 
-Unlike the group, there is no key for generating a plot.  An unlimited
-number of plots can be created.
+Unlike a group, there is no key name for a plot.  An unlimited number
+of plots can be created.
+
+The B<title> and B<legend> plot parameters, described below, may
+contain either the string %g or the string %G.  Here, the 'g' refers
+to the 'g' in subgroup.  A subgroup name is generated by joining with
+a _ character all the strings that matched ()'s in the find_files
+parameter for the group name given to the B<source> plot parameter.
+All %g's are replaced with the subgroup name and all %G's are replaced
+with the subgroup name with the first character capitalized.
 
-Some of the plot parameters if they have the two characters %g or %G
-will perform a substitution of this substring with the group name from
-the find_files ()'s matching.  %g gets replaced with the exact match
-from () and %G gets replaced with the first character capitalized.
 For example, if
 
   find_files /(olympia)/data
 
-was used to locate a file, then %g will be replaced with olympia and
-%G replaced with Olympia.  This substitution is performed on the
-B<title> and B<legend> plot parameters.
+was used to find a file, then %g will be replaced with olympia and %G
+replaced with Olympia.
 
 =head2 Required Plot Parameters
 
@@ -1892,7 +1895,7 @@
 
 =item B<source> I<group_name>
 
-The B<source> argument should be a single group name from which data
+The B<source> argument must be one of the group names from which data
 will be plotted.  Currently, only data from a single group may be put
 into a single plot.
 
@@ -1900,19 +1903,18 @@
 
 =item B<data> I<regular expression>
 
-The B<data> plot parameter tells Orca the data sources to use to place
-in a single PNG plot.  At least one B<data> parameter is required for
-a particular plot and as many as needed may be placed into a single
-plot.
+The B<data> plot parameter tells Orca which data sources to use in
+a single plot.  At least one B<data> parameter is required for a plot.
+There is no limit on how many B<data>s may be placed in a plot.
 
 Two forms of arguments to B<data> are allowed.  The first form allows
-arbitrary Perl expressions, including mathematical expressions, that
-result in a number as a data source to plot.  The expression may
-contain the names of the columns as found in the group given to the
-B<source> parameter.  The column names must be separated with white
-space from any other characters in the expression.  For example, if
-you have number of bytes per second input and output and you want to
-plot the total number of bits per second, you could do this:
+arbitrary Perl expressions that return a number to plot.  The
+expression may contain the names of the columns as found in the group
+given to the B<source> parameter.  The column names must be separated
+with white space from any other characters in the expression.  For
+example, if you have the input and output number of bytes per second
+and you want to plot the total number of bits per second, you could do
+this:
 
   plot {
   source        bytes_per_second
@@ -1976,13 +1978,12 @@
 each plot containing the input and output number of packets per
 second.
 
-By default, when Orca finds a plot set with a regular expression
-match, it will only find one match, and then go on to the next plot
-set.  After it reaches the last plot set, it will go back to the first
-plot set with a regular expression match and look for the next data
-that matches the regular expression.  The net result of this is that
-the generated HTML files using the above configuration will have links
-in this order:
+By default, when Orca finds a plot with a regular expression, it will
+only find one match, and then go on to the next plot.  After it
+reaches the last plot, it will go back to the first plot with a
+regular expression and look for the next data that matches it.  The
+net result of this is that the generated HTML files using the above
+configuration will have the plots listed in this order:
 
   hme0 Input & Output Packets per Second
   hme0 Input & Output Kilobytes per Second
@@ -1993,14 +1994,13 @@
 
 If you wanted to have the links listed in order of hme0 and hme1, then
 you would add the B<flush_regexps> parameter to tell Orca to find all
-regular expression matches for a particular plot set and all plot sets
-before the plot set containing B<flush_regexps> before continuing on
-to the next plot set.  For example, if
+regular expression matches for a particular plot and all plots before
+the plot containing B<flush_regexps> before continuing on to the next
+plot.  For example, if
 
   flush_regexps 1
 
-were added to the plot set for InKB/s and OuKB/s, then the order would
-be
+were added to the plot for InKB/s and OuKB/s, then the order would be
 
   hme0 Input & Output Packets per Second
   hme0 Input & Output Kilobytes per Second
@@ -2011,7 +2011,7 @@
 
 If you wanted to have all of the plots be listed in order of the type
 of data being plotted, then you would add "flush_regexps 1" to all the
-plot sets and the order would be
+plots and the order would be
 
   hme0 Input & Output Packets per Second
   hme1 Input & Output Packets per Second


From blair at orcaware.com  Sat May 29 23:06:27 2004
From: blair at orcaware.com (Blair Zajac)
Date: Sat, 29 May 2004 23:06:27 -0700
Subject: [Orca-checkins] r329 - trunk/orca/data_gatherers/orcallator
Message-ID: <200405300606.i4U66R2r008616@orcaware.com>

Author: blair
Date: Sat May 29 23:04:43 2004
New Revision: 329

Modified:
   trunk/orca/data_gatherers/orcallator/orcallator.se
Log:
* data_gatherers/orcallator/orcallator.se
  (check_output_log_filename):
    Optimize the determination of the output log file to write to.
      Patch by Dmitry Berezin <dberezin at surfside.rutgers.edu>.
    Fix a race condition.  When orcallator.se looks for an output log
      filename to write to, if it finds an uncompressed log file, it
      will spawn a compression program in the background to compress
      it.  If the compression program does not finish compressing and
      removing the uncompressed log file by the time orcallator.se
      finishes the next interval and enters check_output_log_filename
      again, then because check_output_log_filename() didn't check for
      the existence of a partially compressed log file, it will spawn
      another compression program in the background.  This can cause
      problems.  Now, check that there are no compressed log files
      before compressing the uncompressed log file.  Bug found by
      Dmitry Berezin <dberezin at surfside.rutgers.edu>.


Modified: trunk/orca/data_gatherers/orcallator/orcallator.se
==============================================================================
--- trunk/orca/data_gatherers/orcallator/orcallator.se	(original)
+++ trunk/orca/data_gatherers/orcallator/orcallator.se	Sat May 29 23:04:43 2004
@@ -699,7 +699,8 @@
   char   tm_buf[32];
   int    file_number;
   int    need_new_log_file;
-  int    exists;
+  int    exists_uncompressed;
+  int    exists_compressed;
   int    result;
   int    i;
 
@@ -744,33 +745,53 @@
 
     // Get the new filename.  Check for already existing uncompressed
     // log filenames and compressed log filenames.
-    file_number = 0;
     strftime(tm_buf, sizeof(tm_buf), "%Y-%m-%d", now);
+
+    // If the previous log filename has the same date, then increment
+    // the file number, otherwise reset it to 0.  The first time
+    // through, output_filename will be nil.
+    if (nil != output_filename && output_filename =~ tm_buf) {
+      file_number++;
+    } else {
+      file_number = 0;
+    }
+
     while (1 == 1) {
-      exists          = 0;
+
+      // Check for the existence of uncompressed and compressed log
+      // files.  If there is an uncompressed log file but no
+      // compressed ones, then compress it in the background.  If
+      // either an uncompressed or compressed log file exists, then
+      // check the next file number.
       output_filename = sprintf("%s/orcallator-%s-%03d", output_directory,
                                                          tm_buf,
                                                          file_number);
       result          = stat(output_filename, log_file_stat);
       if (result != -1) {
-        exists = 1;
-        if (compress != nil) {
-          system(sprintf(compress, output_filename));
-        }
+        exists_uncompressed = 1;
       } else {
-        for (i=0; i<NUMBER_COMPRESS_SUFFIXES; ++i) {
-          compressed_filename = sprintf("%s%s", output_filename,
-                                                compression_suffixes[i]);
-          result              = stat(compressed_filename, log_file_stat);
-          if (result != -1) {
-            exists = 1;
-            break;
-          }
+        exists_uncompressed = 0;
+      }
+
+      exists_compressed = 0;
+      for (i=0; i<NUMBER_COMPRESS_SUFFIXES; ++i) {
+        compressed_filename = sprintf("%s%s", output_filename,
+                                              compression_suffixes[i]);
+        result              = stat(compressed_filename, log_file_stat);
+        if (result != -1) {
+          exists_compressed = 1;
+          break;
         }
       }
 
-      if (exists != 0) {
-        file_number++;
+      if (nil != compress &&
+          0 != exists_uncompressed &&
+          0 == exists_compressed) {
+        system(sprintf(compress, output_filename));
+      }
+
+      if (0 != exists_uncompressed || 0 != exists_compressed) {
+        ++file_number;
       } else {
         break;
       }


From blair at orcaware.com  Mon May 31 16:38:36 2004
From: blair at orcaware.com (Blair Zajac)
Date: Mon, 31 May 2004 16:38:36 -0700
Subject: [Orca-checkins] r330 - trunk/orca/data_gatherers/procallator
Message-ID: <200405312338.i4VNcaa5003365@orcaware.com>

Author: blair
Date: Mon May 31 16:37:00 2004
New Revision: 330

Modified:
   trunk/orca/data_gatherers/procallator/procallator.cfg.in
Log:
* data_gatherers/procallator/procallator.cfg.in:
  For the Disk Reads/Writes Per Second and the Disk Transfer Rate
  plots, for the second data line, instead of using (.*), use the $1
  from the match on the first line.  Bug noted by Michael Podhorodecki
  <Michael.Podhorodecki at sensis.com.au>.


Modified: trunk/orca/data_gatherers/procallator/procallator.cfg.in
==============================================================================
--- trunk/orca/data_gatherers/procallator/procallator.cfg.in	(original)
+++ trunk/orca/data_gatherers/procallator/procallator.cfg.in	Mon May 31 16:37:00 2004
@@ -507,7 +507,7 @@
 title			%g Disk Reads/Writes Per Second
 source			procallator
 data			disk_rd_(.*)/s
-data			disk_wr_(.*)/s
+data			disk_wr_$1/s
 line_type		area
 line_type		line1
 legend			$1 Reads/s
@@ -522,7 +522,7 @@
 title			%g Disk Transfer Rate
 source			procallator
 data			disk_rB_(.*)/s
-data			disk_wB_(.*)/s
+data			disk_wB_$1/s
 line_type		area
 line_type		line1
 legend			$1 Read transfer rate


From blair at orcaware.com  Mon May 31 18:47:16 2004
From: blair at orcaware.com (Blair Zajac)
Date: Mon, 31 May 2004 18:47:16 -0700
Subject: [Orca-checkins] r331 - trunk/orca/lib/Orca
Message-ID: <200406010147.i511lGhU006814@orcaware.com>

Author: blair
Date: Mon May 31 18:45:23 2004
New Revision: 331

Modified:
   trunk/orca/lib/Orca/SourceFile.pm
Log:
Fix a bug introduced in revision 310.  Bug found by and solution
suggested by Michael Podhorodecki <Michael.Podhorodecki at sensis.com.au>.

* lib/Orca/SourceFile.pm
  (deep_clone_plot):
    Take an additional argument that says whether the creates hash key
      should be restored in the new plot.
    In an array context, return both the new plot and the creates hash
      reference.  In scalar context, return only the new plot.
  (add_plots):
    Change all calls to deep_clone_plot() to use the new argument.
    Fix the bug introduced in revision 310.  In the second case with
      more than one data in the plot, do not immediately restore the
      creates hash key when calling deep_clone_plot().  Instead,
      restore it after Data::Dumper->Dump is called to avoid the
      recursion problem.


Modified: trunk/orca/lib/Orca/SourceFile.pm
==============================================================================
--- trunk/orca/lib/Orca/SourceFile.pm	(original)
+++ trunk/orca/lib/Orca/SourceFile.pm	Mon May 31 18:45:23 2004
@@ -261,15 +261,23 @@
 # really be a method for a single plot, but the plot is not an object
 # right now, so it doesn't have any methods that can be given to it.
 sub deep_clone_plot {
+  my $plot             = shift;
+  my $restore_creates  = shift;
+
   # Be careful not to make a deep copy of the 'creates' reference,
   # since it can cause recursion.
-  my $plot             = shift;
   my $creates          = delete $plot->{creates};
   my $new_plot         = dclone($plot);
   $plot->{creates}     = $creates;
-  $new_plot->{creates} = $creates;
+  if ($restore_creates) {
+    $new_plot->{creates} = $creates;
+  }
 
-  $new_plot;
+  if (wantarray) {
+    ($new_plot, $creates);
+  } else {
+    $new_plot;
+  }
 }
 
 sub add_plots {
@@ -391,7 +399,7 @@
       # Start by making a deep copy of the plot.  Replace the regular
       # expression in the first data with the name of the column that
       # caused the match.
-      $plot = deep_clone_plot($plot);
+      $plot = deep_clone_plot($plot, 1);
 
       # At this point we have a copy of plot.  Now go through looking
       # for all the columns that match and create an additional data
@@ -505,9 +513,11 @@
       # caused the match.  Then create string form of the plot object
       # using Data::Dumper::Dumper and replace all of the $1, $2,
       # ... with what was matched in the first data source.
-      $plot                = deep_clone_plot($plot);
+      my $creates;
+      ($plot, $creates)    = deep_clone_plot($plot, 0);
       $plot->{data}[0][$regexp_element_index] = $column_description;
       my $d                = Data::Dumper->Dump([$plot], [qw(plot)]);
+      $plot->{creates}     = $creates;
       my $count            = 1;
       foreach my $match (@matches) {
         $d =~ s/\$$count/$match/mge;