#!/usr/bin/perl

#
# run rech --help or rech -h for a brief help message.
#

##  Copyright (c) 1997-2001 Ivan Kurmanov. All rights reserved.
##
##  This program is free software; you can redistribute it and/or modify it
##  under the same terms as Perl itself.


BEGIN {

    $VERSION = "0.1";   ###  $Id$

    # Program identity, kept in package globals.
    $ProgrammName        = "rech";
    $ProgrammVersion     = "1.$VERSION";
    $ProgrammDescription = "ReDIF files checker";
    $ProgrammAuthor      = "Ivan Kurmanov";
    $ProgrammAuthorEmail = 'kurmanov@openlib.org';

    # The banner is printed from BEGIN, i.e. while the script is still being
    # compiled, so it precedes any output of the code below.
    printf "- %s v%s -- %s -\n\tby %s (%s)\n\n",
        $ProgrammName, $ProgrammVersion, $ProgrammDescription,
        $ProgrammAuthor, $ProgrammAuthorEmail;
}



##############################################################################
# 'use' and 'require' statements here.
#

# Standard Library

use File::Find;
use Getopt::Long;

# other libraries


# Own-written

############################################################################

############################################################################


# Text printed for the -h / -help / --help switches.  It is filled in by the
# BEGIN block below so that it is available at compile time.
my $help_message;

# Tells whether any of the given command-line arguments asks for help.
#
# Parameters: the argument list to inspect (normally @ARGV).
# Returns:    the number of help switches found (0 when there is none).
#
# Every argument is inspected; an empty or "0" argument does not end the
# scan early.
sub _help_requested {
    my @args = @_;
    return scalar grep {
        $_ eq '-h' or $_ eq '--help' or $_ eq '-help'
    } @args;
}

# The help request is served from a BEGIN block: plain top-level statements
# only run after the whole script has been compiled, which would be after the
# `use` and BEGIN statements further down have already loaded and initialised
# the ReDIF libraries.  Handling it here lets "rech -h" answer first.
BEGIN {
    $help_message = <<END_OF_HELP;
Use: 
    rech [ OPTIONS ] [ DIRECTORY ]
    rech [ OPTIONS ] FILE1 [ FILE2 ... ]

Options:
  -e   report only errors
  -w   report errors and warnings
  -d   debug mode
  -m NUMBER
       minimal message rank to display (0..5); use instead of -e, -w, -d
  -a   - don't check the local archive's data directory
  -nq  - do not quote (citate) checked files on errors (or warnings)
  -u   output in UTF-8 encoding
  -h or --help
       show this help message and quit

  --rdir <ReDIF_home_dir_name>
  --redif.home <ReDIF_home_dir_name>  
       sets ReDIF home directory, overrides any other way to set it

  --spec <spec_file>
  --redif.spec <spec_file>
       specifies the ReDIF specification file, either by name, 
       directory or a full pathname

If no files or directory names are given to rech on the command line, it
will check the local data directory (unless that is prohibited by the
configuration file or options).

Note: checking a directory also means checking all its subdirectories. 

END_OF_HELP

    if ( _help_requested(@ARGV) ) {
        print $help_message;
        exit;
    }
}


use ReDIF::init;

# Initialise the ReDIF library.  Being a BEGIN block, this call is executed
# as soon as it has been parsed: after ReDIF::init has been loaded by the
# `use` above and before the `use` statements further down are processed.
# NOTE(review): 'print_results' => 0 presumably silences the library's own
# report of what it found (this script prints its own summary) -- confirm
# against ReDIF::init.
BEGIN { 
  ReDIF::initialize( {'print_results' => 0 } );
}

# specification
    
use ReDIF::Spec ();

# Snapshot of the settings found in %ReDIF::CONFIG.
my $spec_file   = $ReDIF::CONFIG{spec_full_name};
my $spec_object = ReDIF::Spec->new($spec_file);

my ( $redif_home, $redif_home_type )
    = @ReDIF::CONFIG{ qw( redif_home redif_home_type ) };

# An unset (or otherwise false) data path is replaced by an empty string.
my $local_data_dir = $ReDIF::CONFIG{data_path} || '';

my ( $address, $archive ) = @ReDIF::CONFIG{ qw( address archive_id ) };
#  my $authority = $ReDIF::CONFIG{authority_id};

# Tell the user which configuration is in effect.
{
    my $home_report = 'not identified';
    if ( defined $redif_home ) {
        $home_report = "$redif_home (type: $redif_home_type)";
    }

    print
        "configuration: \n",
        "\tredif.spec file   : $spec_file\n",
        "\tredif.spec version: ", $spec_object->version, "\n",
        "\tReDIF home dir    : $home_report\n",
        "\tlocal data dir    : $local_data_dir\n",
        "\n";
}

# Nothing more is said when the ReDIF home is known.
print "No ReDIF home setting found/identified\n"
    unless defined $redif_home;


use strict;

# Reporting defaults; the command-line switches below may override them.
my $message_threshold       = 2;    # the level that '-w' selects as well
my $check_local_archive_dir = 1;
my $source_quote            = 1;
my $unicode_output          = 0;

###    Continuing the command-line options analysis

my %opt = ();

GetOptions( \%opt, qw( h help w e d m=i u s a ne nq in=i ) );

# The help switches are looked for earlier in the script; none may get here.
die "Help options should have been treated above"
    if defined $opt{'h'} or defined $opt{'help'};

# Verbosity switches, applied in this order: when several are given, the
# last one listed here decides the threshold.
for my $level (
    [ 'w', 2, "option '-w': will report warnings and errors\n" ],
    [ 'e', 3, "option '-e': will report only errors\n" ],
    [ 'd', 0, "option '-d': will report warnings, errors and debugging notices\n" ],
) {
    my ( $switch, $rank, $notice ) = @$level;
    next unless defined $opt{$switch};

    $message_threshold = $rank;
    print $notice;
}

# '-m' sets the threshold explicitly and overrides the switches above.
if ( defined $opt{m} ) {
    my $rank = $opt{m};

    print "please don't use '-m' option with any of '-e' '-w' '-d' options\n"
        if grep { defined $opt{$_} } qw( w e d );

    unless ( $rank =~ /^\d+$/ and $rank >= 0 and $rank <= 5 ) {
        die "for '-m' option you should specify a valid integer number from the range 0..5";
    }

    print "option '-m': Minimal message rank to display: $rank\n";
    $message_threshold = $rank;
}

print "option '-s' is obsolete -- doesn't do anything useful now\n"
    if defined $opt{s};

if ( defined $opt{a} ) {
    $check_local_archive_dir = 0;
    print "option '-a': rech will not check local data directory\n";
}

if ( $opt{u} ) {
    $unicode_output = 1;
    print "option '-u': rech output will be in UTF-8 encoding\n";
    binmode( STDOUT, ":utf8" );
}

if ( defined $opt{nq} ) {
    $source_quote = 0;
    print "option '-nq': will not quote original files on errors and warnings\n";
}

# Switches that are still accepted but no longer have any effect
# ('-ne' used to mean "No Exec": do not run the $ExecOnError command).
for my $ignored ( qw( ne in ) ) {
    print "option '-$ignored': ignored\n" if defined $opt{$ignored};
}


# What is left in @ARGV at this point is the list of things to check.
my @check_list = @ARGV;

#########################################################################
#########################################################################

# Settings handed over to the ReDIF parser.
my %Options = (
    build_template_hash => 0,                     # used to be 1
    quote_source        => $source_quote,
    message_threshold   => $message_threshold,
    use_parser_input    => 1,
    redif_specification => $spec_object,
    utf8_output         => $unicode_output,
);

use ReDIF::Parser qw(
    &redif_open_file
    &redif_get_next_template
    &redif_get_next_template_good_or_bad
);

ReDIF::Parser::redif_set_parser_options(%Options);


#########################################################################

# Main part: check what was named on the command line or, when nothing was
# named, the local data directory (unless that has been switched off).

print "\n";

if ( @check_list ) {

    print "Checking: ", ( join ', ', @check_list ) , "\n";

    foreach my $file ( @check_list ) {
        if ( not -e $file ) {
            print "what's this '$file'?\n";
        } elsif ( -f _ ) {      # "_" reuses the stat() made by -e above
            checkfile( $file );
        } elsif ( -d _ ) {
            # directories are searched recursively, see wanted()
            print "going into directory: $file\n";
            find( \&wanted, $file );
        } else {
            print "can't check: $file\n";
        }
    }

} elsif ( not $check_local_archive_dir ) {

    print "Nothing to check.";

} elsif ( defined $local_data_dir and length $local_data_dir ) {

    print "going into directory: $local_data_dir\n";
    find( \&wanted, $local_data_dir );

} else {

    # An unknown data directory reaches this point as an empty string, so
    # testing it with defined() alone would never detect that case and
    # find() would be run on an empty path.
    print "Local data directory is unknown: specify ReDIF home";

}

print "\n";


# File::Find callback: checks every found file whose name ends in ".rdf"
# (case-insensitive).
#
# File::Find supplies the entry's name in $_ and its path, including the
# directories above it, in $File::Find::name.  The former is what gets
# opened, the latter is what gets reported.
sub wanted {
    return unless /\.rdf$/i;

    # A directory can carry an ".rdf" name too; it cannot be parsed as a
    # file (its contents are still visited by find()).
    return if -d $_;

    checkfile( $File::Find::name, $_ );
}


# Parses one ReDIF file and prints a one-file report to the selected output.
#
# Parameters:
#   $filename   - the name shown in the report
#   $local_name - the name actually handed to redif_open_file();
#                 defaults to $filename when omitted or false
#
# Output: "file NAME: " followed by the reports of the templates that have
# messages, then "OK (N)" (N = number of good templates) when nothing was
# reported, or "empty or foreign file" when no template was read at all.
sub checkfile {

    my ( $filename, $local_name ) = @_;
    $local_name ||= $filename;

    print "file $filename: ";

    redif_open_file( $local_name );

    # undef: no template seen yet, 1: nothing reported so far, 0: problems
    my $verdict;
    my %tally = ( good => 0 );

    while ( my $template = redif_get_next_template_good_or_bad() ) {

        $verdict = 1 unless defined $verdict;

        if ( $template->{ENCODING} eq 'invalid' ) {
            # such templates are only mentioned, their report is not shown
            print "\nskipping a Unicode-data template, which has some errors... "
                if $template->{ERRORS};
        } elsif ( $template->{MESSAGES} ) {
            print "\n", $template->{REPORT};
            $verdict = 0;
        }

        my $kind = $template->{RESULT} eq 'good' ? 'good' : 'bad';
        $tally{$kind}++;
    }

    if ( $verdict ) {
        print "OK ($tally{good})";
    } elsif ( not defined $verdict ) {
        print "empty or foreign file";
    }
    print "\n";
}


############################################################################

1;

__END__

=head1 NAME

rech - ReDIF data checking tool

=head1 SYNOPSIS

rech [ -w | -e | -d | -m number ] [ -a ] [ -nq ] 
[ --spec specification-file | --redif.spec specification-file ]
[ --rdir ReDIF-home-dir | --redif.home ReDIF-home-dir ]
[ file-or-directory1 ... ]

=head1 DESCRIPTION

This utility is part of the ReDIF-perl suite.  Its purpose is
syntax-checking ReDIF data.

Rech checks the files named on the command line, or searches the named
directories for files with a ".rdf" suffix (case-insensitive) and checks
what it finds.  It prints a detailed report of the problems found with
the data, or says "OK" for each file without any.

If no file or directory is specified on the command line, the local
RePEc archive (or other ReDIF home's data directory) will be checked,
unless the '-a' option is given.

=head1 RECH OPTIONS

=over 4

=item -e 

report errors, don't show warnings

=item -w

report warnings and errors (default)

=item -d 

special debugging mode: in addition to warnings and errors, rech will
report some information about each attribute's validation steps.

=item -m NUMBER

compatibility option.  Sets the minimal rank of the messages to
display; NUMBER must be an integer in the range 0..5.  2 is equivalent
to the "-w" option, 3 to the "-e" option and 0 (zero) to the "-d"
option.

=item -nq 

turn off source data quoting, when reporting data problems

=item -a 

do not check local RePEc archive (or stand-alone ReDIF-home's data/
dir).  Only makes sense when no file/directory is given on the command
line.  That actually makes rech run in "do nothing" mode.  Then it
only checks the configuration, reports it to you and quits.

=item -u

Output quoted source data and reported messages in UTF-8 encoding.
(By default rech outputs data in its original encoding.)

=back 

=head1 GENERAL RePEc/ReDIF SOFTWARE OPTIONS

=over 4

=item --spec FILE-OR-DIR, --redif.spec FILE-OR-DIR

specifies ReDIF specification filename to use.  If the option
parameter FILE-OR-DIR is a valid directory name, then "redif.spec"
file will be searched for in that directory and will be used if
present.  If the parameter is not a directory, but contains a slash or
backslash character, it is assumed to be a full path filename of the
'redif.spec' file to use.  If the parameter is not a directory and has
no slash/backslash in it, then it is treated as a name of the file to
be used instead of 'redif.spec' in the default specification
directory, which depends on specified or otherwise given ReDIF home
directory.

So giving "--spec ./spec" would make rech look for redif.spec in
that directory, and abort if it is not found.

Giving "--spec /home/ivan/special-redif.spec" would mean that that file 
would be checked and used if found, or rech will abort.

Giving "--spec new-redif.spec" means that in the default
"redif.spec"'s directory file "new-redif.spec" will be searched and
used if found.  Abort is guaranteed otherwise.

Option --redif.spec is equivalent.


=item --rhome REDIFDIR, --redif.home REDIFDIR

Sets the directory of your RePEc archive or ReDIF home.  Overrides
REDIFDIR (and REDIFHOME) environment variables and other settings.

=back

=head1 IGNORED COMPATIBILITY OPTIONS

These options are actually ignored and only supported for minimal
compatibility with previous versions of rech:  -ne -in -s

The functionality these options were related to has been removed from
rech as useless.

=head1 TO DO

=over 4

=item * 

an option to turn off recursive directory treatment

=back


=head1 AUTHOR

Ivan Kurmanov for the RePEc project

=cut




