package ReDIF::Spec;
#
#  The package to read and use ReDIF specification file.
#

##  Copyright (c) 1997-2001 Ivan Kurmanov. All rights reserved.
##
##  This code is free software; you can redistribute it and/or modify it
##  under the same terms as Perl itself.

#      $Id: Spec.pm,v 2.1 2006/07/03 22:16:21 ivan Exp $
$VERSION = do { my @r=(q$Revision: 2.1 $=~/\d+/g); sprintf "%d."."%02d"x$#r,@r }; 


=pod 

=head1 NAME 

ReDIF::Spec -- redif.spec parser and ReDIF Specification class

=head1 SYNOPSIS

    use ReDIF::Spec;

    ###   create object -- parse redif.spec
    my $spec = ReDIF::Spec -> new();
    $spec = ReDIF::Spec -> new( '/home/ivan/spec/redif.spec' );

    $spec -> ok() || die "can't load redif.spec" ;

    ###   use specification -- get info about a context
    $the_context = $spec -> context( 'some_context_id' );

    my $id_string = $spec -> template_type_id( 'template type' );
    my $context = $spec -> template_type_context( 'template type' );

    my $type = $spec -> type( 'type' );


=head1 DESCRIPTION

This module is part of the ReDIF-perl suite, developed to allow Perl
applications easy access to ReDIF data.  ReDIF data is validated
against "ReDIF specification".  The specification is a special-format
text file, which describes data structures and value formats for ReDIF
templates.  This file historically has the name of redif.spec, but in
fact it can be a file with any name in your case.

This module provides object-oriented interface to the ReDIF
specification read from the redif.spec.  The only thing you have to do
before you start is specify the redif.spec file name.

The structures used to store redif.spec contents and passed around by
this class' accessor methods are very specific, and are not documented
anywhere.  But briefly: 

- A context is a template-type or a cluster type in which some
  attributes are valid to appear.  Here in ReDIF::Spec a context is
  represented by a hash of attributes and attribute specifications;

- An attribute specification is the name of the attribute with some
  (optional) parameters, governing the value that the attribute may
  validly take and the "occurence constraints" of the attribute and
  the attribute type (e.g. cluster or key).  Here in ReDIF::Spec each
  attribute specification is represented by a hash of several
  parameters and their respective values.

- A type is a set of constraints on the textual value that some
  attributes may take and the processing for them (processing &
  checking are closely related in some cases here).  Constraints can
  be of several types.  A type is represented by a hash of type's
  constraint type ('usage') and its respective value.

=head1 AUTHOR

Ivan Kurmanov, for the RePEc project

=cut

#   ReDIF::Spec and ReDIF::Parser::Core looks at templates and
#   clusters as contexts (a bit similar to what yacc does but much
#   simplier).  In each context there are some attributes, which are
#   expected.  Each context has a name.  An attribute may be a
#   "switch", which means that when the attribute is encountered a new
#   context should be opened (inside current).  That is the case for
#   clusters' key attributes.  Attribute also can have some usual type
#   (url, email or smth like this) and subtype (for switches)




use strict;
use vars qw( $LATEST_REDIF_SPEC_FILE );


use constant DEBUG => 0;

sub new {
    my $class = shift;
    my $file = shift;

    if ( not defined $file ) {
        die "need to know the redif.spec file location";
    }

    my $object = {
        filename => $file,
        ok => 0,
        recordtypes => {},
    };

    bless $object, $class;

    $object-> init_contexts( );

    my $res = $object-> load_specification_file( $file );
    if( $res ) {
        $object ->{ok} = 1;
    }
    

    $object -> compile_template_starters ();
    
    $object -> compile_check_evals ();
    $object -> compile_context_postproc ();

    $object -> build_attribute_list();

#    warn "compiled redif.spec $file";

    return $object;
}


sub ok {
    my $self = shift;
    return $self->{ok};
}

sub version {
    return shift ->{version};
}

sub context {
    my $self = shift;
    my $name = shift;
    return $self->{contexts}{$name};
}


sub template_type_id {
    my $self = shift;
    my $name = shift;
    return $self->{templates}->{$name};
}

sub template_type_context {
    my $self = shift;
    my $name = shift;
    my $id = $self->template_type_id( $name );
    return $self->context( $id );
}


sub type {
    my $self = shift;
    my $name = shift;
    return $self->{types}->{$name};
}


my %uses;

%uses = (
         'check-regex' => 1,
         'check-eval'  => 1,
         'length'      => 1,
#         'preproc' => 1
         );


sub init_contexts {
    
    my $self = shift || die;
  
    my $template = {
        'SHOWNAME' => 'template (general)',
        'NAME' => 'template',
#       'template-type' => {}
    };

    my $zero = {
        'SHOWNAME' => 'empty',
        'NAME' => 'zero',
#       'template-type' => { 'switch' => 'template' },
    };
    
    $self->{templates} = {};
    $self->{types}     = {};
    $self->{contexts} = {
        'zero' => $zero,
        'template' => $template,
    };

    $self->{starters} = {};
}



########################################################################
#
# This function should read the specification file ("redif.spec") and
# supply lines to the next function
#

sub getline {

  START:
    if ( not ($_ = <SPEC>) ) {
        return( $_ );      
    }
    chomp ;
    if ( /^\s*\#/ 
         or /^\s*$/ ) { goto START; }
    return( $_ );
}


#########################################################################
# And this very important function processes data received from
# getline sub and creates data structures in memory for the whole
# this of complete ReDIF data check
#

###############################################################################
###   load specification file method
###############################################################################
sub load_specification_file {

    my $self = shift;
    
    my $spec_file = shift;

    $LATEST_REDIF_SPEC_FILE = $spec_file;

    if ( not open ( SPEC, "<$spec_file" ) ) {
        warn "Error: cannot open ReDIF specification file '$spec_file'";
        return 0;
    }

    my $context='root';
    my $templs = 1;

    my $last;
    my $type;
    my $name;
    my $con;
    my $use;

    my $_contexts  = $self->{contexts};
    my $_templates = $self->{templates};
    my $_types     = $self->{types};
    my $_starters  = $self->{starters};

    while ( getline() ) {

        my $original = $_;
        s/^\s+|\s+$//g;

        if ($context eq 'root') {  

            ###   the redif.spec file level, not yet in any 
            ###   structure or substructure


            ###   process the line found

            tr/A-Z/a-z/;
            ($type, $name) = split (/\s*=\s*/, $_ );

            if( not defined $type or not defined $name ) {
                warn "Error in redif.spec file, line $.";
                return( 0 );
            }       
            
            if ( $name =~ /\{$/ ) {
                $last = '{';
                $name =~ s/\s*\{$//;
            } else { $last = 0; }

            if ($type eq 'version') {   ###  redif.spec version string
                if( not defined $self->{version} ) {
                    $self->{version} = $name;
                } else {
                    ###  version statement repeated
                }

            } elsif ($type eq 'include') {   ###  include another redif.spec file
                if( not defined $self->{include} ) {
                    $self->{include} = ();
                }

                ###  push it onto a list of files to process
                push @{$self->{include}}, $name;

            } elsif ($type eq 'cluster') {   ###   cluster type description block
                $con = {};
                $_contexts -> { $name } = $con ;
                $con-> {'NAME'} = $name;
                $con-> {'SHOWNAME'} = $name;
                
                $con-> {'DESCRIPTION'} = "cluster of type '$name'";
                $context = '{';

            } elsif ( $type eq 'template' ) {   ###   template type description block
                $con = {};
                my $id = 'template'.$templs ;

                $_contexts -> { $id } = $con ;
                $con-> {'NAME'} = $id;
                $con-> {'TEMPLATE'} = $name;
                $con-> {'SHOWNAME'} = "template $name";

                $con-> {'DESCRIPTION'} = "template of type '$name'";
                $con-> {SWITCHERS} = {};

                $_templates -> { $name } = $id;
                $templs++;
                $context = '{';

            } elsif ( $type eq 'type' ) {   ###   data type description block
                $use = '';
                if ($name =~ /\//) {
                    $name =~ s/\s*\/\s*([^\s]+)//
                        or (
                            warn ("'type' statement is incorrect at redif.spec file, line $." )
                            and return( 0 ) );
                    $use = $1;
                }
                if ($use eq '') { $use = 'check-regex' }
                elsif ( ! $uses{$use} ) {
                    warn ( "type statement's \'use\' part is incorrect:".
                         " '$use' at redif.spec file, line $." );
                    return( 0 );
                }
                $context = '{';

            } elsif ( $type eq 'postproc' ) {   ###   template type description block

                $con = $_contexts -> { $name };

                if( not defined $con ) {
                    $con = $_contexts -> { $_templates ->{ $name } };
                }

                if( not defined $con ) {
                    $con = {};
                    $_contexts -> { $name } = $con;

                    warn ( "postprocessing being defined for an unknown context '$name' at redif.spec, line $." );
#                   return 0;
                }
                
                die if ref( $con ) ne 'HASH';

                $con->{POSTPROC} = "";

                $context = '{';

            } elsif ( $type eq 'record-type' ) {   ###   open record type description block
                my $rt = {};
                $self->{recordtypes} -> {$name} = $rt;
                $con = $rt;

                $con-> {'NAME'}     = $name;
                $con-> {'SHOWNAME'} = $name;
                $con-> {'DESCRIPTION'} = "record of type '$name'";

                $con-> {'TYPE'}     = "record";

                $context = "{";

            } else {   ###   unrecognized construct at the root file level
                warn( "Error in redif.spec file, line $. (unrecognized construct)" );
                return( 0 );
            }

            if ( $last eq '{' ) {
                $context = $type;
            }

        } elsif ( $context eq '{' ) {
            if ($_ =~ /{/ )  {
                $context = $type;
            }

        } elsif ( $context =~ /^(?:cluster|template)$/ ) {
            
            ###  now inside cluster or template type definition block

            if ( $original =~  /^\}\s*$/ ) {
                # close block
                $context = 'root';
                next;
            }

            ###  parse an attribute definition line into particles
            tr/A-Z/a-z/;

            my ($name, $type, $subtype, $min, $max, $length, $deprec);

            if( index( $_, ',' ) > -1 ) {
                my @parts = split /\s*,\s*/, $_;
                my $name_type = shift @parts;

                ($name, $type, $subtype) = split /\s*:\s*/, $name_type;

                my $qualifier;
                foreach $qualifier ( @parts ) {
                    if ( $qualifier eq 'req'
                         or $qualifier eq 'required'  
                       ) {
                        if ( not defined $min ) { $min = 1; }
                        else {
                          warn 
                          "conflicting or superflous qualifier '$qualifier' at line $. of redif.spec file $spec_file";
                        }
                    } elsif ( $qualifier eq 'norep'  
                              or $qualifier eq 'nonrep' 
                              or $qualifier eq 'nonrepeatable' ) {
                        if ( not defined $max ) { $max = 1; }
                        else {
                          warn 
                        "conflicting or superflous qualifier '$qualifier' at line $. of redif.spec file $spec_file";
                        }
                    } elsif ( 
                              ( $qualifier eq 'key' ) ) {
                        $min = '*';

                    } elsif ( $qualifier =~ /^deprec(ated)?$/i ) {
                        $deprec = 1;

                    } else {
                        die "unknown (wrong) attribute qualifier '$qualifier' at line $. file $spec_file";
                    }

                }
                
            } else {
                ($name, $type, $subtype, $min, $max, $length) = split (/\s*:\s*/, $_ );
            }

            if ( not defined $name ) {
                warn "Bad attribute definition line at redif.spec line $. (ignored)";
                next;
            }

            my $spec;   ###  reference to the attribute specification structure

            if ( defined $type  and  $type eq 'cluster' ) {
                my $n = $name;
                my $key = $_contexts ->{$subtype} {KEY};
                $name .= "-$key";
                
                $spec = $con -> {$name} = {};

                $spec -> {switch} = $subtype;
                $spec -> {prefix} = $n . '-';

                $con -> {SWITCHERS} {$n} = $subtype;

            } else {
              $spec = $con -> {$name} = {};

              if ( not $type ) {
                $type = "default_type";
              }
              $spec ->{type} = $type;
              $spec ->{subtype} = $subtype;
            }


            ###   occurence statistics 

            $spec ->{COUNT}{CUR} = 0;
            $spec ->{COUNT}{ALL} = 0;
            $spec ->{COUNT}{MAX} = 0;

            ###   length
            if ( defined $length ) {
#             $spec -> {maxlength} = $length;
            }

            if ( (defined $min) and ($min eq '*') )  {
                $spec-> {key} = 1;
                $con -> {KEY} = $name;

                if( $context eq 'template' ) {
                    if( not defined  $_starters -> {$name} ) {
                        $_starters -> {$name} = [];
                    } 
                    push @{ $_starters ->{$name} }, $con->{NAME};
                }
  
            } elsif (defined $min and ($min) ) {
                if ($min !~ /^\d$/ ) {
                    warn "Min occurence constraint of attribute '$name' "
                        . "is incorrect at redif.spec, line $. (ignored)";
                    $min = 0;
                }
                $spec-> {min} = $min;

            } else {
                $spec-> {min} = undef;
            }

            if ( defined $max and $max ne '' ) {
                $spec-> {max} = $max;
            }
            if ( $deprec ) { $spec->{deprec} = 1; }

        } elsif ( $context =~ /^record\-type$/ ) {
            
            ###  now inside record type definition block

            if ( $original =~  /^\}\s*$/ ) {
                # close block
                $context = 'root';
                next;
            }

            my $special_line;

            if( /^!/ ) { $special_line = 1; }

            ###  parse an attribute definition line into particles
            if( not $special_line ) {
                tr/A-Z/a-z/;
            }

            ###  real problem with open record's context objects:
            ###  usual context object's hash has two kinds of members:
            ###  lower case - attributes, upper case - type metadata.
            ###  Here I thought of providing a possibility of
            ###  case-sensitive attributes, but that is probably too
            ###  much to ask from the parser now.  so go remove "_"...

            
            if( $special_line ) {
                s/^!//;
                my ($directive, $attribute, $value ) = split /\s*:\s*/, $_; 
                
                if( ($directive eq 'START' ) 
                    or ($directive eq 'END' ) ) {
                    $con -> {$directive} = { attr => $attribute, value => $value };
                } else {
                    $con -> {$directive} = $attribute ;
                }
                goto RECORD_TYPE_END;
            }

            my ($name, $type, $subtype, $min, $max);

            if( index( $_, ',' ) > -1 ) {
                my @parts = split /\s*,\s*/, $_;
                my $name_type = shift @parts;

                ($name, $type, $subtype) = split /\s*:\s*/, $name_type;

                my $qualifier;
                foreach $qualifier ( @parts ) {
                    if(
                       ( $qualifier eq 'req' ) 
                       or ( $qualifier eq 'required' ) 
                       ) {
                        if( not defined $min ) { $min = 1; }
                        else {
                            warn "conflicting or superflous qualifier '$qualifier' at line $. of redif.spec file $spec_file";
                        }
                    } elsif ( 
                              ( $qualifier eq 'norep' ) 
                              or ( $qualifier eq 'nonrep' ) 
                              or ( $qualifier eq 'nonrepeatable' )
                              ) {
                        if( not defined $max ) { $max = 1; }
                        else {
                            warn "conflicting or superflous qualifier '$qualifier' at line $. of redif.spec file $spec_file";
                        }
                    } elsif ( 
                              ( $qualifier eq 'key' ) ) {
                        $min = '*';

                    } else {
                        die "unknown (wrong) attribute qualifier '$qualifier' at line $. file $spec_file";
                    }

                }
                
            } else {
                ($name, $type, $subtype, $min, $max) = split (/\s*:\s*/, $_ );
            }

            if (not defined $name) {
                warn "Bad attribute definition line at redif.spec line $. (ignored)";
                next;
            }

            if ( (defined $type) and ($type eq 'cluster') ) {
                my $n = $name;
                my $key = $_contexts->{$subtype}{'KEY'};
                $name .= "-$key";

                $con -> {$name} {'switch'} = $subtype;
                $con -> {$name} {'prefix'} = $n . '-';
                $con -> {SWITCHERS} -> {$n} = $subtype;

            } else {
                $con -> {$name} {'type'} = $type;
                $con -> {$name} {'subtype'} = $subtype;
            }


            ###   occurence statistics placeholders 

            $con -> {$name}{'COUNT'}{'CUR'}=0 ;
            $con -> {$name}{'COUNT'}{'ALL'}=0 ;
            $con -> {$name}{'COUNT'}{'MAX'}=0 ;

            if ( (defined $min) and ($min eq '*') )  {

                warn "attribute definition as a key makes no sense ".
                    "in record type definition at redif.spec, line $.";

            } elsif (defined $min and ($min) ) {
                if ($min !~ /^\d$/ ) {
                    warn "Min occurence constraint of attribute '$name' "
                        . "is incorrect at redif.spec, line $. (ignored)";
                    $min = 0;
                }
                $con -> {$name} {'min'} = $min;

            } else {
                $con -> {$name} {'min'} = undef;
            }

            if ( (defined $max) and ($max ne '')) {
                $con -> {$name} {'max'} = $max;
            }

          RECORD_TYPE_END:

        } elsif ( ($context eq 'type') ) {

            if ( $original =~  /^\}\s*$/ ) {
                # close type
                $context = 'root';
                $use = '';

            } else {
                die "internal error" if not defined $use;

                use ReDIF::setup_conf;

                if ( $use eq 'length' ) {
                  my $type_spec = $_types ->{$name} {$use};
                  if ( not defined $type_spec ) {
                    $type_spec = {};
                  }

                  if ( $_ =~ /^\s*(top|max|min|bottom):\s*(\d+)\s*$/i ) {
                    $type_spec -> {$1} = $2;
                  }

                  ### --LENGTH--
                  if ( ReDIF::Parser::value_length_checking ) {
                    $_types ->{$name} {$use} = $type_spec;
                  }
                  ### --/LENGTH--

                } else {

                  if ( defined $_types ->{$name} {$use} ) {
                    if ( $use eq 'check-eval' ) {
                      $_types ->{$name} {$use} .= "\n" ;
                    } 
                    $_types ->{$name} {$use} .= $_ ;
                  } else {
                    $_types ->{$name} {$use} = $_ ;
                  }
                }
            }

        } elsif ( $context eq 'postproc' ) {

            if ( $original =~  /^\}\s*$/ ) {
                # close type
                $context = 'root';
                undef $con ;
            } else {
                $con->{POSTPROC} .= $_;
                $con->{POSTPROC} .= "\n";
            }

        }
    }

    close SPEC;

    $self->{spec_file_loaded_ok} = 1;

    return 1;
}


###############################################################################
###   compile check-evals  method
###############################################################################
sub compile_check_evals {
    my $self = shift;
    my $types = $self->{types};

    foreach my $name ( keys %$types ) {
        my $type = $types->{$name};
        if( $type->{'check-eval'} ) {
            my $eval_expr = $type->{'check-eval'} ;
            
            my $package_name = 'ReDIF::Parser::Special';
            
            use ReDIF::Parser::Special;
            
            my $func_name = "${package_name}::check_eval_${name}" ; 
            
            my $definition = "package $package_name; \n"
                . " sub $func_name \{ $eval_expr  \} " ;

            { 
                no strict; 
                undef &$func_name;
                eval $definition;
            }
            
            if( $@ ) {
                warn "error at compiling a 'check-eval' section of redif.spec,"
                    . " datatype: $name ($@)";
                print STDERR "the EVAL : $definition \n";
            } else {
                $type->{'compiled-check-eval'} = \&{ $func_name } ;
#               warn "COMPILED CHECK-EVAL FOR TYPE: $name\n$definition\n";
            }
        }
    }


}




###############################################################################
###   compile context-postproc  method
###############################################################################
sub compile_context_postproc {
    my $self = shift;
    my $contexts = $self->{contexts};

    foreach my $name ( keys %$contexts ) {
        my $con = $contexts->{$name};
        if( $con->{POSTPROC} ) {
            my $eval_expr = $con->{POSTPROC} ;
            
            my $package_name = 'ReDIF::Parser::Special';
            
            use ReDIF::Parser::Special;
            
            my $func_name = "${package_name}::context_postprocessing_${name}" ; 
            
            my $definition = "package $package_name; \n"
                . " sub $func_name \{ $eval_expr  \} " ;

            { 
                no strict; 
                undef &$func_name;
                eval $definition;
            }
            
            if( $@ ) {
                warn "error at compiling a 'postprocessing' section of redif.spec,"
                    . " context: $name ($@)";
                print STDERR "the EVAL : $definition \n";
            } else {
                my $item = \&{ $func_name } ;
                my $list;
                if( defined $con->{POST_RULES} ) {
                    $list = $con->{POST_RULES};
                } else {
                    $list = []; 
                    $con->{POST_RULES} = $list;
                }
                push @$list, $item;
#               warn "compiled POST-PROCESSING for context $name";
            }
        }
    }


}


###############################################################################
###   compile  template  starters
###############################################################################

sub compile_template_starters {

    my $self = shift;
    
    
    my $_contexts  = $self->{contexts};
    my $_templates = $self->{templates};
    my $_types     = $self->{types};
    my $_starters  = $self->{starters};

    my $zero       = $_contexts->{zero};
    my $template   = $_contexts->{template};

    foreach my $s ( keys %$_starters ) {
        my $contexts = $_starters->{$s};
        
        if( scalar $contexts > 1 ) {
            $zero ->{ $s } = { switch => 'template' };
            $template -> { $s } = {};
        } else {
            $zero ->{ $s } = { switch => $contexts->[0] };
        }
    }


    if( DEBUG ) {
        
        ###  XXX 

        use Data::Dumper;      
        
        $Data::Dumper::Maxdepth = 2;
        
        $Data::Dumper::Varname = 'contexts';
        print "Spec.pm: ", Dumper( $self->{contexts} );
        
        $Data::Dumper::Varname = 'templates';
        print "Spec.pm: ", Dumper( $self->{templates} );

    }
}


###############################################################################

sub record_type {
    my $self = shift;
    my $rt_name = shift;

    return $self->{recordtypes}->{$rt_name};
}



###############################################################################
###   enable  record  type
###############################################################################

sub enable_record_type {

    my $self    = shift;
    my $rt_name = shift; # record type
        
    my $_contexts  = $self->{contexts};
    my $_templates = $self->{templates};
    my $_types     = $self->{types};
    my $_starters  = $self->{starters};

    my $zero       = $_contexts->{zero};
    my $template   = $_contexts->{template};

    my $rt_o       = $_contexts->{$rt_name};


    #XXX

    if( DEBUG ) {
        
        ###  XXX 

        use Data::Dumper;      
        
        $Data::Dumper::Maxdepth = 2;
        
        $Data::Dumper::Varname = 'contexts';
        print "Spec.pm: ", Dumper( $self->{contexts} );
        
        $Data::Dumper::Varname = 'templates';
        print "Spec.pm: ", Dumper( $self->{templates} );

    }
}



###############################################################################
###   build attribute list method
###############################################################################

sub build_attribute_list {
    my $self = shift;
    foreach my $templ ( values %{$self->{templates}} ) {
        $self-> build_attribute_list_of_context( $templ, '' );
    }
    die if not defined $self->{attribute_list};

}


sub build_attribute_list_of_context {   ### recursive
    my $self = shift;
    my $context_id = shift;
    my $prefix = shift ;

    my $template_type = shift;

    my $con = $self->context( $context_id );

    if ( not defined $self->{attribute_list} ) {
        $self->{attribute_list} = {};
    } 

    my $list = $self->{attribute_list};

    foreach my $attr ( keys %$con ) {

        ###  ignore special (up-case) context hash keys

        if ( $attr !~ /[A-Z]/ ) {   

            my $context_name = $con->{TEMPLATE};

            if( defined $context_name ) {
                $template_type = $context_name;
            }

            if ( defined $con->{$attr}{switch} ) {
                $self-> build_attribute_list_of_context ( 
                                $con->{$attr}{switch},
                                 $prefix . $con->{$attr}{prefix}, $template_type ) ;
            } else {
                my $prefixed_attr = $prefix . $attr;

                my $context_name = $template_type;

                if ( defined $list->{$prefixed_attr} ) {
                    my $the_list = $list->{$prefixed_attr};

                    if ( $the_list !~ /$context_name/ ) {   
                        ###  not yet mentioned there
                        $list -> {$prefixed_attr} .= " or '$context_name'";

                    } else {  
                        ### already mentioned: ignore 
                    }

                } else {
                    $list -> {$prefixed_attr} = "'$context_name'";
                }
            }
        }
    }
}

#############################################################################
#####################    t h e   e n d     ##################################
#############################################################################


1;



