#!/usr/bin/perl

# Originally created: March 4th, 2000, Ivan Kurmanov
#

package RePEc::Iterator;

###   This package is a tool for RePEc/ReDIF developers.  It helps to
###   build applications which work on the whole RePEc/ReDIF dataset,
###   like rewe, for example.  When given a ReDIF root directory, this
###   module traverses the dataset archive by archive: it starts with
###   each archive's infrastructure files (e.g. XXXarch.rdf,
###   XXXseri.rdf, XXXmirr.rdf) and then finds all other ReDIF
###   data-files of the archive.  The Events module provides the
###   call-back system through which everything found is reported.


###  version of this package
$VERSION = '0.2';

use Events;


# use Carp::Assert (':NDEBUG');
# sub DEBUG {};


use File::Find;

###  executed when the module is loaded: registers all event names
prepare();

# Register with the Events module every event name this package uses:
# the archive start/finish events, one event per kind of ReDIF file, and
# the iterator's own error/warning/message events.
#
# Takes no arguments.  Returns whatever the registration of the last
# event name returns.
sub prepare {

    my @event_names = qw(
        REDIF::ARCHIVE::STARTED
        REDIF::ARCHIVE::FINISHED
        REDIF::FILE::NORMAL
        REDIF::FILE::ARCHIVE
        REDIF::FILE::SERIES
        REDIF::FILE::MIRROR
        REPEC::ITERATOR::ERROR
        REPEC::ITERATOR::WARNING
        REPEC::ITERATOR::MESSAGE
    );

    ###  the last name is registered by the return statement, so its
    ###  result is what the caller receives
    my $final_name = pop @event_names;

    foreach my $event_name ( @event_names ) {
        Events -> register_event ( $event_name );
    }

    return Events -> register_event ( $final_name );
}

###################################
###################################
###################################

use strict;

use vars qw( 
	     $redif_remo_dir
	     $redif_all_dir

	     $authority
	     $archive
	     $archive_id
	     @ARCHIVES

	     $file
	     $file_name_short 

	     $archive_files_counter 
	     
	     $REPORT_EVENT

	     $DIR
	     %DIR

	     );

# Report a message: all arguments are concatenated (no separator) and
# the resulting string is passed to the REPEC::ITERATOR::MESSAGE event.
# Returns whatever the event call returns.
sub p {
    my $text = join '', @_;
    return Events -> REPEC::ITERATOR::MESSAGE ( $text );
}

# Report an error: all arguments are concatenated (no separator) and
# the resulting string is passed to the REPEC::ITERATOR::ERROR event.
# Returns whatever the event call returns.
sub e {
    my $text = join '', @_;
    return Events -> REPEC::ITERATOR::ERROR ( $text );
}

# Report a warning: all arguments are concatenated (no separator) and
# the resulting string is passed to the REPEC::ITERATOR::WARNING event.
# Returns whatever the event call returns.
sub w {
    my $text = join '', @_;
    return Events -> REPEC::ITERATOR::WARNING ( $text );
}



###  file-scoped lexicals:
###    $the_dir          - the directory main() reads the archive list from
###    $archives_counter - declared here, not referenced by the code below
my ( $the_dir, $archives_counter );


# Public entry point: passes all of its arguments on to main() and
# returns main()'s result.
sub start {
    my @arguments = @_;
    return main( @arguments );
}

# main( $redif_dir, $authority )
#
# Traverses a ReDIF tree and reports what it finds through Events.
#
#   $redif_dir  - ReDIF root directory; archives are looked for in
#                 "$redif_dir/remo/<id>" and the list of archives is read
#                 from "$redif_dir/remo/all"
#   $authority  - prefix of the archive handle, "$authority:<id>"
#
# Every "<id>arch.rdf" entry of the all/ directory (three letters, any
# case) announces one archive, which is handled like this:
#
#   1. REDIF::ARCHIVE::STARTED is fired with the archive handle.  If the
#      call dies or returns a false value, the archive is skipped.
#   2. The archive directory is searched for <id>arch.rdf, <id>seri.rdf
#      and <id>mirr.rdf.  An unreadable directory or a missing arch file
#      is an error and the archive is skipped; a missing seri file only
#      produces a warning.
#   3. The special files found are passed to archive_file(), series_file()
#      and mirror_file() (the latter only if the file is readable).
#   4. File::Find runs redif_file_found() over the archive directory.  If
#      no file was counted and "$redif_dir/<id>" exists, that directory
#      is scanned as well.
#   5. REDIF::ARCHIVE::FINISHED is fired with the archive id.
#
# A skipped archive gets REDIF::ARCHIVE::FINISHED with an empty string.
# Exceptions raised inside steps 3 and 4 are reported with e() and do not
# stop the traversal.  Dies if the all/ directory cannot be opened.
# Returns the value of the final "Finished looking ..." message call.
sub main {

    my $redif_dir   = shift;
    $authority      = shift;
    $redif_remo_dir = "$redif_dir/remo";
    $redif_all_dir  = "$redif_remo_dir/all";

    $the_dir = "$redif_all_dir";

    p( "Started looking for ReDIF in: '$the_dir'" );

    opendir( ALLDIR, $the_dir )
        or die "can't open the all '$the_dir' directory to read: $!";

  MAIN_LOOP:
    while ( defined( my $dir_entry = readdir ALLDIR ) ) {

        ###  only the XXXarch.rdf entries of all/ announce an archive
        next MAIN_LOOP
            unless $dir_entry =~ m|^([a-z]{3})arch\.rdf$|i;

        $archive_id = lc $1;
        $archive    = "$authority:$archive_id";

        my $archive_dir = "$redif_remo_dir/$archive_id";

        ###  a false result or an exception from the STARTED event
        ###  means: do not process this archive
        my $started = eval {
            Events -> REDIF::ARCHIVE::STARTED ( $archive );
        };
        my $start_error = $@;
        e( $start_error ) if $start_error;
        if ( $start_error or not $started ) {
            Events -> REDIF::ARCHIVE::FINISHED ( '' );
            next MAIN_LOOP;
        }

        ###  browse the archive directory in search of the
        ###  special files: XXXarch.rdf, XXXseri.rdf and XXXmirr.rdf
        if ( not opendir( ARCHDIR, $archive_dir ) ) {
            e( "can't open the archive directory '$archive_dir': $!" );
            Events -> REDIF::ARCHIVE::FINISHED ( '' );
            next MAIN_LOOP;
        }
        my @files = readdir ARCHDIR;
        closedir ARCHDIR;

        my ( $arch_rdf_file ) = grep { m/${archive_id}arch\.rdf$/i } @files;
        my ( $seri_rdf_file ) = grep { m/${archive_id}seri\.rdf$/i } @files;
        my ( $mirr_rdf_file ) = grep { m/${archive_id}mirr\.rdf$/i } @files;

        ###  let the user know if not all special files are found.
        ###  this is a bad sign for an archive
        if ( not defined $arch_rdf_file ) {
            e( "${archive_id}arch.rdf file not found!" );
            Events -> REDIF::ARCHIVE::FINISHED ( '' );
            next MAIN_LOOP;
        }
        $arch_rdf_file = "$archive_dir/$arch_rdf_file";

        if ( defined $seri_rdf_file ) {
            $seri_rdf_file = "$archive_dir/$seri_rdf_file";
        } else {
            w( "${archive_id}seri.rdf file not found!" );
        }

        ###  the mirror file is optional: its absence is not reported
        $mirr_rdf_file = "$archive_dir/$mirr_rdf_file"
            if defined $mirr_rdf_file;

        ###  redif_file_found() increments this for every data file
        $archive_files_counter = 0;

        _report_special_file( \&archive_file, $arch_rdf_file );

        _report_special_file( \&series_file, $seri_rdf_file )
            if $seri_rdf_file;

        _report_special_file( \&mirror_file, $mirr_rdf_file )
            if $mirr_rdf_file and -r $mirr_rdf_file;

        ###  now search for usual (normal) data files
        _scan_data_files( $archive_dir );

        ###  second best: nothing below remo/<id>, so try <root>/<id>
        if ( not $archive_files_counter
             and -e "$redif_dir/$archive_id" ) {
            _scan_data_files( "$redif_dir/$archive_id" );
        }

        Events -> REDIF::ARCHIVE::FINISHED ( $archive_id );

        $archive_id = '';
    }

    closedir ALLDIR;

    return p( "Finished looking for ReDIF in: '$the_dir'" );
}

# Private helper of main(): passes one special file (arch, seri or mirr)
# to $handler together with its short name, i.e. its path relative to
# $redif_remo_dir.  An exception raised by the handler is reported with
# e() and not propagated.
sub _report_special_file {
    my ( $handler, $full_path ) = @_;

    ###  \Q...\E makes the directory name match literally, even if it
    ###  contains characters that are special in a pattern; the list
    ###  assignment yields undef (never a left-over $1) on a failed match
    my ( $short_path ) = $full_path =~ m/^\Q$redif_remo_dir\E\/(.+)$/;

    eval {
        $handler -> ( $full_path, $short_path );
    };
    e( $@ ) if $@;

    return;
}

# Private helper of main(): runs File::Find over $dir with
# redif_file_found() as the call-back.  An exception raised during the
# search is reported with e() and not propagated.
sub _scan_data_files {
    my ( $dir ) = @_;

    eval {
        find ( \&redif_file_found, $dir );
    };
    e( $@ ) if $@;

    return;
}


#####################################################################
## redif_file_found
#

# File::Find call-back used by main(): reports one ordinary ReDIF data
# file through process_file().
#
# Reads $File::Find::name (full path of the current entry) and $_ (as set
# by File::Find), plus the package variables $archive_id and
# $redif_remo_dir.  Entries whose full path ends in <id>seri.rdf,
# <id>arch.rdf or <id>mirr.rdf, and entries whose $_ does not end in
# ".rdf", are ignored.  For every other entry $archive_files_counter is
# incremented, $file_name_short is set and process_file() is called with
# the full and the short name.
#
# Returns the result of process_file(), or nothing for an ignored entry.
sub redif_file_found {

    my $fname = $File::Find::name;

    ###  the three special files are not ordinary data files
    return if $fname =~ /${archive_id}(?:seri|arch|mirr)\.rdf$/i;

    return unless /\.rdf$/i;

    $archive_files_counter ++;

    ###  Short name: the path relative to the remo directory or, failing
    ###  that, the part after a "../" which starts with the archive id.
    ###  \Q...\E makes the directory name match literally, even if it
    ###  contains characters that are special in a pattern.
    if ( $fname =~ /^\Q$redif_remo_dir\E\/?(.+)/ ) {
        $file_name_short = $1;
    } elsif ( $fname =~ /\.\.\/(${archive_id}\/.+)/ ) {
        $file_name_short = $1;
    } else {
        $file_name_short = undef;
    }

    if ( not $file_name_short ) {
        w( " Can't produce short filename from full name: $File::Find::name" );

        ###  last resort: start at the first three-letter path component;
        ###  the list assignment leaves undef if even that fails
        ( $file_name_short ) = $fname =~ /\/([a-z]{3}\/.+)/i;
    }

    return process_file( $fname, $file_name_short );
}


#####################################################################
## process_file
#

# Report an ordinary ReDIF data file: fires REDIF::FILE::NORMAL with the
# full file name, the short file name and the current $archive_id.
# The event is called in scalar context; its result is returned.
sub process_file {
    my ( $full_path, $short_path ) = @_;

    my $outcome =
        Events -> REDIF::FILE::NORMAL ( $full_path, $short_path, $archive_id );
    return $outcome;
}

##################################################################
##  ARCHIVE_FILE
#

# Report an archive template file: appends the current $archive_id to
# @ARCHIVES, then fires REDIF::FILE::ARCHIVE with the full file name, the
# short file name and the current archive handle ($archive).
# The event is called in scalar context; its result is returned.
sub archive_file {
    my ( $full_path, $short_path ) = @_;

    push @ARCHIVES, $archive_id;

    my $outcome =
        Events -> REDIF::FILE::ARCHIVE ( $full_path, $short_path, $archive );
    return $outcome;
}

##################################################################
##  SERIES_FILE
#
# Report a series file: fires REDIF::FILE::SERIES with the full file
# name, the short file name and the current $archive_id.
# The event is called in scalar context; its result is returned.
sub series_file {
    my ( $full_path, $short_path ) = @_;

    my $outcome =
        Events -> REDIF::FILE::SERIES ( $full_path, $short_path, $archive_id );
    return $outcome;
}


##################################################################
##  MIRROR_FILE
#
# Report a mirror file: fires REDIF::FILE::MIRROR with the full file
# name, the short file name and the current $archive_id.
# The event is called in scalar context; its result is returned.
sub mirror_file {
    my ( $full_path, $short_path ) = @_;

    my $outcome =
        Events -> REDIF::FILE::MIRROR ( $full_path, $short_path, $archive_id );
    return $outcome;
}


1;
