package RePEc::Index::Update;

#  This perl module is part of the RePEc-Index system.  This module is
#  responsible for the update logic.  It looks at the file and
#  directory names supplied, remembers their metadata and compares
#  it with the old values.

#
#  Copyright (c) 2000-2006 Ivan Kurmanov, RePEc project.  All rights
#  reserved.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the same terms as Perl itself.
#

BEGIN { 
# Build $VERSION from the numbers in the RCS "Revision" keyword: the first
# number is printed as-is, each following number as two zero-padded digits.
$VERSION = do { my @r=(q$Revision: 2.5 $=~/\d+/g); sprintf "%d."."%02d"x$#r,@r };
# $Id: Update.pm,v 2.5 2007/02/14 20:59:56 ivan Exp $
}

use strict;

use Carp qw( cluck );
use Carp::Assert;

use RePEc::Index;
use RePEc::Index::Collections;
use RePEc::Index::Config;
use RePEc::Index::Log;

use RePEc::Index::Storage qw(
                             &load_record_from_db_txn
                             &load_record_from_db_txn_readonly
                             &save_record_to_db_txn
                             &delete_record_from_db_txn
                             &start_transaction
                             &commit_transaction
                             &load_record_from_db
                            );

use Data::Dumper;

# some time ago this was optional, now it's not:

use RePEc::Index::History::Handle;


# use Storable qw( thaw );

# Pass all arguments, unchanged, to the logger in RePEc::Index::Log.
sub log_it {
    my @message = @_;
    return RePEc::Index::Log::log( @message );
}

# Log the given message parts at level 8.
sub error {
    return log_it( 8, @_ );
}

# Log the given message parts at level 6.
sub warning {
    return log_it( 6, @_ );
}




# Package-wide control flags.  They are set by the ABORT(), PAUSE() and
# CONTINUE() subs of this module and polled by its processing loops.
use vars qw( $ABORT $PAUSE  );
$ABORT = 0;    # once true, the processing subs return 0 when they notice it
$PAUSE = 0;    # while true, the processing loops sleep in 3-second steps

# Raise the abort flag and log the fact at error level.
# NOTE(review): presumably installed as a signal handler by the caller.
sub ABORT {
    $ABORT = 1;
    return error( "[$$] Aborting, got a signal" );
}
# Raise the pause flag; the processing loops sleep while it is set.
sub PAUSE {
    $PAUSE = 1;
    return $PAUSE;
}
# Clear the pause flag so that paused processing loops go on.
sub CONTINUE {
    $PAUSE = 0;
    return $PAUSE;
}





use Events;

# Register the events this module fires.  Each call passes the fully
# qualified event name and a human-readable description of the event.

# fired by read_file() before it starts reading records
Events -> register_event( "RePEc::Index::Update::DATAFILE_START", 
                          "start processing a data file" );

# fired by read_file() after all records have been handled
Events -> register_event( "RePEc::Index::Update::DATAFILE_FINISH", 
                          "finishing processing a data file" );

# fired by read_file() for every record whose id passes check_id()
Events -> register_event( "RePEc::Index::Update::RECORD", 
                          "a record read from a data file" );

# fired by read_file() for every record whose id fails check_id()
Events -> register_event( "RePEc::Index::Update::RECORD_IGNORED", 
                          "a record read from a data file, but ignored due to a bad id" );

# fired by read_file() and disappeared_file()
Events -> register_event( "RePEc::Index::Update::RECORD_DISAPPEAR", 
                          "a record, used to be in a data file, not no longer there" );

# fired by disappeared_file()
Events -> register_event( "RePEc::Index::Update::DATAFILE_DISAPPEAR", 
                          "a data file, used to lie in a directory, but not no longer there" );


=pod

=head1 NAME

RePEc::Index::Update - RePEc-Index update session class

=head1 SYNOPSIS

 use RePEc::Index::Update;

 my $session = RePEc::Index::Update->new( $collection_name );
 $session -> process_this( $path );
 $session -> close;


=head1 DESCRIPTION

This class contains update logic for the RePEc-Index.  RePEc-Index is
a database of files, directories and records in those files.  Although
it is currently designed for RePEc and ReDIF files, I'm going to
abstract the RePEc&ReDIF-dependent parts, to make all this more general.

The module has object-oriented interface.  The main object is an update
session.  When you have something new to process, you create an object, tell
it what you want to update and then close and delete it.

To create an object you use the new( COLLECTION_NAME ) method.

To tell a session object what you want to update you use
process_directory( DIRECTORY ) method.

To close an object you use close() method.

A RePEc-Index installation can monitor a number of metadata collections.
...

=head2 INTERNALS

That is mainly for myself.  You are welcome to skip it.

Object has several internal variables -- object hash members: 

=over 4

=item * SESSION 

Timestamp of the session start, as number of seconds since the epoch.

=item * CURRENT_FILE_FULL

Full (absolute) pathname of the file being processed.

=item * CURRENT_FILE_SHORT

Short name of the file being processed.  Relative from the SOURCE DIR.

=item * TOO_OLD_IS

Seconds since last update of a file, that we think we should
re-process.  If a file didn't change recently, we normally won't
process it.  Unless it was processed too long ago.  This parameter
will set the measure for what is "too long ago".

=item * CURRENT_FILE_STATUS

Status of the file currently being processed.  Can be one of these:

 new
 known/changed
 known/too old
 known/unchanged  

=item * CURRENT_FILE_RECORD

A file-description record object, restored from the RePEc-Index
database or created to be stored there.

Created with 

 $record = RePEc::Index -> create_new_file_record ();

Currently this means the object is of class RePEc::Index::FILE and has
following members:

 filename
 first_observed
 last_observed
 last_changed
 last_read
 templates_list

Each of these has a corresponding get-method and set-method
(add "_set" to the member name).

=back

=head2 METHODS

=over 4

=item * process_file

=item * read_file

=item * process_directory



=back


=cut


#############################################################################
# sub    N E W 
#############################################################################
#
# new( COLLECTION_NAME, KEY => VALUE, ... )
#
# Creates an update session for a collection registered in
# $RePEc::Index::COLLECTIONS.  Extra key/value pairs become members of the
# session hash (e.g. TOO_OLD_IS).  Dies if the collection name is missing
# or unknown, if the collection's data directory cannot be created, or if
# the collection's source directory does not exist.
sub new {
  my ( $class, @args ) = @_;
  my $collection_name  = shift( @args ) || die;
  my $self             = { @args };

  $self->{SESSION}         = time();
  $self->{collection_name} = $collection_name;

  my $collection = $RePEc::Index::COLLECTIONS->{$collection_name};
  $self->{collection} = $collection;

  die "Invalid (unknown) collection: '$collection_name'"
    unless $collection;

  # load the processing modules the collection asks for; a module that
  # fails to load only produces a warning
  if ( $collection->{proc} ) {
    for my $module ( @{ $collection->{proc} } ) {
      eval " use $module; ";
      warn $@ if $@;
    }
    $self->{proc} = $collection->{proc};
  }

  $self->{keep_records_db} = $collection->{options}{records_db};

  my $global_datadir = $RePEc::Index::Config::DATADIR;
  my $datadir        = join "/", $global_datadir, $collection->{prefix};
  $self->{datadir}   = $datadir;

  # create the data dir only when nothing at all exists under that name
  unless ( -d $datadir or -e _ ) {
    mkdir $datadir;
    -d $datadir
      or die "Failed to create a collection's data dir: $datadir";
  }

  RePEc::Index::Storage::prepare_db_env( $RePEc::Index::Config::DATADIR );

  $self->{sourcedir} = $collection->{home};
  -d $self->{sourcedir}
    or die "Bad collections' source dir: " . $self->{sourcedir};

  # database names of this collection
  @{$self}{ qw( files_db records_db history_db conflict_db ) } = (
    "$datadir/files",
    "$datadir/records",
    "$datadir/history",
    "$datadir/conflicts",
  );

  $self->{queue}     = [];
  $self->{queuehash} = {};

  log_it( 7, "[$$] $0, starting update session, timestamp: ", $self->{SESSION} );

  # options and settings: how long an unchanged file may stay
  # unprocessed (defaults to one week, in seconds)
  $self->{TOO_OLD_IS} ||= 60 * 60 * 24 * 7;

  return bless $self, $class;
}




# Finish the session: logs the session timestamp, nothing else.
sub close {
    my ( $self ) = @_;
    return log_it( 7, "[$$] finished update session, timestamp: ", $self->{SESSION} );
}


# Destructor: closes the session when the object goes away.
sub DESTROY {
  my ( $self ) = @_;
  return $self->close;
}

# Readable true value for the "force" argument of check_file().
sub FORCE {
  return 1;
}



##############################################################################
# sub    C H E C K    F I L E 
##############################################################################
#
# check_file( SHORT_FILENAME [, FORCE] )
#
# Looks at one file below the collection's source dir, compares its
# modification time with the file record stored in the files database,
# decides whether it has to be (re-)read, reads it via read_file() if so,
# and saves the updated file record.
#
#   SHORT_FILENAME  name relative to the source dir (mandatory)
#   FORCE           true: read the file even if it looks unchanged
#
# A file that no longer exists (or a "ghost" name containing "//") is
# passed to disappeared_file() when a record for it is known.  A name
# that is a directory with a stored record is delegated to
# process_directory().  Returns the result of read_file() (1 when no
# reading was needed), or 1 for vanished / unknown files.
sub check_file {
  my $self           = shift;
  my $short_filename = shift || die;
  my $force          = shift;  # Read the file even if it is not old yet?

  my $result = 1;
  my $sourcedir = $self->{sourcedir};
  my $filename  = "$sourcedir/$short_filename";
  my $session_time = $self->{SESSION} = time();
  my $too_old_is   = $self->{TOO_OLD_IS};  # an unchanged file's
                                           # time without
                                           # processing
  my $ghost_file;

  if ( $filename =~ m/\/\// ) {
    $ghost_file = 1;  ### legacy fix
    warning "A ghost file update request: $short_filename";
  }

  $self->{CURRENT_FILE_FULL}  = $filename;
  $self->{CURRENT_FILE_SHORT} = $short_filename;

  if ( not -e $filename
       or $ghost_file ) {
    # sanity check: only a file we know about can disappear
    my $record =
      load_record_from_db_txn_readonly( 0, $self->{files_db}, $short_filename );
    if ( not defined $record ) {
      warn "File $filename doesn't exist and not known";

    } else {
      $self->disappeared_file( $short_filename );
    }
    return 1;
  }

  my $directory = 0;
  if ( -d $filename ) { $directory = 1; }

  my $lastmodtime = ( stat( $filename ) ) [9];
  my $status; # new / old, but changed / old, but unchanged
  my $record;
  my $readfile = 0; # flag

  my $txn = start_transaction();
  $self -> {CURRENT_TXN} = $txn;

  eval {
    $record = load_record_from_db_txn_readonly( 
         $txn, $self->{files_db}, $short_filename );
  };
  if ($@) {
    log_it 9, "[upd] can't load file record of $short_filename";
    warn "can't load file record of $short_filename\n";
  }

  if ( $record and $directory ) {
    my $dump = Dumper( $record );
    log_it 9, "check_file() was called on a directory $short_filename: $dump";
    print "check_file() was called on a directory $short_filename: $dump\n";

    # Nothing has been written yet.  Finish our transaction before
    # handing over, so that it is not left open and CURRENT_TXN does not
    # keep pointing at it while process_directory() runs its own ones.
    commit_transaction( $txn );
    $self -> {CURRENT_TXN} = undef;

    return $self->process_directory( $short_filename, $force );
  }

  if ( ref $record and $record->_type eq 'RePEc::Index::DIR' ) {
    my $dump = Dumper( $record );
    log_it 9, "dir record: $dump where a file record was expected $short_filename; clearing";
    print "dir record: $dump where a file record was expected $short_filename; clearing\n";
    undef $record;
  }
  
  if ( not defined $record ) {
    log_it 5, "[upd] file $short_filename: new";
    $status = 'new';
    $record = RePEc::Index -> create_new_file_record;
    $record -> filename_set( $short_filename );
    $record -> first_observed_set( $session_time );
    $record -> templates_list_set( [] );
#    $record -> present_set ( 1 );
    $readfile = 1;

  } else {
    my $mark     = $record -> last_observed;
    my $lastread = $record -> last_read;
    my $previous_last_modified = $record -> last_modified;

    if ( $lastmodtime > $mark ) {
      ### file has changed since last update
      log_it 5, "[upd] file $short_filename: known, changed";

      $record -> last_changed_set( $session_time );

      $status = "known/changed";
      $readfile = 1;

    } elsif ( defined $previous_last_modified 
              and $previous_last_modified != $lastmodtime ) {
      ### modification time differs from the one recorded last time
      log_it 5, "[upd] file $short_filename: known and modif time changed";

      $status = "known/changed";
      $readfile = 1;

    } else {

      if ( $session_time - $lastread > $too_old_is ) {
        ### unchanged, but not read for longer than TOO_OLD_IS seconds
        $readfile = 1;
        log_it 3, "[upd] file $short_filename: known, unchanged, but to be processed";
        $status = "known/too old";

      } else {
        log_it 3, "[upd] file $short_filename: known and unchanged";
        $status = "known/unchanged";
      }
    }
#    $record -> present_set ( 1 );
  }

  $record -> last_observed_set( $session_time );
  $record -> last_modified_set( $lastmodtime  );

  $self->{CURRENT_FILE_STATUS} = $status;
  $self->{CURRENT_FILE_RECORD} = $record;



  if ( $force ) { 
    if ( not $readfile ) {
      log_it 3, "forced to read it";
    }
    $readfile = 1; 
  }

  if ( $readfile ) {
    $result &= $self-> read_file ( $filename, $record );
    if ( $result ) {
      # last_read is only advanced when the file was actually read
      $record -> last_read_set( $session_time );
      $self->{files_processed}{$short_filename} ++;  
    }

  }


  my $r = save_record_to_db_txn( $txn, $self->{files_db}, $short_filename, $record );
  if ( not defined $r 
       or  $r != 0 ) {
    my $err = $BerkeleyDB::Error;
    if ( $BerkeleyDB::Error eq 'Successful return: 0' ) {
      $err = $r;
    }
    warn "save record problem: $! / $err\n";
    undef $!;
  }

  commit_transaction( $txn );
  $self -> {CURRENT_TXN} = undef;

  return $result;
}

#
# 2004-07-13 17:07 debug tool
# Disabled debug printer: ignores its arguments and returns nothing.
sub p {
    return;
#   print @_, " " x 10, "\n";
}

############################################################################
# sub    R E A D    F I L E 
############################################################################
#
# read_file( FILENAME, RECORD )
#
# Reads all records of a data file through the collection object and
# fires the DATAFILE_START, RECORD / RECORD_IGNORED, RECORD_DISAPPEAR and
# DATAFILE_FINISH events.  RECORD is the file record; its templates list
# holds the handles seen last time and is replaced by the handles found
# now.  Returns 0 if an abort was requested, 1 otherwise.
sub read_file {
  my ( $self, $filename, $record ) = @_;

  my $short_filename = $record->filename;
  my $collection     = $self->{collection};

  return 0 if $ABORT;

  my $f = $collection->open_data_file( $filename );

  my $handles_expected = $record->templates_list;
  my @seen_in_order;
  my %seen_count;

  # The file could not be opened and we did not expect anything in it:
  # not really a useful data file, nothing else to do.
  if ( not $f and not scalar @$handles_expected ) {
    return 1;
  }

  Events -> RePEc::Index::Update::DATAFILE_START( $short_filename, $self );

  # how many times each handle was in the file last time
  my %expected_count = ();
  $expected_count{$_}++ foreach @$handles_expected;

  if ( $f ) {
    while ( 1 ) {
      my ( $ha, $r, $type, $pos, $checksum ) = $collection->get_next_record();

      last if not $r;

      if ( $collection->check_id( $ha ) ) {
        push @seen_in_order, $ha;
        $seen_count{$ha}++;

        Events -> RePEc::Index::Update::RECORD( $ha, $r, $type, $short_filename, 
                                                $pos, $checksum, $self );
      } else {
        # bad id: report the record, but do not remember its handle
        Events -> RePEc::Index::Update::RECORD_IGNORED( $ha, $r, $type, $short_filename, 
                                                        $pos, $checksum, $self );
      }
    }
  }

  $record->templates_list_set( \@seen_in_order );

  # A handle that occurs fewer times now than it did before has
  # disappeared; the expected count is lowered on every visit, so a
  # repeated handle is reported once per missing occurrence.
  foreach my $h ( @$handles_expected ) {
    Events -> RePEc::Index::Update::RECORD_DISAPPEAR( $h, $short_filename, $self )
      if $seen_count{$h} < $expected_count{$h};
    $expected_count{$h}--;
  }

  Events -> RePEc::Index::Update::DATAFILE_FINISH( $short_filename, $self );

  return 1;
}



############################################################################
# sub    D I S A P P E A R E D    F I L E 
############################################################################
#
# disappeared_file( FILE [, TXN] )
#
# Removes the record of a file or directory (name ending in "/") from
# the files database and fires the matching events: RECORD_DISAPPEAR
# for every handle of a plain file, then DATAFILE_DISAPPEAR.  For a
# directory the sub recurses into every entry of its files list first.
# Works inside TXN, or the session's CURRENT_TXN, or else inside a
# transaction of its own which it also commits.
sub disappeared_file {
  my ( $self, $file, $txn ) = @_;
  $txn ||= $self->{CURRENT_TXN};

  log_it( 6, "disappeared: $file" );

  # do we have to open (and later commit) the transaction ourselves?
  my $transaction_responsible = $txn ? 0 : 1;
  $txn = start_transaction() if $transaction_responsible;

  my $frecord = load_record_from_db_txn( $txn, $self->{files_db}, $file );

  unless ( defined $frecord ) {
    warn "Can't find file's record: $file";
    if ( $transaction_responsible ) {
      commit_transaction( $txn );
      undef $txn;
    }

    return;
  }

  my $is_directory = ( $file =~ m/\/$/ );

  delete_record_from_db_txn( $txn, $self->{files_db}, $file );
  if ( $transaction_responsible ) {
    commit_transaction( $txn );
    undef $txn;
  }

  if ( $is_directory ) {
    # directory records have a files list instead of a templates list
    my $children = $frecord->files_list();

    foreach my $child ( @$children ) {
      $self->disappeared_file( "$file$child" );
    }
    Events -> RePEc::Index::Update::DATAFILE_DISAPPEAR( $file, $self );  

  } else {
    # a plain file: every record it used to hold is gone as well
    my $handles = $frecord->templates_list();

    foreach my $handle ( @$handles ) {
      Events -> RePEc::Index::Update::RECORD_DISAPPEAR( $handle, $file, $self );
    }
    Events -> RePEc::Index::Update::DATAFILE_DISAPPEAR( $file, $self, @$handles );  
  }

}



############################################################################
# sub    A D D    T O    Q U E U E 
############################################################################
#
# add_to_queue( NAME, ... )
#
# Appends the given short names to the session's processing queue.
# Skipped are: names already processed more than once, the file being
# processed right now (CURRENT_FILE_SHORT), and names already marked in
# the queue hash.  Prints the names actually queued and returns a
# reference to the list of them.
sub add_to_queue {
  my $self = shift;
  
  my $queue = $self->{queue};
  my $qh    = $self->{queuehash};
  
  my @add = ();
  my $now = $self->{CURRENT_FILE_SHORT};
  foreach my $item ( @_ ) {

    if ( $self ->{files_processed} {$item} > 1 ) {
      print "trying to queue a twice-processed file $item\n";
      next;
    }

    if ( $item eq $now ) { next; }
    if ( $qh->{$item}  ) { next; }
    push @$queue, $item;
    $qh ->{$item} = 1;
    push @add, $item;
  }

  if ( scalar @add ) {
    # parenthesised, so that the newline is not joined in as a list item
    print "queued: ", join( ' ', @add ), "\n";
  }
  return \@add;
}

# requeue( FILE )
#
# Moves FILE to the end of the processing queue, or appends it if it is
# not queued yet.  Files already processed more than once are refused.
# The queue array is modified in place: the other queued items are kept
# and anybody holding a reference to the array (process_queue() does)
# sees the change.
sub requeue {
  my $self = shift;
  my $file = shift || die;

  my $queue = $self->{queue};
  my $qh    = $self->{queuehash};

  if ( $self ->{files_processed} {$file} > 1 ) {
    print "trying to queue a twice-processed file $file\n";
    return;
  }
  
  if ( $qh->{$file} ) {

    if ( not scalar @$queue ) {
      print "can't requeue, queue is empty; use add_to_queue() instead\n";
      return;
    }

    # drop the earlier occurrence(s) of the file, keep everything else
    @$queue = grep { defined $_ and $_ ne $file } @$queue;

  } else {
    $qh ->{$file} = 1;
  }
  push @$queue, $file;

  print "requeued: $file\n";
}




############################################################################
# sub    P R O C E S S    Q U E U E 
############################################################################
#
# process_queue()
#
# Works off the session's queue: names ending in "/" and the empty name
# (the source dir itself) go to process_directory(), everything else to
# check_file() with FORCE.  Items already processed more than once are
# skipped.  Returns 0 as soon as an abort is noticed, otherwise the
# and-ed results of the calls made.
sub process_queue {
  my $self   = shift;
  my $result = 1;

  my $queue  = $self->{queue};
  my $qh     = $self->{queuehash};

  # Loop on the queue length, not on the item's truth value: '' (the
  # root directory) and '0' are legitimate names.  The item lives in a
  # lexical, because the subs called below are free to change $_.
  while ( @$queue ) {
    my $item = shift @$queue;

    next if not defined $item;    # hole left by a removed item

    if ( $self ->{files_processed} {$item} > 1 ) {
      delete $qh->{$item};
      print "process_queue: skipping already many times processed $item\n";
      next;
    }

    if    ( $ABORT ) { return 0; }
    while ( $PAUSE ) { sleep 3; }
 
    if ( $item =~ /\/$/ 
         or $item eq '' ) {
      $result &= $self->process_directory( $item );

    } else {
      $result &= $self->check_file( $item, FORCE );
    }
    delete $qh->{$item};
  }

  return $result;
}


############################################################################


###############################################################################
# sub    P R O C E S S    T H I S 
###############################################################################
#

# process_this( SHORT_FILENAME [, FORCE] )
#
# Entry point for updating a single object below the source dir, whatever
# it is.  Leading and trailing slashes are stripped from the name.  The
# empty name and directories go to process_directory(); a plain file (or
# a name with a stored file record) goes to check_file() with FORCE and
# is then registered in its directory's record.  The queue is processed
# afterwards.  Returns undef if there is neither such an object nor a
# record of it, else the and-ed results.
sub process_this { 
  my ( $self, $short_filename, $force ) = @_;
  $self           or die;
  $short_filename or die;

  # strip the initial slash char "/" and any trailing slashes
  for ( $short_filename ) {
    s!^/!!;
    s!/+$!!;
  }

  my $filename = join '/', $self->{sourcedir}, $short_filename;

  $self->{CURRENT_FILE_FULL}  = $filename;
  $self->{CURRENT_FILE_SHORT} = $short_filename;

  my $filerecord =
    load_record_from_db_txn_readonly( 0, $self->{files_db}, $short_filename );

  my $dirrecord =
    load_record_from_db_txn_readonly( 0, $self->{files_db}, "$short_filename/" );

  # classify the request; the first match wins
  my $is_root = ( $short_filename eq '' );
  my $is_file = ( not $is_root
                  and ( -f $filename or $filerecord ) );
  my $is_dir  = ( not $is_root and not $is_file
                  and ( -d $filename or $dirrecord ) );

  unless ( $is_root or $is_file or $is_dir ) {
    warning( 
"Processing requested, but there's no such object in the filesystem: $short_filename" );
    warn 
"Processing requested, but there's no such object in the filesystem: $short_filename";
    return undef;
  }

  my $result = 1;

  if ( $is_file ) {
    $result &= $self->check_file( $short_filename, FORCE );

    # make sure the containing directory knows about the file
    if ( $short_filename =~ /(.*[\\\/])([^\/\\]+)/ ) {
      my ( $dir, $name ) = ( $1, $2 );
      $result &= $self->check_directory_for_a_file( $dir, $name );
    }

  } else {
    $result &= $self->process_directory( $short_filename, $force );
  }

  $result &= $self->process_queue;

  return $result;
}




###############################################################################
# sub    P R O C E S S    D I R E C T O R Y 
###############################################################################
#

# process_directory( DIR )
#
# Updates a directory (given relative to the source dir) and everything
# below it: sub-directories accepted by the collection's directory
# checker are processed recursively, files accepted by its file checker
# go to check_file().  Entries listed in the stored directory record but
# no longer accepted or present are passed to disappeared_file().  The
# directory record is then saved and the queue is processed.
#
# Returns 0 if an abort was noticed or the directory could not be read,
# otherwise the and-ed results of the calls made.
sub process_directory {
  my $self = shift;
  my $dir  = shift;  # we assume that the directory is below our sourcedir

  my $result = 1;
  my $collection = $self -> {collection};

  # normalize: exactly one trailing slash, no doubled or leading slashes
  $dir = "$dir/";
  $dir =~ s!\/+!\/!g;
  $dir =~ s!^/!!;

  $self->{files_processed}{$dir}++;

  if ( $ABORT ) { return 0; }

  log_it( 6, "Update request for directory: '$dir'" );

  if ( $dir =~ m!\./!
       or $dir =~ m!\.\./! ) {
    warn "Suspicious directory path to update: '$dir'";
    log_it( 8, "Suspicious directory" );
  }
  
  my $sourcedir = $self->{sourcedir};

  assert( $sourcedir );
  assert( -d "$sourcedir/$dir" );

  my $dir_full = "$sourcedir/$dir";
  log_it( 6, "full path: $dir_full" );

  my $session_time = $self->{SESSION} = time();
  my $drecord;
  my $status;

  $drecord = 
    load_record_from_db_txn_readonly( 0, $self->{files_db}, $dir );

  if ( defined $drecord and 
       ref $drecord eq 'RePEc::Index::FILE' ) {
    my $d = Dumper( $drecord );
    my $m = "a file record, where a directory expected: $d ($dir); clearing!";
    log_it( 9, $m );
    print $m, "\n";
  }

  if ( defined $drecord 
       and ref $drecord eq 'RePEc::Index::DIR' ) {
    log_it( 6, "the directory is in the db" );

  } else {
    log_it( 6, "the directory is new to the db" );

    $drecord = RePEc::Index -> create_new_dir_record();
    $drecord -> filename_set( $dir );

    $drecord -> first_observed_set ( $session_time );
  }


  my $dir_check = $collection -> monitor_dir_checker;
  my $fil_check = $collection -> monitor_file_checker;

  # An unreadable directory must not be mistaken for an empty one:
  # that would report every known child as disappeared.
  my $dh;
  if ( not opendir $dh, $dir_full ) {
    my $m = "Can't read directory $dir_full: $!";
    warn $m;
    log_it( 8, $m );
    return 0;
  }
  my @items = readdir $dh;
  closedir $dh;

  my @children;
  my %children;

  foreach my $i (@items) {
    if ( $i =~ m/^\.{1,2}$/ ) { next; }

    if ( $ABORT )    {  return 0;  }
    while ( $PAUSE ) {  sleep 3;   }
 
    my $sfname = "$dir$i";
    my $ffname = "$dir_full$i";

    # The checkers are called without arguments, so they presumably take
    # the name from $_.  It is localized, so that the caller's $_ is not
    # overwritten.
    local $_ = $sfname;

    if ( -d $ffname ) {
      if ( &$dir_check() ) {
        $result &= $self->process_directory( $sfname );
        push @children, "$i/";
        $children{"$i/"} = 1;
      }

    } else {
      if ( &$fil_check() ) {
        push @children, $i;
        $children{$i} = 1;

#        next if $self->{files_processed}{$sfname} ;
        $result &= $self->check_file( $sfname );
      }
    }
  }

  if ( $ABORT ) { return 0; }
 

  # whatever the record lists, but we did not see this time, is gone
  my $flist = $drecord->files_list();
  foreach ( @$flist ) {
    if ( $children{$_} ) {
    } else {
      $self->disappeared_file( "$dir$_" ); # XXX
      if    ( $ABORT ) {  return 0;  }
      while ( $PAUSE ) {  sleep 3;   }
     }
  }


  $drecord -> files_list_set ( \@children );
  $drecord -> last_observed_set ( $session_time );
#  $drecord -> present_set ( 1 );

  my $txn = start_transaction();
  my $r = save_record_to_db_txn( $txn, $self->{files_db}, $dir, $drecord );
    if( not defined $r 
        or  $r != 0 ) {
      warn "save record problem: $! / $BerkeleyDB::Error";
    }

  commit_transaction( $txn );

  $result &= $self->process_queue;
 
  print "processed '$dir', found: ", join ( ' ', @children ), 
    ( not $result ) ? " (aborted)": "" ,  "\n";

  return $result;
}






###############################################################################
# sub    C H E C K    D I R E C T O R Y    F O R    A    F I L E 
###############################################################################
#

# check_directory_for_a_file( DIR, FILE )
#
# Makes sure the stored record of directory DIR lists FILE.  If the
# directory has no record yet, the whole directory is processed via
# process_directory() and its result is returned.  If the file had to
# be added to the list, the record is saved and the result of
# process_queue() is returned.  Returns 1 if the file was listed
# already.  The transaction opened here is committed on every path.
sub check_directory_for_a_file {
  my $self = shift;
  my $dir  = shift;
  my $file = shift;

  log_it( 6, "Update request for a file in directory: '$file' in '$dir'" );

  if( $dir =~ m!\./!
      or $dir =~ m!\.\./! ) {
    warn "Suspicious directory path to update: '$dir'";
    log_it( 8, "Suspicious directory" );
  }


  my $txn = start_transaction();

  my $drecord = load_record_from_db_txn( $txn, $self->{files_db}, $dir );

  if ( defined $drecord ) {
    log_it( 6, "the directory is in the db" );

  } else {
#    log_it( 6, "the directory is new to the db" );

    commit_transaction( $txn );
    undef $txn;

    return $self->process_directory( $dir );
  }

  my $is_file_known = 0;

  my $flist = $drecord->files_list();
  foreach ( @$flist ) {
    if ( $_ eq $file ) {
      $is_file_known = 1;
      last;
    }
  }

  if ( $is_file_known ) {
    # nothing to change, but the transaction still has to be finished
    commit_transaction( $txn );
    undef $txn;

    return 1;
  }

  push @$flist, $file;

  $drecord -> files_list_set ( $flist );

  my $r = save_record_to_db_txn( $txn, $self->{files_db}, $dir, $drecord );
  if ( not defined $r 
      or  $r != 0 ) {
    warn "save record problem: $! / $BerkeleyDB::Error";
  }
  commit_transaction( $txn );

  return $self->process_queue;
}



# file_mini_check( FILE )
#
# Quick look at a file: queues it for processing if it is readable and
# either unknown or modified at/after the recorded last observation or
# last read, and reports it as disappeared if it is gone but has a
# record.
#
# Returns 1 if the file was passed to add_to_queue(), 2 if it was
# passed to disappeared_file(), 0 if there was nothing to do.
sub file_mini_check {
  my $self = shift;
  my $file = shift;
  
  my $record = load_record_from_db_txn_readonly( 0, $self->{files_db}, $file );

  my $sourcedir = $self->{sourcedir};
  my $filename  = "$sourcedir/$file";

  if ( -e $filename and -r _ ) {
    my $lastmodtime = (stat _)[9];

    # only a missing value is an error: a modification time of 0
    # (the epoch) is a legitimate one
    die if not defined $lastmodtime;

    if ( not $record ) {
      $self -> add_to_queue( $file );
      return 1;
    }

    my $mark     = $record -> last_observed;
    my $lastread = $record -> last_read;
  
    if (    $lastmodtime >= $mark 
         or $lastmodtime >= $lastread 
       ) {
      ### file has changed since last update
      $self -> add_to_queue( $file );
      return 1;
    }

  } else {
    # file does not exist, probably disappeared
    if ( $record ) {
      $self -> disappeared_file( $file );
      return 2; 
    }
   
  }

  return 0;
}



# file_needs_update( FILE )
#
# Tells, without changing anything, whether a file should be processed.
# Returns 1 if the file is readable and either has no record or was
# modified at/after its recorded last observation or last read, 2 if
# the file is gone (or unreadable) but has a record, and 0 otherwise.
sub file_needs_update {
  my ( $self, $file ) = @_;
  $self or die;
  $file or die;

  my $record   = load_record_from_db_txn_readonly( 0, $self->{files_db}, $file );
  my $filename = $self->{sourcedir} . "/" . $file;

  unless ( -e $filename and -r _ ) {
    # file does not exist, probably disappeared
    return 2 if $record;
    return 0;
  }

  my $lastmodtime = (stat _)[9];
  die if not defined $lastmodtime;

  return 1 if not $record;

  my $mark     = $record->last_observed;
  my $lastread = $record->last_read;

  ### the file has changed since last update
  return 1 if $lastmodtime >= $mark;
  return 1 if $lastmodtime >= $lastread;

  return 0;
}


# get_file_record( FILE )
#
# Returns whatever the read-only loader finds in the files database
# under the key FILE.
sub get_file_record {
  my ( $self, $file ) = @_;

  my $files_db = $self->{files_db};
  return load_record_from_db_txn_readonly( 0, $files_db, $file );
}






# the end of RePEc::Index::Update;

1;

__END__
