#!/usr/bin/perl
# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

use Getopt::Long;
use IO::File;
use File::Basename;
use strict;

# --- Load local modules
use MPLib::Version;
use PLLib::Utils;
use PLLib::Sequence;

# --- Process command line options
#
#  -i  file with one sequence identifier per line (first field is used)
#  -d  directory that holds the per-sequence directories
#  -s  directory structure type (matched later against SIMPLE|PDB|MODPIPE)
#  -t  tag that must appear in the names of the files to be reported
my ( $inpfile, $outdir, $help, $dirstr, $usage, $filetag );

# GetOptions returns false when it meets an unknown or malformed option
# (it has already warned about each one).  The result is acted upon only
# after the usage text has been assembled, so that it can be shown.
my $options_ok = GetOptions (
           "i=s"        => \$inpfile,
           "d=s"        => \$outdir,
           "s=s"        => \$dirstr,
           "t=s"        => \$filetag,
           "help"       => \$help,
           "version"    => sub { VersionMessage() },
           );

# --- Usage
#     The backslash in the -t line is doubled: inside double quotes a
#     single "\." collapses to "." and the help would show ".*.ext".
$usage = join("",
   "\n$0\n",
   "\t-i        File containing sequence identifiers\n",
   "\t-d        Directory containing the sequences\n",
   "\t-s        Type of directory structure [SIMPLE|PDB|MODPIPE]\n",
   "\t-t        Unique name tag for files [<id>.*<tag>.*\\.ext]\n",
   "\t--help    This help\n",
   "\t--version Report version number of this program.\n",
   "\n\n",
);

# --- Refuse to run with options that could not be parsed
unless ( $options_ok ) {
   die $usage;
}

# --- Output help
if ( $help ) {
   print $usage;
   exit 0;
}

# --- Get Program name
#     (used as the prefix of every diagnostic issued below)
my $subrname = GetSubrName();

# --- Verify command line options

  # -- Default to the SIMPLE directory structure type when -s is absent
  $dirstr ||= 'SIMPLE';

  # -- The identifier file, the directory and the file tag are mandatory
  if ( !$inpfile || !$outdir || !$filetag ){
     warn "${subrname}__E> Specify the -i, -d and -t options at the least\n";
     die  "$usage\n";
  }

  # -- The identifier file must be present
  die "${subrname}__E> Could not find input file: $inpfile\n"
     if ( ! -e $inpfile );

  # -- The directory given with -d must be present
  die "${subrname}__E> Could not find output directory: $outdir\n"
     if ( ! -d $outdir );

# --- Read and process the file
#
#     For every identifier in the input file one report line is printed:
#       id, existence flags for the .py/.log/.prf files, a constant 0.00,
#       number of iterations, lowest/highest Chi2, lowest/highest KS-Stat,
#       divergence flag, number of sequences and profile length.
#     Values that cannot be determined are reported as 0.

  # -- Open the input file stream.  Three-argument open with a lexical
  #    handle: the file name can never be mistaken for an open mode, and
  #    a failure is reported instead of silently producing no output.
  open(my $unq_fh, '<', $inpfile)
     or die "${subrname}__E> Could not open input file: $inpfile: $!\n";

  while ( my $unq = <$unq_fh> ){
    chomp $unq;

    # -- Take the first field as the unq id; a blank line has no id
    #    and would otherwise yield a meaningless report line
    my $idcode = (split(" ", $unq))[0];
    next unless ( defined($idcode) && length($idcode) );

    # --- Create the directory name for the sequence
    my $dirnam;
    if ( $dirstr =~ /simple/i ){
       $dirnam = "${outdir}/$idcode";
    }
    elsif ( $dirstr =~ /pdb/i ){
       my $subdir = substr($idcode, 1, 2);
       $dirnam = "${outdir}/${subdir}/${idcode}";
    }
    elsif ( $dirstr =~ /modpipe/i ){
       my $subdir = substr($idcode, 0, 3);
       $dirnam = "${outdir}/${subdir}/${idcode}/sequence";
    }
    else {
       # Without a recognised type there is no directory to look in
       die "${subrname}__E> Unknown directory structure type: $dirstr\n";
    }

    # -- Check if the directory exists
    unless ( -d $dirnam ){
       warn "Could not find directory: $dirnam\n";
    }

    # -- Figure out the filenames for .top, .log and .prf files.
    #    The id is quoted so that characters such as '.', '+' or '|' in
    #    it are matched literally; the tag is still used as a pattern,
    #    as before.  When several files match, the last one read wins.
    my ( $topfile, $logfile, $prffile );
    my $idpat = quotemeta($idcode);
    if ( opendir(my $seqdir, $dirnam) ){
       # 'defined' keeps a file literally named "0" from ending the scan
       while ( defined( my $file = readdir($seqdir) ) ){
          next if ( $file =~ /^\.+$/ );
          $logfile = "${dirnam}/${file}"
                     if ($file =~ /^${idpat}.*${filetag}.*\.log/);
          $topfile = "${dirnam}/${file}"
                     if ($file =~ /^${idpat}.*${filetag}.*\.py/);
          $prffile = "${dirnam}/${file}"
                     if ($file =~ /^${idpat}.*${filetag}.*\.prf/);
       }
       closedir($seqdir);
    }

    # -- Fix the flags for existence
    my $istop = ( defined($topfile) && -e $topfile ) ? 1 : 0;
    my $islog = ( defined($logfile) && -e $logfile ) ? 1 : 0;
    my $isprf = ( defined($prffile) && -e $prffile ) ? 1 : 0;

    # -- Every statistic starts at 0 (a list assignment of a single 0
    #    would initialise the first variable only)
    my ( $iter, $chi2low, $chi2high, $kstatlow, $kstathigh, $div )
       = (0) x 6;
    my ( $numseq, $prflen ) = (0, 0);

    if ( $islog ){

       # -- Open the log file
       my $fh_log = IO::File->new($logfile, 'r');

       if ( $fh_log ){
          # -- Check divergence, iterations etc; anything the parser
          #    could not determine is reported as 0
          my @stat = GetStatInfo($fh_log);
          ($iter, $chi2low, $chi2high, $kstatlow, $kstathigh, $div)
                = map { defined($_) ? $_ : 0 } @stat[0 .. 5];

          $fh_log->close;
       }
       else {
          warn "${subrname}__W> Could not open log file: $logfile: $!\n";
       }
    }

    if ( $isprf ){

       # -- Open the profile file
       my $fh_prf = IO::File->new($prffile, 'r');

       if ( $fh_prf ){
          # -- Get profile details
          my @prfstat = GetProfileStat($fh_prf);
          ($numseq, $prflen)
                = map { defined($_) ? $_ : 0 } @prfstat[0, 1];

          $fh_prf->close;
       }
       else {
          warn "${subrname}__W> Could not open profile file: $prffile: $!\n";
       }
    }

    # -- Print log
    printf "%-40s %1d%1d%1d %8.2f %3d %8.4f %8.4f  %8.4f %8.4f %1d  %8d %8d\n",
           $idcode,
           $istop, $islog, $isprf,
           0.0,
           $iter,
           $chi2low, $chi2high, $kstatlow, $kstathigh, $div,
           $numseq, $prflen;

  }

  # -- Close the file
  close($unq_fh);

# -- SUBROUTINES
# --- GetStatInfo
#
#  Scans a log for lines containing "Iteration, Chi2, nbins, KS-Stat"
#  and for the notice "Profile appears to be diverging".
#
#  Argument : an open, seekable filehandle (it is rewound before reading).
#  Returns  : ( number of iteration lines found,
#               lowest Chi2, highest Chi2,
#               lowest KS-Stat, highest KS-Stat,
#               1 if the divergence notice was seen, else 0 ).
#             Chi2 and KS-Stat are taken from whitespace-separated
#             fields 8 and 10 (zero-based) of each iteration line.  All
#             values are 0 when no iteration line is present.
#             An empty list is returned if the argument count is wrong.
sub GetStatInfo {

use strict;

   #--- Get subroutine name
   my $subname = GetSubrName();

   #--- Check arguments
   my $nargs = 1;

   unless (scalar(@_) == $nargs){
      # Diagnostics go to STDERR so that they cannot end up in the
      # middle of a report written to STDOUT
      warn "$subname __D> Insufficient arguments\n" ;
      return ;
   }

   #--- reset file position
   my ($fh_log) = @_;
   seek($fh_log, 0, 0);

   #--- Read through filehandle
   my $niter = 0;
   my $div   = 0;
   my (@chi2s, @ksts);
   while ( my $logln = <$fh_log> ){
      chomp $logln;
      if ( $logln =~ /Iteration, Chi2, nbins, KS-Stat/ ){
         my ($chi2, $kst) = (split(" ", $logln))[8,10] ;
         $niter++;
         # A missing field counts as 0, which is also how the numeric
         # sort below would treat an undefined value
         push @chi2s, ( defined($chi2) ? $chi2 : 0 );
         push @ksts,  ( defined($kst)  ? $kst  : 0 );
      }

      if ( $logln =~ /Profile appears to be diverging/ ){
         $div = 1;
      }
   }

   # -- Highest and lowest chi2 (0 when there were no iterations)
   my ($chi2_low, $chi2_hgh) = (0, 0);
   if ( @chi2s ){
      my @sorted = sort { $a <=> $b } @chi2s;
      ($chi2_low, $chi2_hgh) = @sorted[0, -1];
   }

   # -- Highest and lowest kstat (0 when there were no iterations)
   my ($kst_low, $kst_hgh) = (0, 0);
   if ( @ksts ){
      my @sorted = sort { $a <=> $b } @ksts;
      ($kst_low, $kst_hgh) = @sorted[0, -1];
   }

   #--- return stuff
   return($niter, $chi2_low, $chi2_hgh, $kst_low, $kst_hgh, $div);
}

# --- GetProfileStat
#
#  Reads the "# Number of sequences:" and "# Length of profile  :"
#  header lines from a profile file.
#
#  Argument : an open, seekable filehandle (it is rewound before reading).
#  Returns  : ( number of sequences, profile length ), taken from
#             whitespace-separated fields 4 and 5 (zero-based) of the
#             respective lines.  A value whose line is absent is 0.
#             An empty list is returned if the argument count is wrong.
sub GetProfileStat {

use strict;

   #--- Get subroutine name
   my $subname = GetSubrName();

   #--- Check arguments
   my $nargs = 1;

   unless (scalar(@_) == $nargs){
      # Diagnostics go to STDERR so that they cannot end up in the
      # middle of a report written to STDOUT
      warn "$subname __D> Insufficient arguments\n" ;
      return ;
   }

   #--- reset file position
   my ($fh_prf) = @_;
   seek($fh_prf, 0, 0);

   #--- Read through filehandle
   #    Both values start at 0; "my (\$a, \$b) = 0" would set only the first
   my ($numseq, $prflen) = (0, 0);
   while ( my $prfln = <$fh_prf> ){
      chomp $prfln;
      my @fields = split(" ", $prfln);

      if ( $prfln =~ /^# Number of sequences:/ ){
         $numseq = $fields[4] if ( defined($fields[4]) );
      }
      if ( $prfln =~ /^# Length of profile  :/ ){
         $prflen = $fields[5] if ( defined($fields[5]) );
      }
   }

   return($numseq, $prflen);
}
