# This file is part of ModPipe, Copyright 1997-2020 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

package PLLib::ModProfile;
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw( ReadProfileLines CheckSummaryPPS GetRangePrfAli 
              ProfileStatInfo );


use strict;
use PLLib::Utils;

sub ProfileStatInfo {

use strict;

   #--- Get subroutine name
   my $subname = GetSubrName();

   #--- Check arguments
   my $nargs = 1;

   unless (scalar(@_) == $nargs){
      print "$subname __D> Insufficient arguments\n" ;
      return ;
   }

   #--- reset file position
   local *FH_LOG = $_[0];
   seek(FH_LOG,0,0);

   #--- Read through filehandle
   my $logln; my $div = 0;
   my @iters = my @chi2s = my @ksts = ();
   while ($logln = <FH_LOG>){
      chomp $logln;
      if ( $logln =~ /Iteration, Chi2, nbins, KS-Stat/ ){
         my ($iter, $chi2, $kst ) = (split(" ", $logln))[7,8,10] ;
         push @iters, $iter;
         push @chi2s, $chi2;
         push @ksts, $kst;
      }

      if ( $logln =~ /Profile appears to be diverging/ ){
         $div = 1;
      }
      # Return undef if Modeller encountered an error
      if ( $logln =~ /E>/) {
         return;
      }
   }

   # -- No. of iterations
   my $niter = scalar(@iters);

   # -- Highest and lowest chi2
   @chi2s = sort { $a <=> $b } @chi2s;
   my $chi2_low = $chi2s[0];
   my $chi2_hgh = $chi2s[$#chi2s];

   # -- Highest and lowest kstat
   @ksts = sort { $a <=> $b } @ksts;
   my $kst_low = $ksts[0];
   my $kst_hgh = $ksts[$#ksts];

   #--- return stuff
   return($niter, $chi2_low, $chi2_hgh, $kst_low, $kst_hgh, $div);
}


sub GetRangePrfAli {

   #--- Get subroutine name
   my $subname = GetSubrName();

   #--- Check arguments
   my $nargs = 2;

   unless (scalar(@_) == $nargs){
      print "$subname __D> Insufficient arguments\n" ;
      return;
   }

   #--- reassign input variables
   my ($target_ln, $templates_ln) = @_;

   # --- Extract the sequences from the profile lines
   my $target = (split(" ", $target_ln))[12];
   my @templates = ();
   foreach my $template (@$templates_ln){
      push @templates, (split(" ", $template))[12];
   }

   #--- Check lengths
   my $tgtlen = length($target);
   foreach my $template (@templates){
      if ( length($target) ne length($template) ){
         print "$subname __E> Target/Template don't match in length\n";
         return;
      }
   }

   #--- Initialize variables
   my ( $i, $template_beg, $template_end, $offset ) = undef;

   #-- Get the beginning position of template
   $template_beg = length($target);
   foreach my $template (@templates){
      for($i = 0; $i < length($template); $i++ ){
         if ( substr($template, $i, 1) ne '-' ){
            $template_beg = $i if ($i < $template_beg);
            last;
         }
      }
   }

   #-- Get the ending position of template
   $template_end = 0;
   foreach my $template (@templates){
      for($i = length($template); $i > 0; $i-- ){
         $offset = $i - length($template) - 1;
         if ( substr($template, $offset, 1) ne '-' ){
            $template_end = $i if ($i > $template_end);
            last;
         }
      }
   }

   # -- Return beg/end ranges
   return($template_beg, $template_end);
}


sub CheckSummaryPPS {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 1;

   unless (scalar(@_) == $nargs){
      print "${subname}__D> Insufficient arguments\n" ;
      return;
   }

   # --- reassign input arguments
   my $fh_summ = $_[0];

   # --- reset file position
   local *FH_SUMM = $fh_summ;
   seek(FH_SUMM,0,0);

   # -- Check if there are any lines to process
   my @lines = <FH_SUMM>;
   unless ( @lines ){
      warn "${subname}__W> No lines in summary file\n";
      warn "${subname}__W>    Probably no hits were detected\n";
      return 0;
   }

   # --- read through filehandle
   my $ln; my $alicnt = 0;
   seek(FH_SUMM,0,0);
   while ( $ln = <FH_SUMM> ){
      chomp $ln;

      # --- Get the filename (assumes that it is the
      #     last column in the summary file)
      my $alnfile = (split(" ", $ln))[-1];

      # --- Check for the existence of the file
      return if ( ! -s $alnfile );

      # --- Increment alicount
      $alicnt++;
   }

   # --- Return the final result
   return $alicnt;
}


sub ReadProfileLines {

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 2;

   unless (scalar(@_) == $nargs){
      print "${subname}__E> Insufficient arguments\n" ;
      return;
   }

   # --- reassign input arguments
   my ($fh_prf, $seqid) = @_;

   # --- reset file position
   local *FH_PRF = $fh_prf;
   seek(FH_PRF,0,0);

   # --- read through file handle
   my ( $prf, $target ) = undef; my @templates = ();
   while ( $prf = <FH_PRF> ){
      chomp $prf;

      # --- Skip the comment (header) lines
      next if ( $prf =~ /^#/ );

      # --- Store the target line
      if ( ! defined($target) && (split(" ", $prf))[1] eq $seqid ) {
         $target = $prf;

      # --- Store the possible template lines
      } elsif ( (split(" ", $prf))[2] eq 'X' ) {
           push (@templates, $prf);
        }
   }

   # --- Return values
   return ($target, \@templates);
}

