#!/usr/bin/perl
# This file is part of ModPipe, Copyright 1997-2009 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

use Getopt::Long;
use File::Basename;
use File::Path;
use Cwd;
use strict;

# --- Load local modules
use PLLib::Utils;
use PLLib::Sequence;
use PLLib::Modeller;
use PLLib::TSVModUtils;

use MPLib::Version;
use MPLib::Binaries;
use MPLib::MPInit;
use MPLib::MPUtils;
use MPLib::MPModules;
use MPLib::MPSelectHits;
use MPLib::Serialize;

# --- Get command line options
#     Option names are matched case-sensitively.
$Getopt::Long::ignorecase = 0;

# --- Storage for the parsed options. Each one stays undefined (or, for
#     the list, empty) unless the corresponding option is supplied.
my $conffile;            # --conf_file
my $seqid;               # --sequence_id
my @gather_options;      # --final_models_by (collects every occurrence)
my $tsvmod_flag;         # --score_by_tsvmod
my $natpdb;              # --native_pdb
my $natchn;              # --native_chn (value is optional)
my $exitstage;           # --exit_stage
my $hitsmode;            # --hits_mode
my $evaluehits;          # --evalue_hits (parsed as a real number)
my $scrstat;             # --set_score_statistics
my $cleanup;             # --clean_up
my $template;            # --template
my $template_option;     # --template_option
my $help;                # --help

GetOptions(
   'conf_file=s'            => \$conffile,
   'sequence_id=s'          => \$seqid,
   'final_models_by=s'      => \@gather_options,
   'score_by_tsvmod=s'      => \$tsvmod_flag,
   'native_pdb=s'           => \$natpdb,
   'native_chn:s'           => \$natchn,
   'exit_stage=s'           => \$exitstage,
   'hits_mode=s'            => \$hitsmode,
   'evalue_hits=f'          => \$evaluehits,
   'set_score_statistics=s' => \$scrstat,
   'clean_up=s'             => \$cleanup,
   'template=s'             => \$template,
   'template_option=s'      => \$template_option,
   'help'                   => \$help,
   'version'                => sub { VersionMessage() },
);

# --- Check command line options
#     --help wins over everything else: print the usage text and leave.
if ( $help ){
   usage();
   exit 0;
}

# --- Normalise the TSVMod switch to upper case. When --score_by_tsvmod is
#     not given this is the empty string (exactly what uc(undef) produced
#     before), so downstream comparisons see the same value as always.
$tsvmod_flag = defined($tsvmod_flag) ? uc($tsvmod_flag) : '';

# --- Get Program name
my $subrname = GetSubrName();

# --- Check for configuration file
die "${subrname}__E> Cannot proceed without configuration file\n"
   unless ( $conffile && -e $conffile );

# --- Check for sequence md5 hash
die "${subrname}__E> Cannot proceed without sequence id\n"
   unless ( $seqid );

# --- Read in the configuration file
die "${subrname}__E> Failed initializing ModPipe\n"
   unless ( ModPipeInit( $conffile ) );

# --- Check if sequence exists
my $seqdir = SeqDirMP( $seqid );
my $seqnam = SeqFileMP( $seqid );
die "${subrname}__E> Sequence file not found: $seqnam\n"
   unless ( -e $seqnam );

# --- Create the temporary directory for local stuff
my $tmpdir = "${init::tmpdir}/${seqid}";
MakeDirSys($tmpdir) ||
   die "${subrname}__E> Could not create temporary directory: $tmpdir: $!";

# --- Set default values
$exitstage  = 'NONE' unless ( $exitstage );

# '1110' is the 4-digit spelling of the former 3-digit default '111': the
# absent 4th digit was read as 0 by every substr() test in this script,
# so the searches that run by default are unchanged.
$hitsmode   = '1110' unless ( $hitsmode );
$cleanup    = 'ON'   unless ( $cleanup );

# Test definedness, not truth: an explicit "--evalue_hits 0" is a valid
# threshold and must not be silently replaced by the default.
$evaluehits = 1.0    unless ( defined $evaluehits );

# Anything that does not contain "OFF" (in any case) means ON.
$scrstat    = 'ON'   unless ( $scrstat =~ /OFF/i );

# A missing, empty or '_' chain identifier is mapped to a single blank.
$natchn     = ' '    if ( ! $natchn || $natchn eq '_' );
$natpdb     = undef  unless ( $natpdb );
warn "${subrname}__M> Running in benchmarking mode. Target PDB: ${natpdb}\n"
   if ( defined($natpdb) );

# --- Store the current directory
my $currdir = cwd();

# --- Move into temporary directory
#     Every later stage writes its files with relative names, so carrying
#     on after a failed chdir would scatter them in the caller's directory.
chdir( $tmpdir )
   or die "${subrname}__E> Could not change into temporary directory: $tmpdir: $!\n";

# -----------------------
# MODPIPE actions begin
# -----------------------

# --- Calculate needed profiles
#     Digits 2, 3 and 4 of --hits_mode decide which profiles are required.
my $d2 = substr($hitsmode, 1, 1);
my $d3 = substr($hitsmode, 2, 1);
my $d4 = substr($hitsmode, 3, 1);

# --- BUILD_PROFILE profile
#     Needed for --hits_mode (digit2=) 0100, 0300, (digit3=) 0010, 0030,
#     and (digit4=) 0002, 0003, 0004, 0005, 0006, 0007
my $need_build_profile =
   ( $d2 == 1 || $d2 == 3 || $d3 == 1 || $d3 == 3 || $d4 > 1 );

if ( $need_build_profile && ! GetProfile( $seqid ) ){
   warn "${subrname}__E> Failed to calculate BUILD_PROFILE profile\n";
   warn "${subrname}__E>    Sequence: $seqid\n";
   exit 1;
}

# --- PSI-Blast profile
#     Needed for --hits_mode (digit2=) 0200, 0300, (digit3=) 0020, 0030.
my $need_psiblast_profile =
   ( $d2 == 2 || $d2 == 3 || $d3 == 2 || $d3 == 3 );

if ( $need_psiblast_profile && ! GetPsiBlastProfile( $seqid ) ){
   warn "${subrname}__E> Failed to calculate PSI-Blast profile\n";
   warn "${subrname}__E>    Sequence: $seqid\n";
   exit 1;
}

# --- Stop after the profiles if PROFILE was chosen as exit stage.
#     Leave the temporary directory before (optionally) removing it.
if ( $exitstage =~ /^PROFILE$/i ){
   warn "${subrname}__M> Chosen exit stage: $exitstage\n";
   warn "${subrname}__M>    ... Will stop here\n";
   chdir($currdir);
   if ( $cleanup =~ /\bON\b/i ){
      CleanupTMP($tmpdir);
   }
   exit 0;
}

# --- Get hits against template database
#     Every enabled search appends its hits to @outhits; the combined list
#     is then written to the hit file in the temporary directory.

# --- Open the hit file
my $hitfile = "${seqid}.hit";
my $fh_hit  = OpenNewFile( $hitfile );
my @outhits;

# --- Pull the four hits-mode digits out once. A 3-character mode yields
#     an empty 4th digit, which compares as 0 in the tests below.
my $mode_seqseq = substr($hitsmode, 0, 1);
my $mode_prfseq = substr($hitsmode, 1, 1);
my $mode_prfprf = substr($hitsmode, 2, 1);
my $mode_seqprf = substr($hitsmode, 3, 1);

# --- Seq-Seq hits - 1000
if ( $mode_seqseq == 1 ){
   GetHits100( $seqid, $natpdb, $natchn, \@outhits, $evaluehits )
      or die "${subrname}__E> Failed to get Seq-Seq hits: $seqid\n";
}

# --- Prf-Seq hits - 0100 (0300 = 0100 & 0200)
if ( $mode_prfseq == 1 || $mode_prfseq == 3 ){
   GetHits010( $seqid, $natpdb, $natchn, \@outhits, $evaluehits )
      or die "${subrname}__E> Failed to get Prf-Seq hits: $seqid\n";
}

# --- Prf-Seq hits using PSI-Blast generated profiles - 0200
#     (0300 = 0100 & 0200)
if ( $mode_prfseq == 2 || $mode_prfseq == 3 ){
   GetHits020( $seqid, $natpdb, $natchn, \@outhits, $evaluehits )
      or die "${subrname}__E> Failed to get Prf-Seq hits: $seqid\n";
}

# --- Prf-Prf hits - 0010 (0030 = 0010 & 0020)
if ( $mode_prfprf == 1 || $mode_prfprf == 3 ){
   GetHitsPP( $seqid, $natpdb, $natchn, \@outhits, $evaluehits, $scrstat,
      'BP', 'CCMAT', '0010', -100, -700, -70 )
      or die "${subrname}__E> Failed to get Prf-Prf hits (0010): $seqid\n";
}

# --- Prf-Prf hits using PSI-Blast generated profiles - 0020
#     (0030 = 0010 & 0020)
if ( $mode_prfprf == 2 || $mode_prfprf == 3 ){
   GetHitsPP( $seqid, $natpdb, $natchn, \@outhits, $evaluehits, $scrstat,
      'PB', 'CCMAT', '0020', -100, -700, -70 )
      or die "${subrname}__E> Failed to get Prf-Prf hits (0020): $seqid\n";
}

# --- Searches selected by the 4th digit, run in the order 0001, 0002, 0004.
#     Each row: the digit values that switch the search on, followed by the
#     three search-specific arguments handed to GetHitsPP (the last one is
#     also the mode label used in the error message).
#     NOTE(review): the comment this replaces described 0004 as using
#     PSI-Blast, yet the call passes 'BP' exactly like 0002 - confirm which
#     one is intended.
my @digit4_searches = (
   [ [ 1, 3, 5, 7 ], 'SS', 'SEQPRF',          '0001' ],   # Seq-Prf
   [ [ 2, 3, 6, 7 ], 'BP', 'CONS_BY_MAXPSSM', '0002' ],   # Cons-Prf
   [ [ 4, 5, 6, 7 ], 'BP', 'CONS_BY_MAXFREQ', '0004' ],   # Cons-Prf
);
foreach my $search ( @digit4_searches ){
   my ( $digits, $prf_arg, $method_arg, $label ) = @$search;
   next unless ( grep { $mode_seqprf == $_ } @$digits );

   GetHitsPP( $seqid, $natpdb, $natchn, \@outhits, $evaluehits, $scrstat,
      $prf_arg, $method_arg, $label, -450, -500, -50 )
      or die "${subrname}__E> Failed to get Seq-Prf hits ($label): $seqid\n";
}

# --- Write the hit file
WriteHitsFile(\@outhits, $fh_hit);
close($fh_hit);

# -- Count the number of hits
my $hitcnt = scalar(@outhits);

# --- Quit if there are no hits to process
#     Leave the temporary directory before (optionally) removing it.
unless ( $hitcnt > 0 ){
   warn "${subrname}__M> No hits found against template database\n";
   warn "${subrname}__M>    ... Will stop here\n";
   chdir($currdir);
   CleanupTMP($tmpdir) if ( $cleanup =~ /\bON\b/i );
   exit 0;
}

# --- If comparing input template with other templates, but input template
#     is not among hits, quit.
my $template_hit;
if ( $template && ($template ne "NONE")) {
   $template_hit = FindTemplate( \@outhits, $template,$template_option);
   unless ( $template_hit) {
      warn "${subrname}__M> Selected Template $template not in hits\n";
      warn "${subrname}__M>    ... Will stop here\n";

      # --- Copy the hitfile to seqdir for diagnostic purposes.
      #     Must happen while still inside the temporary directory, since
      #     the hit file is addressed by its relative name.
      die "${subrname}__E> Failed copying hit file to Modpipe file structure: ${seqdir}/${hitfile}.no_template\n"
         unless( CopyFile($hitfile, "${seqdir}/${hitfile}.no_template") );

      # --- Actually stop, the same way the other early exits do: leave the
      #     temporary directory, remove it if requested, and exit cleanly.
      #     Without the exit the script carried on into clustering and
      #     modeling from a directory that had just been cleaned up.
      chdir($currdir);
      CleanupTMP($tmpdir) if ( $cleanup =~ /\bON\b/i );
      exit 0;
   }
}

# --- Copy the hitfile to seqdir
die "${subrname}__E> Failed copying hit file to repository\n"
   unless( CopyFile($hitfile, "${seqdir}/${hitfile}") );

# --- Cluster hits (alignments) to remove redundancy
#     The result is the sel file: either the cluster representatives or,
#     when clustering is off or there is a single hit, the plain hit list.

# --- Specify filenames
my $selfile = "${seqid}.sel";
my $fh_sel  = OpenNewFile( $selfile );

# True when a template of interest was named on the command line
my $have_template = ( $template && ($template ne "NONE") );

# Array reference of the hits that will end up in the sel file
my $selected;

if ( $init::clusterali =~ /\bON\b/i && $hitcnt > 1){
   # --- Cluster alignments
   my ($representatives, $members) = SelectHits( $seqid, \@outhits)
      or die "${subrname}__E> Failed hit selection: $seqid\n";

   # --- Template-of-interest comparison: if clustering dropped the
   #     template, put its hit back among the representatives
   if ( $have_template
        && ! FindTemplate( $representatives, $template,$template_option) ) {
      push @$representatives, $template_hit;
      warn "${subrname}__M> Adding template of interest to clustered alignments\n";
   }

   $selected = $representatives;
} else {
   # -- No clustering: the sel file gets the contents of the hit file
   warn "${subrname}__M> Alignments will not be clustered\n";
   warn "${subrname}__M>  - Option set to OFF or found single alignment\n";

   $selected = \@outhits;
}

# --- With --template_option TEMPLATE only hits for that template are kept
if ( $have_template && $template_option eq "TEMPLATE" ) {
   $selected = RemoveNonTemplate($selected,$template);
}

# --- Write and close the sel file
WriteHitsFile($selected, $fh_sel);
close( $fh_sel);

# --- Copy the selfile to seqdir
unless ( CopyFile($selfile, "${seqdir}/${selfile}") ){
   die "${subrname}__E> Failed copying sel file to repository\n";
}

# --- Stop after the alignments if ALIGNMENTS was chosen as exit stage.
#     Leave the temporary directory before (optionally) removing it.
if ( $exitstage =~ /^ALIGNMENTS$/i ){
   warn "${subrname}__M> Chosen exit stage: $exitstage\n";
   warn "${subrname}__M>    ... Will stop here\n";
   chdir($currdir);
   if ( $cleanup =~ /\bON\b/i ){
      CleanupTMP($tmpdir);
   }
   exit 0;
}

# --- Build models for all hits in the hitfile

# --- Open the mod file
my $modfile = "${seqid}.mod";
my $fh_mod  = OpenNewFile( $modfile );

# --- Build models
GetModels( $seqid, $fh_mod )
   or die "${subrname}__E> Failed to build models: $seqid\n";

# --- Close the mod file
close($fh_mod);

# --- Copy the modfile to seqdir
die "${subrname}__E> Failed copying mod file to repository\n"
   unless( CopyFile($modfile, "${seqdir}/${modfile}") );

# --- Rate models using TSVMod
#     Uses external TSVMod program to rate
#     Returns Method (MatchbySS, full or reduced(SF, SR), MatchbyTemplate (T)
#     or "not enough similar structures" (NA), and predicted RMSD and NO35
#     NOTE(review): a failure here only warns, but the copy that follows is
#     then attempted with a false file name - confirm whether a TSVMod
#     failure is meant to be fatal.
my $newmodfile = ScoreByTSVMod ($init::tsvmodexe,$seqid,$tsvmod_flag)
   or warn "${subrname}__W> Failed scoring models by TSVMod\n";

# --- Copy the new modfile to seqdir
die "${subrname}__E> Failed copying mod file to repository\n"
   unless( CopyFile($newmodfile, ModFileMP($seqid)) );

# --- Rate models based on sequence/structure quality measures
#     This takes the model yaml file and introduces two new
#     entries - one with individual ratings and one with a
#     combined rating (mpqs). The latter, for now, is simply the
#     sum of all individual scores.
#     $newmodfile is reused here; declaring it a second time with "my"
#     would mask the declaration above within the same scope.
$newmodfile = RateModels( $seqid,$tsvmod_flag )
   or warn "${subrname}__W> Failed rating models\n";

# --- Copy the new modfile to seqdir
die "${subrname}__E> Failed copying mod file to repository\n"
   unless( CopyFile($newmodfile, ModFileMP($seqid)) );

# --- Move back to original directory
chdir($currdir);

# --- Local model gathering: only when --final_models_by was given.
#     Builds the GatherModMP.py command line and runs it through the shell.
if ( @gather_options ) {

   # Fixed leading arguments of the gathering script
   my @gather_args = (
      " -c $conffile",
      " --seq_id ${seqid}",
      " --local_only True",
   );

   # Selection criteria that are recognised; TSVMOD only counts when
   # TSVMod scoring has not been switched off
   my @allscores = ( "MPQS", "ALL", "DOPE", "INPUT_TEMPLATE", "LONGEST_GA341",
                     "SEQID", "GA341", "LONGEST_DOPE" );
   push @allscores, "TSVMOD" if ( $tsvmod_flag ne "OFF" );

   # Each --final_models_by value may itself be a comma-separated list
   @gather_options = map { split(/\,/, $_) } @gather_options;

   # Requested criteria are emitted in the order of @allscores; anything
   # not listed there is silently dropped
   foreach my $known ( @allscores ) {
      foreach my $wanted ( grep { $_ eq $known } @gather_options ) {
         if ( $wanted eq "INPUT_TEMPLATE"
              && $template && ($template ne "NONE") ) {
            push @gather_args, " --template ${template}";
         }
         push @gather_args, " --final_models_by ${wanted}";
      }
   }

   my $command = GetModPipeScript("main/GatherModMP.py")
                   . " " . join( '', @gather_args ) . " 2>&1";

   # -- Run Command (any non-zero return from system() is a failure)
   my $status = system( $command );
   if ( $status ){
      warn "${subrname}__E> Failed to collect models from repository\n";
      warn "${subrname}__E> $command\n";
      die  "${subrname}__E> ... Will exit\n";
   }
}

# --- Clean up temporary directory
if ( $cleanup =~ /\bON\b/i ){
   CleanupTMP($tmpdir);
}

# --- Exit finally
exit 0;

# --- Keep only the hits that involve the template of interest
#     Arguments: reference to an array of hit objects, and the template
#                identifier (code immediately followed by chain).
#     Returns:   reference to a new array with the matching hits in their
#                original order. A hit is kept at most once, even when
#                several of its template entries match the identifier.
sub RemoveNonTemplate {

   my ( $hits, $template ) = @_;
   my @filtered_hits;

   HIT:
   foreach my $hit (@$hits) {
      foreach my $candidate (@{ $hit->templates }) {
         if ( $candidate->code() . $candidate->chain() eq $template ) {
            push @filtered_hits, $hit;
            # One match is enough; moving on avoids listing the hit twice
            next HIT;
         }
      }
   }
   return \@filtered_hits;
}
# --- Usage
#     Prints the command line help to the currently selected output
#     handle (STDOUT unless changed). Takes no arguments and does not
#     exit; the caller decides what happens next.
sub usage {
print <<EOF;

${0}:

      --version                 Report version number of this program.
      --help                    This help. Pipe it through 'more' if it
                                scrolls off the screen.
      --conf_file		ModPipe configuration file. Mandatory. 
      --sequence_id		Sequence Id (MD5 digest of the sequence)
                                in the ModPipe filesystem. Mandatory.
      --native_pdb              The PDB code of the target sequence. A
                                convenience feature for benchmarking with PDB sequences. 
      --native_chn              The chain identifier for native structure. (See above).
      --exit_stage		Choose the exit stage for the program. You can
				quit after one of the following: PROFILE,
				ALIGNMENTS. (MODELS is accepted but currently
				has no effect.)  Default: NONE
      --hits_mode		Mode(s) for finding template hits, as a 4-digit
                                code. The following modes are available:
                                 * 1000: Sequence-Sequence search.
                                 * 0100: Profile-Sequence search using MODELLER
                                         profile.build() profiles.
                                 * 0200: Profile-Sequence search using PSI-BLAST
                                         profiles.
                                 * 0010: Profile-Profile search using MODELLER
                                         profile.build() profiles.
                                 * 0020: Profile-Profile search using PSI-BLAST
                                         profiles.
                                 * 0001: Sequence-Profile search.
                                 * 0002: Sequence-Profile search with
                                         Max-PSSM scoring.
                                 * 0004: Sequence-Profile search with
                                         Max-frequency scoring.
                                Multiple methods can be requested simply by
                                summing. For example, to calculate Seq-Seq
                                and Prf-Prf, --hits_mode=1010.  Default: 1110
      --evalue_hits		The E-value threshold to get hits against
				template databases. This value controls hits
				from all three searches.  Default: 1.0
      --set_score_statistics	This should be switched to OFF when testing since
                                the test database does not have enough profiles.
                                Only applies to profile-profile alignments
                                (hits_mode 0010, 0020, 0030).  Default: ON
      --clean_up		Flag to clean up the temporary directory
				after all operations ([ON], OFF). 
      --template                If present, creates models only if this PDB ID
                                and chain (sample: abcdA) is among hits from
                                the template database; also ensures that the
                                input template remains after clustering.
      --template_option         [ALL], TEMPLATE. If --template is given, 
                                models [ALL] hits or only the hits for the TEMPLATE
      --score_by_tsvmod         [ON],OFF (in case tsvmod is not installed)
      --final_models_by		If present, performs model-gathering (.fin file) in
                                sequence directory. (MPQS,ALL,DOPE,INPUT_TEMPLATE,
                                LONGEST_GA341,SEQID,LONGEST_DOPE,GA341,TSVMOD)
                                May be repeated or given as a comma-separated list.
EOF
}
