#!/usr/bin/perl
# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

# AugmentPDB.pl - component of template-based modelling / leverage / 
# Structure Impact 

# For given template of interest ($seqid MD5 identifier), build a profile
# based on five iterations on uniprot90, create .pssm file, and create
# augmented PDB95 file.

# Based on ModPipe.pl
# Called by: TemplateBased.pl > WriteSGESI()

use Getopt::Long;
use File::Basename;
use File::Path;
use Cwd;
use strict;

# --- Load local modules
use PLLib::Utils;
use PLLib::Sequence;
use PLLib::Modeller;

use MPLib::Version;
use MPLib::Binaries;
use MPLib::MPInit;
use MPLib::MPUtils;
use MPLib::MPModules;
use MPLib::MPSelectHits;

# --- Get command line options
# Option names are matched case-sensitively.
$Getopt::Long::ignorecase = 0;

# Holders for the command line values; all stay undefined unless the
# corresponding option is given.
my ( $seqid, $pdbidchain, $conffile, $pirfile, $exitstage, $hitsmode, 
     $evaluehits, $help, $natpdb, $natchn, $cleanup, $scrstat);

# GetOptions returns false when it meets an unknown option or a malformed
# value (it has already printed a diagnostic by then). Stop right away in
# that case instead of running with a partially parsed command line.
GetOptions (
           "conf_file=s"                =>      \$conffile,
           "pir_file=s"                 =>      \$pirfile,
           "sequence_id=s"              =>      \$seqid,
           "pdb_id_chain=s"             =>      \$pdbidchain,
           "native_pdb=s"               =>      \$natpdb,
           "native_chn:s"               =>      \$natchn,
           "exit_stage=s"               =>      \$exitstage,
           "hits_mode=s"                =>      \$hitsmode,
           "evalue_hits=f"              =>      \$evaluehits,
           "set_score_statistics=s"     =>      \$scrstat,
           "clean_up=s"                 =>      \$cleanup,
           "help"                       =>      \$help,
           "version"                    =>      sub { VersionMessage() },
           )
   or die basename($0) . "__E> Invalid command line options (try --help)\n";

# --- Check command line options
if ( $help ){
   # Call with an explicit empty argument list; the bare "&usage;" form
   # would silently forward the current @_.
   usage();
   exit 0;
}

# --- Get Program name (used as the prefix of every message below)
my $subrname = GetSubrName();

# --- Check for configuration file
die "${subrname}__E> Cannot proceed without configuration file\n"
   unless ( $conffile && -e $conffile );

# --- Check for sequence md5 hash
die "${subrname}__E> Cannot proceed without sequence id\n"
   unless ( $seqid );

# --- Read in the configuration file
die "${subrname}__E> Failed initializing configuration\n"
   unless ( ModPipeInit( $conffile ) );

# --- Check if sequence exists
my $seqnam = SeqFileMP( $seqid );
die "${subrname}__E> Sequence file not found: $seqnam\n"
   unless ( -e $seqnam );

# --- Create the temporary directory for local stuff
my $tmpdir = "${init::tmpdir}/${seqid}";
MakeDirSys($tmpdir) ||
   die "${subrname}__E> Could not create temporary directory: $tmpdir: $!";

# --- Set default values
$exitstage  = 'NONE' unless ( $exitstage );
$hitsmode   = '111'  unless ( $hitsmode );
$cleanup    = 'ON'   unless ( $cleanup );
# Test definedness, not truth: an explicit "--evalue_hits 0" is a valid
# threshold and must not be replaced by the default.
$evaluehits = 1.0    unless ( defined( $evaluehits ) );
# Anything that does not contain OFF (including no value at all) means ON.
$scrstat    = 'ON'   unless ( defined( $scrstat ) && $scrstat =~ /OFF/i );

# A missing, empty or '_' chain identifier is stored as a single blank.
$natchn   = ' ' if ( ! $natchn || $natchn eq '' || $natchn eq '_' );
$natpdb     = undef unless ( $natpdb );
warn "${subrname}__M> Running in benchmarking mode. Target PDB: ${natpdb}\n"
   if ( defined($natpdb) );

# --- Store the current directory (output files are written there and we
#     return to it at the end)
my $currdir = cwd();

# --- Move into temporary directory
# All scratch files below use relative names, so carrying on after a failed
# chdir would scatter them in the caller's directory.
chdir( $tmpdir )
   or die "${subrname}__E> Could not change to temporary directory: $tmpdir: $!\n";

# -----------------------
# Begin main calculations
# -----------------------

# --- Calculate a profile -- five iterations on UniProt90 (which is default
#     database in conf file)

   # --- The profile is renamed after the PDB id + chain further down, and
   #     only 4- and 5-character ids are handled there. Check this before
   #     the profile calculation so that a bad or missing --pdb_id_chain
   #     fails with a clear message instead of an empty shell command.
   die "${subrname}__E> Cannot proceed without a 4- or 5-character PDB id and chain (--pdb_id_chain)\n"
      unless ( defined( $pdbidchain )
               && ( length( $pdbidchain ) == 4 || length( $pdbidchain ) == 5 ) );

   unless ( GetProfile( { 'seqid' => $seqid, 
                        } 
                      )
          ){
      warn "${subrname}__E> Failed to calculate BUILD_PROFILE profile\n";
      warn "${subrname}__E>    Sequence: $seqid\n";
      exit 1;
   }

   # --- Copy profile to new file name (consistent with PDB95 profile file
   #     names); re-write sequence name to PDB ID and chain; mark as structure
   #     ("X" rather than "S").
   my $seqdir = SeqDirMP( $seqid );
   my $prfnam = PrfFileMP( $seqid );
   my $new_prfnam = "${seqdir}/${pdbidchain}-uniprot90.prf";
   # The replacement text is space-padded; the 4-character form carries one
   # more blank than the 5-character form (presumably so that the "X" ends
   # up in the same column either way). Do not re-flow these strings.
   #                               ----+      ----1----+----2----+----3----+----4
   my $command;
   if (length($pdbidchain) == 5) {
       $command = "sed \"s/$seqid S/$pdbidchain                                    X/\" < ${prfnam} > ${new_prfnam}";
   } elsif (length($pdbidchain) == 4) {
       $command = "sed \"s/$seqid S/$pdbidchain                                     X/\" < ${prfnam} > ${new_prfnam}";
   }
   # system() returns non-zero when the shell command fails.
   if ( system( $command ) ){
      warn "${subrname}__E> Failed to edit profile file\n";
      warn "${subrname}__E> $command\n";
      die  "${subrname}__E> ... Will exit\n";
   }

   # --- Quit if this is the exit stage
   if ( $exitstage =~ /^PROFILE$/i ){
      warn "${subrname}__M> Chosen exit stage: $exitstage\n";
      warn "${subrname}__M>    ... Will stop here\n";
      # Leave the temporary directory before removing it, as the normal
      # completion path does.
      chdir( $currdir );
      CleanupTMP($tmpdir) if ( $cleanup =~ /\bON\b/i );
      exit 0;
   }

# --- Create PDB95 template database augmented with new template 

   # -- Check everything needed below before writing any output file, so a
   #    missing setting cannot leave a half-built or silently unaugmented
   #    database behind.
   # -- Check that PSSMDB file given in conf file
   defined( $init::xprfpssmdb ) 
      or die "${subrname}__E> XPRF_PSSMDB not given in conf file\n";

   # -- Check that the PDB95 profile list is known; without it the "cat"
   #    below would write a list holding only the new profile.
   defined( $init::xprflist )
      or die "${subrname}__E> Profile list file (xprflist) not given in conf file\n";

   # -- Check that PDB95 pir file given in conf file
   defined( $init::xprfpir ) 
      or die "${subrname}__E> XPRF_PIR not given in conf file\n";

   # -- Check the PIR file of the new template; without it the "cat" below
   #    would just copy the PDB95 PIR file unchanged. The path is resolved
   #    from the current (temporary) directory, exactly as the shell command
   #    further down resolves it.
   die "${subrname}__E> Cannot proceed without PIR file (--pir_file)\n"
      unless ( defined( $pirfile ) && $pirfile ne '' );
   die "${subrname}__E> PIR file not found: $pirfile\n"
      unless ( -e $pirfile );

   # -- Make .pssm file 
   # -- Write the profile file name to MakePSSMDB.py input file list.
   my $makepssmdbinputfile = "makepssmdb.in";
   my $fh_makepssmdbin = OpenNewFile( $makepssmdbinputfile )
      or die "${subrname}__E> Could not open file for writing: $makepssmdbinputfile\n";
   print $fh_makepssmdbin "$new_prfnam\n";
   # Buffered write errors only surface at close.
   close( $fh_makepssmdbin )
      or die "${subrname}__E> Could not write file: $makepssmdbinputfile: $!\n";

   # -- Output to run directory
   my $pssmfile = "${currdir}/${seqid}.pssm";

   # -- Build the MakePSSMDB.py command line: input list, then output file.
   #    ($command was declared for the profile step above and is reused.)
   $command = GetModPipeScript("python/MakePSSMDB.py")
              . " $makepssmdbinputfile $pssmfile 2>&1";

   # -- Run command
   warn "${subrname}__M> Creating .pssm file...\n";
   if ( system( $command ) ){
      warn "${subrname}__E> Failed to make .pssm file\n";
      warn "${subrname}__E> $command\n";
      die  "${subrname}__E> ... Will exit\n";
   }

   warn "${subrname}__M> Creating PDB95 profiles augmented with new template ...\n";

   # -- Cat new .pssm file onto new PDB95.pssm file.
   $command = "cat $init::xprfpssmdb $pssmfile > ${currdir}/pdb95_aug_prf.pssm";
   # DKTEMP
   print "[AugmentPDB] $command\n";
   if ( system( $command ) ) { 
      die "${subrname}__E> Unable to execute $command\n";
   }

   # -- Cat name of .prf file onto new profile .list file.
   $command = "cat $init::xprflist $makepssmdbinputfile > ${currdir}/pdb95_aug_prf.list";
   if ( system( $command ) ) {
      die "${subrname}__E> Unable to execute $command\n";
   }

   # -- Add sequence to PDB95 sequence file, and create corresponding .hdf5 
   #    file.
   my $aug_pdb_seqfile = "${currdir}/pdb95_aug_prf.pir";
   $command = "cat $init::xprfpir $pirfile > $aug_pdb_seqfile";
   # DKTEMP
   print "[AugmentPDB] $command\n";
   if ( system( $command ) ) {
      die "${subrname}__E> Unable to execute $command\n";
   }

   # -- Create the options hash for MakeHDF5.py
   #    NOTE(review): this output is named pdb_95_aug.hdf5 while the other
   #    outputs use the pdb95_aug_prf prefix; left as is because callers
   #    may rely on the name.
   my $hdf5_file = "${currdir}/pdb_95_aug.hdf5";
   my %makehdf5opt = (
                   "-f" => "PIR",
                   );
   # The hash is flattened into "key value" pairs on the command line.
   $command = GetModPipeScript("lib/python/modpipe/scripts/MakeHDF5.py") .
              " @{[ %makehdf5opt ]} $aug_pdb_seqfile $hdf5_file 2>&1";

   # -- Run command
   if ( system( $command ) ){
      warn "${subrname}__E> Failed to make augmented template file $aug_pdb_seqfile\n";
      warn "${subrname}__E> $command\n";
      die  "${subrname}__E> ... Will exit\n";
   }


# --- Move back to original directory
# All results are already written at this point, so a failure here is
# reported but does not stop the cleanup or change the exit status.
chdir($currdir)
   or warn "${subrname}__E> Could not change back to directory: $currdir: $!\n";

# --- Clean up temporary directory (unless switched off with --clean_up)
CleanupTMP($tmpdir) if ( $cleanup =~ /\bON\b/i );

# --- Exit finally
warn "${subrname}__M> Normal completion\n";
exit 0;


# ------------------------------------------------------------------------------
# --- Usage
# Print the command line help to standard output.
# Takes no arguments and returns the result of print; the caller decides
# whether to exit afterwards.
sub usage {
print <<EOF;

${0}:

     --conf_file		ModPipe configuration file. Cannot proceed 
				without this option.

     --sequence_id		Sequence Id. This is the MD5 digest of the 
   				sequence that has been added to the ModPipe 
				filesystem. Cannot proceed without this 
				option.

     --pdb_id_chain             PDB code plus chain identifier of the new
                                template (4 or 5 characters). The profile
                                is renamed after it.

     --pir_file                 PIR file with the sequence of the new
                                template. It is appended to the PDB95 PIR
                                file to build the augmented template
                                database.

     --native_pdb               The PDB code of the target sequence. A
                                convenience feature that can be used when
                                benchmarking with PDB sequences. 

     --native_chn               The chain identifier for native structure. (See 
                                above).

     --exit_stage		Choose the exit stage for the program. You can
				quit after one of the following: PROFILE,
				ALIGNMENTS, MODELS.
				Only PROFILE is acted on by this script.
				Default: NONE

     --hits_mode		Mode for calculating template hits. It is a
				four-letter code containing 1 (true) or 0
				(false) for each of Seq-Seq, Prf-Seq, Prf-Prf,
				and Seq-Prf, respectively. For instance, to
				calculate Seq-Seq and Prf-Prf, set it to 101.
				Default: 111

     --evalue_hits		The E-value threshold to get hits against
				template databases. This value controls hits
				from all three searches.
				Default: 1.0

     --set_score_statistics	This should be switched to OFF when testing since
                                the test database does not have enough profiles.
                                Only applies to profile-profile alignments
                                or hitsmode 001, 002, 003.
                                Default: ON

     --clean_up			Flag to clean up the temporary directory
				after all operations. Can be OFF or ON.
				Default: ON

     --version                  Report version information.

     --help                     This help. Pipe it through 'more' if it
                                scrolls off the screen.
EOF
}
