#!/usr/bin/perl
# This file is part of ModPipe, Copyright 1997-2009 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

# Template-based modeling / "Leverage" / "Structure Impact".

# For given template of interest (new structure), model all sequences
# that are matches (hits) to template of interest.  Build models for each 
# sequence with all potential templates from PDB95 ("old structures") and the 
# template of interest.
# Used to measure the impact -- how many sequences can be newly modelled, or 
# better modelled with the new structure.
# Also used to force the modeling of a sequence set using a specific PDB file

use strict;
use Getopt::Long;
use File::Basename;
use File::Glob ':glob';
use Cwd;

# --- Load local modules
use PLLib::Utils;
use PLLib::Sequence;
use PLLib::Modeller;
use PLLib::PDBUtils;

use MPLib::Version;
use MPLib::Binaries;
use MPLib::MPInit;
use MPLib::MPUtils;
use MPLib::MPModules;

# --- Get command line options
# Option names are matched case-sensitively.
$Getopt::Long::ignorecase = 0;

# -- Template selection, run layout and ModPipe stage options.
my ( $templatepdbid, $templatepdbchain,
     $fullsequences_fasta, $exitstage, $hitsmode, $evaluehits, $help,
     $cleanup, $tmpdir_profile,$tmpdir_sequence, $datdir, 
     $runname, $rundir, $selmodby, $finfile, $runwhere);

# -- SGE submission, output file and template options.
#    ($help is declared only once, in the list above; a second 'my'
#    declaration of it here used to mask the first one.)
my ( $logdir, $jobname, $modpipe, $disks, $nodes, $max_sequences, $tsvmod_flag,
     $priority, $sgefile, $modfile, $hitfile, $conffile,$template_option,$pdb_file );

# -- Selection schemes given via (possibly repeated) --final_models_by.
my @final_modby = ();

# -- Passed on to GetFullSeqs.pl as --profile_update.
my $profile_update="ON";

GetOptions (
           "pdb_id=s"                  =>      \$templatepdbid,
           "pdb_chain=s"               =>      \$templatepdbchain,
           "pdb_file=s"               =>       \$pdb_file,

           "fullseqs_file=s"           =>      \$fullsequences_fasta,
           "profile_update=s"           =>     \$profile_update,
           "template_option=s"           =>     \$template_option,

           "exit_stage=s"              =>      \$exitstage,
           "hits_mode=s"               =>      \$hitsmode,
           "score_by_tsvmod=s"             =>  \$tsvmod_flag,
           "evalue_hits=f"             =>      \$evaluehits,
           "clean_up=s"                =>      \$cleanup,
           "runname=s"                 =>      \$runname,
           "run_directory=s"           =>      \$rundir,
           "output_models_file=s"      =>      \$modfile,
           "output_hits_file=s"        =>      \$hitfile,
           "final_models_by=s"         =>      \@final_modby,
           "output_final_models=s"     =>      \$finfile,
           "run_where=s"               =>      \$runwhere,
           "max_sequences=s"           =>      \$max_sequences,
           "help"                      =>      \$help,
           "version"                   =>      sub { VersionMessage() },

           "tmpdir_profile=s"          =>      \$tmpdir_profile,
           "tmpdir_sequence=s"         =>      \$tmpdir_sequence,
           "conf_file=s"               =>      \$conffile,
           "datdir=s"                  =>      \$datdir,

           "log_directory=s"           =>      \$logdir,
           "job_name=s"                =>      \$jobname,
           "modpipe=s"                 =>      \$modpipe,
           "disks=s"                   =>      \$disks,
           "nodes=s"                   =>      \$nodes,
           "priority=i"                =>      \$priority,
           "sge_file=s"                =>      \$sgefile,
           );

# --- Check command line options
# -- Help requested: print the usage text and stop.
if ( $help ){
   usage();
   exit 0;
}

# -- Cap on the number of sequences harvested from the profile.
if (!$max_sequences) {
    $max_sequences=30000;
}

# --- Get Program name
my $subrname = GetSubrName();

# -- Check mandatory options
# -- PDB ID.  Convert to lowercase.
die "${subrname}__E> Cannot proceed without a PDB ID\n"
  unless ( $templatepdbid );
$templatepdbid = lc( $templatepdbid );

# -- Chain ID.  Keep a lowercase and an uppercase copy (both empty if no
#    chain was given).
$templatepdbchain = lc( $templatepdbchain );
my $ucchain = uc( $templatepdbchain );

# -- Set the default values
$modpipe = GetModPipeScript("main/ModPipe.pl") unless ( $modpipe );
$exitstage  = 'MODELS' unless ( $exitstage );
$hitsmode   = '1001'  unless ( $hitsmode );
$evaluehits = 1.0    unless ( $evaluehits );
$runname    = "TB-$templatepdbid$ucchain" . time unless ( $runname );
$cleanup    = 'ON'   unless ( $cleanup );

# -- getlogin() returns nothing when there is no controlling terminal
#    (e.g. when started from a daemon or a batch job); fall back to the
#    account name of the real user id so the scratch path never becomes
#    "/scratch//TB/...".
my $current_user = getlogin() || getpwuid($<);
$tmpdir_profile     = "/scratch/${current_user}/TB/${runname}" unless ( $tmpdir_profile );
$tmpdir_sequence     = "/scratch/${current_user}/TB/${runname}" unless ( $tmpdir_sequence );
$rundir     = cwd() . "/${runname}" unless ( $rundir );
$datdir     = "${rundir}/data" unless ( $datdir );

$logdir   = "${rundir}/sge-logs" unless ( $logdir );
$jobname  = 'ModPipe-TB' unless ( $jobname );
$priority = -10 unless ( defined( $priority ) );
$sgefile  = 'sge-modpipe.csh' unless ( $sgefile );

$modfile = "${runname}.mod" unless ( $modfile );
$hitfile = "${runname}.hit" unless ( $hitfile );
$finfile = "${runname}.fin" unless ( $finfile );

# -- Model selection schemes.  Apply the default first and only then build
#    the comma-separated string handed to ModPipe / GatherModMP; building
#    the string before the default was applied left it empty whenever
#    --final_models_by was not given.
push @final_modby, 'SEQID','MPQS' unless ( @final_modby );
my $final_modby_str = join ",", @final_modby;
$final_modby_str =~ s/\s//g;

$runwhere = 'SGE' unless ( $runwhere );

# -- Verify configuration options
die "${subrname}__E> RUN_DIRECTORY option should have an absolute path\n"
   unless ( $rundir =~ /^\// );

# -- Create the run directory
die "${subrname}__E> Failed making run directory: $rundir\n"
  unless ( CheckDir( $rundir ) );

# -- Verify configuration options
die "${subrname}__E> DATDIR option should have an absolute path\n"
   unless ( $datdir =~ /^\// );

# -- Create the data directory
die "${subrname}__E> Failed making data directory: $datdir\n"
  unless ( CheckDir( $datdir ) );

# --- Check if log directory exists
die "${subrname}__E> Could not create log directory: $logdir\n"
   unless ( CheckDir( $logdir ) );

# -- Print Time
printf "%s\n", GetTime();

# -- Move into rundir.  Everything below uses paths relative to it, so a
#    failed chdir must stop the run instead of writing into the wrong place.
chdir($rundir)
   or die "${subrname}__E> Could not change into run directory $rundir: $!\n";

# -- Extract chains from template-of-interest PDB.
   # -- Read the template ModPipe config file.  $sequence_conf is filled in
   #    later, inside main().
   my ($template_conf ,$sequence_conf);
   # Abuse of GetModPipeScript...
   # Fall back to the template conf shipped with ModPipe when no (existing)
   # --conf_file was given.
   unless (-e $conffile) {$conffile = GetModPipeScript("web/conf/template_modpipe.conf")};
   die "${subrname}__E> Failed reading template ModPipe configuration\n"
      unless ( $template_conf = ReadConf($conffile) );

  # -- Check that PDB file exists.
  my $pdbrepository = $template_conf->{'PDB_REPOSITORY'};
  my ($templatepdbfile,$local_pdbrepository);
  if ($pdb_file && -e $pdb_file) {
      # -- User-supplied PDB file: its directory part becomes the local
      #    repository handed to MakeChains.py (empty if the path has no
      #    directory part, in which case PDB_REPOSITORY is used below).
      my @path_parts = split(/\//,$pdb_file);
      pop(@path_parts);
      $local_pdbrepository=join("/",@path_parts);
      $templatepdbfile=$pdb_file;
  } else {
      $templatepdbfile = CheckPDBFile($templatepdbid, $pdbrepository);
  }
  
  die "${subrname}__E> Could not find PDB file ${templatepdbfile}\n"
     unless ( -e $templatepdbfile );

  # -- Run MakeChains.py. 
  # Write the PDB ID to the MakeChains.py input file list.
  my $makechainsinputfile = "makechains.in";
  my $fh_makechainsin = OpenNewFile( $makechainsinputfile ) or
     die "${subrname}__E> Could not open file $makechainsinputfile\n";
  print $fh_makechainsin "$templatepdbid\n";
  close( $fh_makechainsin );

  # -- If chain not specified, remove a combined chains file left over from
  #    an earlier run, if any.
  my $chainsfile;
  my %makechainsopt = (
                      "-p" => $local_pdbrepository||$pdbrepository,
                      "-f" => 'FASTA',
                      "-s" => 'structureE',
                      );
  if ( ! $templatepdbchain ) {
     $chainsfile = "${templatepdbid}.chn";
#     $makechainsopt{"-o"} = $chainsfile;
     if ( -e $chainsfile ) {
        unlink( $chainsfile );
     }
  }

  # -- The option hash is flattened into "key value key value ..." on the
  #    command line.
  my $command = GetModPipeScript("python/MakeChains.py")
                . " @{[ %makechainsopt ]} $makechainsinputfile 2>&1";

  # -- Run command
  warn "${subrname}__M> Extracting sequence for chain(s) from PDB file for ${templatepdbid} (FASTA)...\n";
  if ( system( $command ) ){
     warn "${subrname}__E> Failed extracting PDB sequence\n";
     warn "${subrname}__E> $command\n";
     die  "${subrname}__E> ... Will exit\n";
  }

# -- Build the list of chain files to model (@chainfiles).
#    If a chain was specified on the command line, just put it in the list.
#    Otherwise look at the chain files present in the current directory and,
#    if there are several, remove duplicate chains first.
my @chainfiles;
if ( $templatepdbchain ) {
   push @chainfiles, "${templatepdbid}${ucchain}.chn";
} else {

  # -- Collect the *.chn files in the current (run) directory.
  opendir(my $dirhandle, ".")
     or die "${subrname}__E> Could not read current directory: $!\n";
  my @chainsfiles = grep(/\.chn$/,readdir($dirhandle));
  closedir($dirhandle);

  my $n_chains = scalar( @chainsfiles );

  if ( $n_chains > 1 ) {
     # -- Several chains.  Concatenate all chain files into a single file
     #    that is then checked for duplicate sequences.
     my $allchains="${templatepdbid}.chn";
     open(my $allchains_fh, '>', $allchains)
        or die "${subrname}__E> Could not open file $allchains: $!\n";
     foreach my $file (@chainsfiles) {
         open(my $chain_fh, '<', $file)
            or die "${subrname}__E> Could not open file $file: $!\n";
         while (my $line=<$chain_fh>) {
             print {$allchains_fh} $line;
         }
         close ($chain_fh);
     }
     # -- Buffered write errors only surface at close.
     close ($allchains_fh)
        or die "${subrname}__E> Could not write file $allchains: $!\n";
    
     # -- Delete duplicates.
     my %uniqueseqopt = (
                         "-i" => $allchains,
                         "-f" => 'FASTA',
                        );
     my $uniq_command = GetModPipeScript("lib/python/modpipe/scripts/UniqueSeq.py")
                        . " @{[ %uniqueseqopt ]} 2>&1";
     warn "${subrname}__M> Removing duplicate chains...\n";
     if ( system( $uniq_command ) ){
        die "${subrname}__E> Failed de-duping chains file\n";
     }
     
     # -- Get list of chain files (one <pdbid><chain>.fasta per chain kept).
     @chainfiles = bsd_glob( "${templatepdbid}?.fasta" );
     if ( scalar( @chainfiles ) == 1 ) {
        # -- Character 5 of the file name is the chain ID.
        $ucchain = substr( $chainfiles[0], 4, 1 );
     }
  } elsif ( $n_chains == 1 ) {
     # -- Just one chain file.  Use file as is.  Get chain ID (character 5
     #    of the file name) and queue the file for modeling; previously the
     #    file was never added to the list, so nothing was modeled.
     $ucchain = substr( $chainsfiles[0],4,1 );
     push @chainfiles, $chainsfiles[0];
  } else {
     # -- Nothing to model: fail loudly instead of finishing silently.
     die "${subrname}__E> No chain files (*.chn) found for ${templatepdbid}\n";
  }
}

# -- Run the main routine for each chain.  If more than one chain to run, 
#    create separate rundir for each.
my $n_chainfiles = scalar( @chainfiles );
my $base_rundir;
my $base_datdir;
if ( $n_chainfiles > 1 ) {
   $base_rundir = $rundir;
   $base_datdir = $datdir;
}

for my $i_chain ( 0 .. $n_chainfiles - 1 ) {
   my $chainfile = $chainfiles[$i_chain];

   # -- If more than one chain, create sub-rundir and sub-datdir for this chain,
   #    and chdir there.  The sub-rundir is the base rundir with the chain
   #    index appended; main() picks up $rundir, $datdir and $ucchain.
   if ( $n_chainfiles > 1 ) {
      # -- Chain file names are relative to the base rundir.
      chdir( $base_rundir )
         or die "${subrname}__E> Could not change into run directory $base_rundir: $!\n";
      $rundir = $base_rundir . $i_chain;
      $datdir = "${rundir}/data";
      die "${subrname}__E> Failed making run directory: $rundir\n"
         unless ( CheckDir( $rundir ) );
      die "${subrname}__E> Failed making data directory: $datdir\n"
         unless ( CheckDir( $datdir ) );

      my $target = "${rundir}/" . basename( $chainfile );
      die "${subrname}__E> Could not copy $chainfile to $target\n"
         unless ( CopyFile( $chainfile, $target ) );

      # -- Set chain ID (character 5 of the chain file name).
      $ucchain = substr( $chainfile, 4, 1 );

      chdir( $rundir )
         or die "${subrname}__E> Could not change into run directory $rundir: $!\n";
   }
   main( $chainfile );
}


# ------------------------------------------------------------------------------
# Model all sequences that match one template chain.
#
# Argument:
#    $chainfile - sequence file of the template chain, relative to the
#                 current directory.  If it does not exist, the chain A file
#                 ("<first four characters>A.chn") is tried instead.
#
# Uses the file-level settings parsed from the command line ($rundir,
# $datdir, $ucchain, $template_conf, ...).  Steps:
#    1. write the ModPipe configuration file for the profile stage;
#    2. unless --fullseqs_file was given: add the chain to the repository,
#       build or copy its profile, and collect the matching full-length
#       sequences (AugmentPDB.pl / GetFullSeqs.pl);
#    3. add the full-length sequences to the repository;
#    4. run ModPipe on them, via SGE or locally;
#    5. gather the models (GatherModMP.py) and create the <runname>.unq link.
#
# Returns early (after printing a message) when called with the wrong number
# of arguments; dies on any failed step.
sub main {

   # --- Get subroutine name
   my $subrname = GetSubrName();

   # --- Check arguments
   my $nargs = 1;

   unless ( scalar(@_) == $nargs ){
      print "${subrname}__D> Insufficient arguments\n";
      return;
   }

   # --- Reassign input arguments
   my ( $chainfile ) = @_;

   # -- Check that chain file exists, use it.  Otherwise fall back to
   #    chain A.  Remember the requested name for the error message.
   my $original_chainfile = $chainfile;
   unless (-e $chainfile) {
       warn "${subrname}_W> Could not find chain file $chainfile\n";
       warn "${subrname}_W> Using Chain A instead\n";
       
       $chainfile=substr($chainfile,0,4)."A.chn";
       $ucchain="A";
   }
   unless ( -e $chainfile ) {
       die "${subrname}_E> Could not find chain file $original_chainfile and $chainfile\n"
   }

   # -- Configuration file for the augmentation of the databases and the
   #    creation of the full uniprot profile.
   my $conffile = "$rundir/modpipe.conf";

   # -- Configuration file for the modeling of all sequences in the
   #    uniprot profile.
   my $seq_conffile = "$rundir/modpipe_sequence.conf";

   # -- Work on a per-chain copy of the template configuration, so that the
   #    overrides made for this chain do not leak into the next chain when
   #    several chains are processed.
   #    NOTE(review): assumes ReadConf returns a plain hash reference (it is
   #    only ever accessed as ->{KEY} in this script) -- confirm.
   my $chain_conf = { %$template_conf };

   # -- Override some entries in the template config file
   $chain_conf->{'DATDIR'} = $datdir;
   $chain_conf->{'TMPDIR'} = $tmpdir_profile . $ucchain;

   my $pdbidchain = "${templatepdbid}${ucchain}";

   # -- See if template-chain of interest is a representative of a PDB95 
   #    cluster -- don't need augmented file, can use existing uniprot90
   #    profile.

   # -- Grep for template-chain in PDB95 list.
   my $listfile = $chain_conf->{'XPRF_LIST'};
   defined( $listfile ) or
      die "${subrname}__E> XPRF_LIST not given in conf file\n";

   if ( ! -e $listfile ) {
      die "${subrname}__E> Could not find XPRF_LIST file $listfile\n";
   }
   my $grep_result = `grep $pdbidchain $listfile`;
   my $use_pdb95_prf_f = 0;
   if ( $grep_result ) {
      # -- Template of interest is a cluster representative.  Set flag to copy 
      #    profile file (to seqdir file named <seqmd5id>-uniprot90.prf).
      $use_pdb95_prf_f = 1;
   } else {
      # -- Template of interest not in PDB95 list.  Reset conf file to use 
      #    augmented PDB file (used in ModPipe).
      $chain_conf->{'TEMPLATESEQDB'} = "${rundir}/pdb_95_aug.hdf5";
   }

   # -- Write the configuration file
   my $fh_conf = OpenNewFile( $conffile ) or
      die "${subrname}__E> Could not open file $conffile\n";
   WriteConf($fh_conf, $chain_conf);
   close($fh_conf);

   # -- Do only if full-length sequences file not given.
   #    Go through steps to build profiles and find matching sequences.
   #    The file name is kept in a local variable: overwriting the global
   #    option made every chain after the first one skip this stage.
   #    NOTE(review): when --fullseqs_file is given, $seq_conffile is not
   #    written by this run; it presumably has to exist in the run directory
   #    already -- confirm.

   my $fullseqs_file = $fullsequences_fasta;
   my $external_sequences="ON";
   if ( ! $fullseqs_file ) {
      $external_sequences="OFF";

      # --- Read in the configuration file into init namespace
      die "${subrname}__E> Failed initializing configuration\n"
         unless ( ModPipeInit( $conffile ) );


      # Also, extract chain as PIR file with GetChains.py
        # Write the PDB ID and chain to GetChains.py input file list.
        my $getchainsinputfile = "getchains.in";
        my $fh_getchainsin = OpenNewFile( $getchainsinputfile ) or
           die "${subrname}__E> Could not open file $getchainsinputfile\n";
        print $fh_getchainsin "${templatepdbid}${ucchain}\n";
        close( $fh_getchainsin );

        my $pirfile = "${rundir}/${templatepdbid}${ucchain}.pir";

        # -- Create the options hash for GetChains.py
        my %getchainsopt = (
                        "-f" => $getchainsinputfile,
                        "-o" => $pirfile,
                        );
        my $getchains_command =
           GetModPipeScript("lib/python/modpipe/scripts/GetChains.py")
           . " @{[ %getchainsopt ]} 2>&1";

        # -- Run command
        warn "${subrname}__M> Extracting sequence for chain ${ucchain} from PDB file for ${templatepdbid} (PIR)...\n";
        if ( system( $getchains_command ) ){
           warn "${subrname}__E> Failed extracting PDB sequence\n";
           warn "${subrname}__E> $getchains_command\n";
           die  "${subrname}__E> ... Will exit\n";
        }

      # -- Add sequence(s) to repository
        # -- Create the options hash for AddSeqMP
        my %addchainopt = (
                        "--conf_file"         => $conffile,
                        "--sequence_file"     => $chainfile,
                        );

        # -- Call AddSeqMP to add sequences
        warn "${subrname}__M> Adding sequence to repository...\n";
        my $addchain_command = GetModPipeScript("main/AddSeqMP.py")
                               . " @{[ %addchainopt ]} 2>&1";

        # -- Run Command
        if ( system( $addchain_command ) ){
           warn "${subrname}__E> Failed adding sequences to repository\n";
           warn "${subrname}__E> $addchain_command\n";
           die  "${subrname}__E> ... Will exit\n";
        }

        # -- Check for unq file (created by AddSeqMP).
        my $chainfileunq = fileparse($chainfile, '\..*') . '.unq';
        die "${subrname}__E> Could not find file with unique MD5 ids: $chainfileunq\n"
           unless ( -e $chainfileunq );

        # -- Read in the MD5 ids; the first one identifies the template chain.
        my ($chain_ids, $chain_names) = ReadUNQ( $chainfileunq );
        my $seqmd5id = $chain_ids->[0];
        printf "%s %8d\n", "${subrname}__M> No. of sequences added: ", 
               scalar(@$chain_ids);

        # -- The sequence-stage configuration starts from this chain's
        #    configuration, with its own temporary directory.  (The profile
        #    configuration file has already been written above.)
        $sequence_conf = $chain_conf;
        $sequence_conf->{'TMPDIR'} = $tmpdir_sequence . $ucchain;
        if ( $use_pdb95_prf_f ) {
           # -- Use existing profile.  Copy to seqdir.  Check that PDB95 data
           #    directory given in conf file
           my $pdb95datdir = $chain_conf->{'XPRF_DATDIR'};
           defined( $pdb95datdir ) 
              or die "${subrname}__E> XPRF_DATDIR not given in conf file\n";

           my $seqdir = SeqDirMP( $seqmd5id );
           my $subdir = substr( $pdbidchain, 1, 2 );
           my $pdb95prf = "${pdb95datdir}/${subdir}/${pdbidchain}/${pdbidchain}-uniprot90.prf";
           my $prffile = "${seqdir}/${seqmd5id}-uniprot90.prf";
           my $copy_command = "cp $pdb95prf $prffile";

           warn "${subrname}__M> Using existing PDB95 cluster profile for ${templatepdbid}${ucchain}\n";
           if ( system( $copy_command ) ){
              die  "${subrname}__E> Could not copy existing profile\n";
           }
        } else {
           # -------------------------------------------------------------------------
           # -- First part - AugmentPDB.pl -- 
           # -- Make profile with five iterations against UniProt90, create
           #    augmented PDB95 template/profile file.
           #    create modified modpipe_sequence.conf, to include the augmented files

           # -- Form the options hash for AugmentPDB.pl
           my %augmentopt = (
                         "--conf_file"       => $conffile,
                         "--pir_file"        => $pirfile,
                         "--hits_mode"       => $hitsmode,
                         "--exit_stage"      => $exitstage,
                         "--evalue_hits"     => $evaluehits,
                         "--clean_up"        => $cleanup,
                         "--sequence_id"     => $seqmd5id,
                         "--pdb_id_chain"    => $pdbidchain,
                       );

           my $augment_command = GetModPipeScript("src/AugmentPDB.pl")
                                 . " @{[ %augmentopt ]}";

           # -- Run the command
           warn "${subrname}__M> Creating profile (5 iterations against uniprot90)...\n";
           if ( system( $augment_command ) ) {
              die "${subrname}__E> AugmentPDB.pl failed\n";
           }
           $sequence_conf->{'XPRF_PIR'} = "${rundir}/pdb95_aug_prf.pir";
           $sequence_conf->{'XPRF_LIST'} = "${rundir}/pdb95_aug_prf.list";
           $sequence_conf->{'XPRF_PSSMDB'} = "${rundir}/pdb95_aug_prf.pssm";
        }
        my $sh_conf = OpenNewFile( $seq_conffile ) or
            die "${subrname}__E> Could not open file $seq_conffile\n";
        WriteConf($sh_conf, $sequence_conf);
        close($sh_conf);


        # -------------------------------------------------------------------------
        # -- Second part - GetFullSeqs.pl -- 
        # -- Find matching sequences using that profile with one iteration 
        #    against full UniProt, retrieve full-length versions of those
        #    sequences.  FASTA file saved in repository (in the sequence
        #    directory of the template chain, see below).

        # -- Form the options hash for GetFullSeqs
        my %fullseqsopt = (
                      "--conf_file"       => $conffile,
                      "--hits_mode"       => $hitsmode,
                      "--profile_update"  => $profile_update,
                      "--exit_stage"      => $exitstage,
                      "--max_sequences"   => $max_sequences,
                      "--evalue_hits"     => $evaluehits,
                      "--clean_up"        => $cleanup,
                      "--sequence_id"     => $seqmd5id,
                    );

        my $fullseqs_command = GetModPipeScript("src/GetFullSeqs.pl")
                               . " @{[ %fullseqsopt ]}";

        # -- Run the command
        warn "${subrname}__M> Finding sequences in full uniprot (one iteration)...\n";
        if ( system( $fullseqs_command ) ) {
           die "${subrname}__E> GetFullSeqs.pl failed\n";
        }

      # ---------------------------------------------------------------------------
      # -- Submit the full-length sequences to ModPipe, using augmented PDB95
      #    file (TEMPLATESEQDB in conf file, set above).

      # -- Location of the profile full-length sequences in the repository
      my $seqdir=SeqDirMP($seqmd5id);
      $fullseqs_file = "${seqdir}/${seqmd5id}-profile_sequences.fsa";

   } # endif ( ! $fullseqs_file )

   my $newseqfile = "${rundir}/" . basename($fullseqs_file);

   # -- Copy the sequence file into rundir only if it does not exist already
   # -- This is important for the case of an imported custom sequence file, not
   # -- the regular sequence file that is harvested from the profile. 

   unless ((-e $newseqfile ) && ($external_sequences eq "ON")){
     unless( CopyFile($fullseqs_file, $newseqfile) ){
       warn "${subrname}__E> Could not copy full-length sequences into run directory\n";
       warn "${subrname}__E> Run Directory: $rundir\n";
       die  "${subrname}__E> Full-sequences File: $fullseqs_file\n";
     }
   }

   # -- Re-assign sequence filename (relative name; the current directory
   #    is the run directory)
   my $seqfile = basename($newseqfile);
   unless (-s $newseqfile) {
      warn "${subrname}__E> $newseqfile contains 0 sequences\n";
      die "${subrname}__E> Stopping here ...\n";
   }


   # -- Add sequences to Modpipe data structure
     # -- Create the options hash for AddSeqMP
     my %addseqopt = (
                     "--conf_file"         => $seq_conffile,
                     "--sequence_file"     => $seqfile,
                     );

     # -- Call AddSeqMP to add sequences
     my $addseq_command = GetModPipeScript("main/AddSeqMP.py")
                          . " @{[ %addseqopt ]} 2>&1";

     # -- Run Command
     warn "${subrname}__M> Adding full-length sequences to Modpipe data structure ...\n";
     if ( system( $addseq_command ) ){
        warn "${subrname}__E> Failed adding full-length sequences to Modpipe data structure \n";
        warn "${subrname}__E> $addseq_command\n";
        die  "${subrname}__E> ... Will exit\n";
     }

     # -- Check for unq file
     my $unqfile = fileparse($seqfile, '\..*') . '.unq';
     die "${subrname}__E> Could not find file with unique MD5 ids: $unqfile\n"
        unless ( -e $unqfile );

     # -- Read in the MD5 ids
     my ($ids, $names) = ReadUNQ( $unqfile );
     printf "%s %8d\n", "${subrname}__M> No. of sequences added: ", scalar(@$ids);

     if ( $runwhere =~ /\bSGE\b/ ){
       # -- Submit job to the queue

       # -- Verify that qsub is in the path
       my $qsubexe = `which qsub 2> /dev/null` or
         die "${subrname}__E> Cannot find qsub in path\n";
       chomp $qsubexe;

       # -- Create the SGE job file
       # -- Open the SGE job file
       my $sgefh = OpenNewFile($sgefile) or
         die "${subrname}__E> Could not open file $sgefile\n";

       # -- Write SGE job file
       WriteSGEMP($sgefh, $logdir, $jobname, $modpipe, $disks, $nodes,
                  $priority, $seq_conffile, $exitstage, $hitsmode, $evaluehits,
                  $cleanup,$tsvmod_flag,$final_modby_str, scalar(@$ids), $ids, $tmpdir_sequence, $pdbidchain,$template_option);

       # -- Close the SGE job file handle
       close($sgefh);
     
       # -- Submit job and collect jobid: third word of the qsub output,
       #    with everything from the first dot on removed.
       warn "${subrname}__M> Submitting ModPipe to cluster...\n";
       my $qsub_output = `qsub $sgefile`;
       chomp $qsub_output;
       my $jobid = (split(" ",$qsub_output))[2];
       $jobid = "" unless ( defined( $jobid ) );
       $jobid =~ s/\..*//;

       # -- Report success only once a job id has actually been obtained.
       if ($jobid eq "") {
          die "${subrname}__E> Error submitting ModPipe job $sgefile\n";
       }
       warn "${subrname}__M> Successfully submitted ModPipe job: $jobid\n";

       # -- wait for job completion and collect results
       while ( 1 ){
         # -- Sleep for a while
         sleep 120; 

         # -- Check status of job; qstat reports finished jobs as unknown
         my $qstatout = `qstat -j $jobid 2>&1`;
         last if ($qstatout =~ /^Following jobs do not exist/);
       }

       warn "${subrname}__M> ModPipe job $jobid completed.\n";
     } else {
       # -- Run the job locally

       # -- Form the options hash for ModPipe
       my %mpopt = (
                     "--conf_file"       => $seq_conffile,
                     "--hits_mode"       => $hitsmode,
                     "--exit_stage"      => $exitstage,
                     "--evalue_hits"     => $evaluehits,
                     "--clean_up"        => $cleanup,
                     "--final_models_by" => $final_modby_str,
                   );

       # -- Run the sequences one at a time
       foreach my $oneid ( @$ids ){
          # -- Fix the sequence id
          $mpopt{"--sequence_id"} = $oneid;
          my $modpipe_command = GetModPipeScript("main/ModPipe.pl")
                                . " @{[ %mpopt ]} 2>&1";

          # -- Run the command and echo its output.  A sequence whose
          #    ModPipe run cannot be started is reported and skipped.
          my $run_fh;
          unless ( open( $run_fh, '-|', $modpipe_command ) ) {
             warn "${subrname}__E> Could not run ModPipe for sequence $oneid: $!\n";
             next;
          }
          while ( my $run = <$run_fh> ){
             print $run;
          }
          close($run_fh);
       }
     }


   # ---------------------------------------------------------------------------
   # --- Run the GatherModMP script to pick out the best models for each
   #     sequence (modeled by template of interest and competing templates).
   #     These will be loaded into ModBase by ModWebd -> ModBaseImport.py.

      # -- Create the options hash
      my %collectmod = (
                        "--conf_file"            => $conffile,
                        "--unq_file"             => $unqfile,
                        "--output_modfile"       => $modfile,
                        "--output_hitfile"       => $hitfile,
                        "--output_finfile"       => $finfile,
                        "--gather_fast"          => "True",
                        "--template"             => $pdbidchain,
                        "--final_models_by"      => $final_modby_str,
                       );

     # -- Call GatherModMP to select models
     my $gather_command = GetModPipeScript("main/GatherModMP.py")
                          . " @{[ %collectmod ]} 2>&1";

     warn "${subrname}__M> Gathering best models...\n";
     # -- Run Command
     if ( system( $gather_command ) ){
        warn "${subrname}__E> Failed to collect models from repository\n";
        warn "${subrname}__E> $gather_command\n";
        die  "${subrname}__E> ... Will exit\n";
     }

     # -- Also, create a symbolic link to the fullsequences.unq file with the
     #    name that ModBaseImport.py expects.
     my $modbaseimport_unqfile = "${runname}.unq";
     symlink($unqfile,$modbaseimport_unqfile) or die ("${subrname}__E> Unable to symlink $unqfile file to ModBaseImport name $modbaseimport_unqfile\n");
}


# -- Report the finishing time.  This is reached once the per-chain loop
#    further up has completed; the sub definitions in between are not
#    executed as part of the top-level flow.
printf( "%s\n", GetTime() );

# --- Done: leave with a success status.
exit( 0 );

# -- Subroutines

# --- Usage
# Print the help text for all command line options to STDOUT.
# Takes no arguments and does not exit; the caller decides what to do next.
# The defaults quoted in the text mirror the defaults set at the top of the
# script.
sub usage {
print <<EOF;

${0}:

TemplateBased.pl calculates all models based on an input template. 
This is used to measure the modeling leverage of a new structure ("template")
or to trigger the modeling of an input sequence using the input template. 
It identifies matching sequences in UniProt (FULLSEQDB in the configuration file), 
runs ModPipe on each, and runs GatherModMP to collect the best models for each 
sequence modeled, including the best model of the input template.
An option can be set for modeling only the input-template hits, or all hits from
the template databases, including the input template. 

Options:

      --pdb_id                  Four-character PDB code of the input template 
      --pdb_chain               Single-character chain code.  If not specified,
                                each non-redundant chain will be used. 
      --pdb_file 		optional, needed if template file is not in standard 
                                repository
      --conf_file		optional, default file in modpipe repository: 
				web/conf/template_modpipe.conf
      --template_option         [ALL],TEMPLATE: specifies whether in the modeling
                                step, the whole template database should be used, 
                                or only the input template
      --fullseqs_file           If specified, the full-length sequences to be
                                modeled will be read from this file in FASTA
                                format -- the steps to identify matching 
                                sequences in UniProt will be skipped.
      --profile_update		[ON],OFF: Update of the full uniprot profile, 
                                in case it already exists. 
      --runname                 A runname for the TemplateBased run. If not
                                specified it will create one of the form
                                TB-<pdb_id><CHAIN><time>. A directory of this
                                name will be created in the current working
                                path.
      --run_directory           The directory from where the jobs should be
                                run. All data will be written into this
                                directory. This should be specified with an
                                absolute path.
                                Default: [PWD]/<runname>
      --output_models_file      Filename to store the data about the models
                                produced.  Default: <runname>.mod
      --output_hits_file        Filename to store the data about the hits
                                produced. Typically to figure how many hits were
                                found.
                                Default: <runname>.hit
      --final_models_by         Scheme to choose the final models by. Possible
                                values are LONGEST_DOPE, LONGEST_GA341, SEQID,
                                GA341, DOPE, MPQS, ALL.  Multiple options can
                                be specified by multiple copies of the command
                                line switch.  For example, "--final_models_by
                                LONGEST_DOPE --final_models_by SEQID" will
                                return two models.  Default: SEQID and MPQS
      --output_final_models     Filename to store the models that pass the
                                model selection (see above).
                                Default: <runname>.fin
      --run_where               Where to run the job. Values are SGE or LOCAL.
                                The LOCAL is present mainly for testing purposes.
      --max_sequences           Maximum number of sequences harvested from the Profile.
				Default: 30,000
      --help                    This help. Pipe it through 'more' if it
                                scrolls off the screen.
MODPIPE Configuration Options:
  These options will be used to create the ModPipe configuration file.
  See example file in \${MODPIPEBASE}/tests/modpipe/modpipe.conf for
  definitions.

      --tmpdir_profile          TMPDIR: Temporary directory on the nodes
				for the profile creation. 
                                Default: /scratch/username/TB/<runname>
      --tmpdir_sequence         TMPDIR: Temporary directory on the nodes
				for the modeling of the sequences
                                Default: /scratch/username/TB/<runname>
      --datdir                  DATDIR: Data directory to store results.
                                Default: <run_directory>/data
SGE Options:
  These options are used to specify the SGE job.

      --log_directory           Directory for storing the logs from SGE tasks.
                                Will be created if it does not exist. You can
                                use paths relative to the current directory.
      --job_name                Name for your job. Default: ModPipe-TB
      --modpipe                 The location of the ModPipe.pl binary on the
                                cluster. Will default to the binary in this
                                ModPipe installation.
      --disks                   The names of the disk-complexes defined in SGE.
                                Specify as a string enclosed within double 
                                quotes. Required. 
      --nodes                   The names of the node-complexes defined in SGE.
                                Specify as a string enclosed within double
                                quotes. Required. 
      --priority                Priority for running the tasks.  Default: -10
      --sge_file                Output filename to write the SGE job script.
                                Default: sge-modpipe.csh
MODPIPE Options:
  These are options that you would normally specify on the command-line of
  ModPipe.pl. These will form the basis for the SGE task included in the
  SGE job file.

      --exit_stage              Choose the exit stage for the program. You can
                                quit after one of the following: PROFILE,
                                ALIGNMENTS, MODELS.
                                Default: MODELS 
      --hits_mode               Mode for calculating template hits. It is a
                                four-letter code containing 1 (true) or 0
                                (false) for each of Seq-Seq, Prf-Seq, Prf-Prf,  
                                and Seq-Prf, respectively. For instance, to
                                calculate Seq-Seq and Prf-Prf, set it to 1010.
                                Default: 1001
      --score_by_tsvmod         [ON],OFF (set off when tsvmod is not installed)
      --evalue_hits             The E-value threshold to get hits against
                                template databases. This value controls hits
                                from all three searches.
                                Default: 1.0
      --clean_up                Flag to clean up the temporary directory
                                after all operations. Can be OFF or ON.
                                Default: ON
EOF
}

