#!/usr/bin/perl

use strict;
use Getopt::Long;
use File::Basename;
use Cwd;

# --- Load local modules
use PLLib::Utils;
use PLLib::Sequence;
use PLLib::Modeller;

use MPLib::Version;
use MPLib::Binaries;
use MPLib::MPInit;
use MPLib::MPUtils;
use MPLib::MPModules;

# --- Get command line options
# Option names are matched case-sensitively.
$Getopt::Long::ignorecase = 0;

# -- Run identification and working locations
my $runname;
my $rundir;
my $enddir;
my $tmpdir;
my $tmpdirroot;
my $conffile;

# -- Where and how the job is executed (queue resources are SGE-only)
my $runwhere;
my $nodes;
my $nodes_tb;
my $nodes_fast;
my $max_concurrent_tasks_tb;
my $disks;
my $modpipeclusterbase;

# -- Template selection
my $templatepdbid;
my $templatepdbchain;
my $template_option;
my $pdb_file;

# -- Modelling / selection behaviour
my @hitsmode;
my @final_modby = ();
my $max_sequences;
my $tsvmod_flag;
my $cleanup;

# -- Informational switches
my $help;

# --- Command-line options -- reflects subset used in ModWebd
# Repeatable switches (--final_models_by, --hits_mode) accumulate into arrays;
# --version is handled immediately by its callback.
GetOptions(
    # run identification and locations
    'runname=s'                 => \$runname,
    'run_directory=s'           => \$rundir,
    'end_directory=s'           => \$enddir,
    'tmpdir=s'                  => \$tmpdir,
    'tmpdirroot=s'              => \$tmpdirroot,
    'conf_file=s'               => \$conffile,

    # execution target and queue resources
    'run_where=s'               => \$runwhere,
    'nodes=s'                   => \$nodes,
    'nodes_tb=s'                => \$nodes_tb,
    'nodes_fast=s'              => \$nodes_fast,
    'max_concurrent_tasks_tb=i' => \$max_concurrent_tasks_tb,
    'disks=s'                   => \$disks,
    'modpipeclusterbase=s'      => \$modpipeclusterbase,

    # template selection
    'pdb_code=s'                => \$templatepdbid,
    'pdb_chain=s'               => \$templatepdbchain,
    'template_option=s'         => \$template_option,
    'pdb_file=s'                => \$pdb_file,

    # modelling behaviour
    'hits_mode=s'               => \@hitsmode,
    'final_models_by=s'         => \@final_modby,
    'max_sequences=s'           => \$max_sequences,
    'score_by_tsvmod=s'         => \$tsvmod_flag,
    'cleanup=s'                 => \$cleanup,

    # informational
    'help'                      => \$help,
    'version'                   => sub { VersionMessage() },
);


# --- Check command line options
# --help short-circuits everything else: print the usage text and stop.
if ( $help ){
   usage();
   exit 0;
}

# --- Get Program name
# Used as the prefix of every diagnostic message below.
my $subrname = GetSubrName();

# --- Check mandatory options
# The queue (SGE) is the default execution target.
$runwhere = 'SGE' unless ( $runwhere );

# The usage text promises a generated run name of the form ModWeb0b-(time)
# when none is given.  Without it the scratch directory derived from the run
# name would collapse to the shared scratch root.
unless ( defined $runname && length $runname ) {
  $runname = 'ModWeb0b-' . time();
  warn "${subrname}__M> No --runname given, using $runname\n";
}

# --disks and --nodes describe queue resources: they are required for a
# queue submission and meaningless (hence rejected) for any other target.
if ($runwhere eq 'SGE') {
  unless ($disks && $nodes) {
    warn "${subrname}__E> Missing mandatory options: --disks and/or --nodes\n";
    die  "${subrname}__E>   Try --help for usage help\n";
  }
} elsif ($disks || $nodes) {
  warn "${subrname}__E> --disks and --nodes do not work with " .
       "--run_where='LOCAL'\n";
  die  "${subrname}__E>   Try --help for usage help\n";
}

# Temporary-directory cleanup is on unless explicitly overridden.
if (!$cleanup) {
   $cleanup="ON";
}

# Cleans up HitsModes - Seq-Seq is always used, not included in list. 
@hitsmode = &CleanHitsModes( @hitsmode );

  # -- Change to rundir
  # Defaults to the current working directory.  The job file is written and
  # submitted relative to this directory, so failing to enter it is fatal.
  if (!$rundir) {
     $rundir=cwd();
  }
  chdir($rundir)
     or die "${subrname}__E> Cannot change to run directory $rundir: $!\n";

  # Model-selection schemes are handed on as one comma-separated string
  # without any whitespace.
  my $final_modby_str = join ",", @final_modby;
  $final_modby_str =~ s/\s//g;

  # An explicit --tmpdir wins; otherwise derive it from --tmpdirroot or the
  # built-in scratch location, always with the run name appended.
  if (!$tmpdir) {
     if (!$tmpdirroot) {
        $tmpdir="/scratch/ModWeb20d/${runname}/";
     } else {
        $tmpdir="$tmpdirroot/${runname}";
     }
   }
  if ( $runwhere =~ /\bSGE\b/ ){
     # -- Create the SGE job file
     # -- Open the SGE job file
     my $sgefile = "runTB.csh";
     my $sgefh = OpenNewFile($sgefile)
        or die "${subrname}__E> Cannot open $sgefile for writing\n";

     # -- Write SGE job file
     # The fast template mode may use its own node specification.
     if (defined($template_option) && ($template_option eq "TEMPLATE_FAST")
         && ($nodes_fast)) {
         $nodes = $nodes_fast;
     }
     my $return=WriteSGETB($sgefh, $runname, $rundir, $nodes, $nodes_tb,
                $disks, $modpipeclusterbase, $final_modby_str,
                join(",",@hitsmode), $templatepdbid, $templatepdbchain,
                $template_option, $tmpdir, $pdb_file, $tsvmod_flag,
                $max_sequences, $max_concurrent_tasks_tb);

     # -- Close the SGE job file handle
     # Buffered write errors only surface at close, so check it as well.
     my $closed = close($sgefh);
     if ($return != 1 || !$closed) {
       die "${subrname}__E> Error writing $sgefile\n";
     }

  
     # -- Submit job to the queue
     # -- Verify if qsub is in the path
     my $qsubexe = `which qsub 2> /dev/null` or
        die "${subrname}__E> Cannot find qsub in path\n";
     chomp $qsubexe;

     # -- Submit job and collect jobid
     warn "${subrname}__M> Submitting job to cluster\n";
     my $qsubout = `qsub $sgefile`;
     if ($? != 0) {
        die "${subrname}__E> qsub failed with status " . ($? >> 8) . "\n";
     }
     $qsubout = '' unless defined $qsubout;
     chomp $qsubout;

     # The job id is the third whitespace-separated word of the qsub
     # message; anything after the first dot is stripped from it.
     my $jobid = (split(" ",$qsubout))[2];
     $jobid =~ s/\..*// if defined $jobid;

     # Without a job id the polling loop below could never terminate.
     unless (defined $jobid && length $jobid) {
        die "${subrname}__E> Cannot determine job id from qsub output: " .
            "$qsubout\n";
     }
     warn "${subrname}__M> Job successfully submitted: $jobid\n";

     # -- wait for job completion and collect results
     while ( 1 ){
        # -- Sleep for a while
        sleep 120; 

        # -- Check status of job
        # The job is considered finished once qstat no longer knows it.
        # /m lets the message match on any line of the combined output.
        my $qstatout = `qstat -j $jobid 2>&1`;
        last if ($qstatout =~ /^Following jobs do not exist/m);
     }
     warn "${subrname}__M> Job $jobid completed.\n";
  } else { #runlocal option
     # -- Run the job locally
     # -- Form the option list for TemplateBased.pl
     # Each entry is [ switch, value ].  Switches whose value is undefined or
     # empty are left out entirely: a bare switch would otherwise swallow the
     # following switch as its argument.
        my @option_pairs = (
                    [ "--pdb_code"        => $templatepdbid ],
                    [ "--pdb_chain"       => $templatepdbchain ],
                    [ "--pdb_file"        => $pdb_file ],
                    [ "--conf_file"       => $conffile ],
                    [ "--template_option" => $template_option ],
                    [ "--max_sequences"   => $max_sequences ],
                    [ "--run_where"       => $runwhere ],
                    [ "--run_directory"   => $rundir ],
                    [ "--runname"         => $runname ],
                    [ "--hits_mode"       => join(",",@hitsmode) ],
                    [ "--tmpdir_profile"  => $tmpdir ],
                    [ "--tmpdir_sequence" => $tmpdir ],
                    [ "--score_by_tsvmod" => $tsvmod_flag ],
                    [ "--clean_up"        => $cleanup ],
                    [ "--final_models_by" => $final_modby_str ],
                    );

        my @arguments;
        foreach my $pair (@option_pairs) {
           my ($switch, $value) = @{$pair};
           next unless (defined $value && length $value);

           # Single-quote the value for the shell so that spaces or
           # metacharacters in paths and names are passed through literally.
           (my $quoted = $value) =~ s/'/'\\''/g;
           push @arguments, $switch, "'$quoted'";
        }

         # The command still goes through the shell so that stderr can be
         # merged into the stream that is echoed below.
         my $script  = GetModPipeScript("main/TemplateBased.pl");
         my $command = join(" ", $script, @arguments) . " 2>&1";

         open(my $run_fh, '-|', $command)
            or die "${subrname}__E> Cannot run $command: $!\n";
         while (my $line = <$run_fh>) {
           print $line;
         }

         # close() on a pipe reports the exit status of the command.
         close($run_fh)
            or warn "${subrname}__E> TemplateBased.pl did not finish " .
                    "cleanly (status $?)\n";
  }

     # -- Post process data
#--- exit
exit 0;

# -- Subroutines

# --- Usage
# Prints the help text to STDOUT.  Takes no arguments.  The option list is
# kept in step with the GetOptions specification at the top of this script.
sub usage {
print <<EOF;

${0}:

Interface to ModWebd for queue submission of TemplateBased.pl

Options:

      --pdb_code                Four-character PDB code of the template of
                                interest (required).
      --pdb_chain               Single-character chain code.  If not specified,
                                the calculations will be performed for each non-
                                redundant chain.
      --max_sequences           Maximum number of sequences harvested from the Profile.
                                Default: 30,000
      --pdb_file                Full path of pdb file, needed in case the pdb file
                                is not in the standard pdb repository 
                                The PDB code still needs to obey the PDB convention:
                                four letter code with a digit as first letter. 
                                The ATOM records must contain a chain identifier. 
      --runname                 A runname for the ModWeb run. If not
                                specified it will create one of the form:
                                ModWeb0b-(time). A directory of this name
                                will be created in the current working path.
      --run_directory           The directory from where the jobs should be
                                run. All data will be written into this
                                directory. This should be specified with 
                                absolute path.
                                Default: the current working directory
      --end_directory           Accepted for compatibility; not used by this
                                script.
      --run_where               Where to run the job. SGE submits a job to the
                                cluster queue and waits for it to finish; any
                                other value (for example LOCAL) runs
                                TemplateBased.pl directly on this machine.
                                Default: SGE
      --tmpdirroot              TMPDIRRoot, default: /scratch/ModWeb20d/
                                will be expanded to TMPDIRROOT/<runname>
      --tmpdir                  TMPDIR, default: /scratch/ModWeb20d/<runname>
                                overwrites tmpdirroot
      --final_models_by         Scheme to choose the final models by. Possible
                                values are LONGEST_DOPE, LONGEST_GA341, SEQID
                                GA341, DOPE, MPQS, TSVMod ALL.  Multiple options can
                                be specified by multiple copies of the command
                                line switch.  For example, "--final_models_by
                                LONGEST_DOPE --final_models_by SEQID" will
                                return two models.
                                Default: LONGEST_DOPE
      --modpipeclusterbase      The modpipe base accessible from the cluster.
                                Will default to the base in this ModPipe installation.
      --disks                   The names of the disk-complexes defined in SGE.
                                Specify as a string enclosed within double quotes.
                                Required when --run_where is SGE; not allowed
                                otherwise.
      --nodes                   The names of the node-complexes defined in SGE.
                                Specify as a string enclosed within double quotes.
                                Required when --run_where is SGE; not allowed
                                otherwise.
      --nodes_fast              Used in place of --nodes when --template_option
                                is TEMPLATE_FAST (SGE only).
      --nodes_tb                Passed on to the SGE job file (SGE only).
      --max_concurrent_tasks_tb Integer, passed on to the SGE job file
                                (SGE only).
      --conf_file               Optional, will usually use default ModWeb 
                                configuration file (web/conf/template_modpipe.conf)
      --help                    Print this help text and exit.
      --version                 Print the version message.

MODPIPE Options:
  These are options that you would normally specify on the command-line of
  ModPipe.pl. These will form the basis for the SGE task included in the
  SGE job file.
      --hits_mode               Mode for calculating template hits. 
                                For example: Seq-Seq, Prf-Seq, 
                                Prf-Prf, respectively. 
                                See ModPipe.pl -h for list of options. 
      --template_option         [ALL], TEMPLATE,TEMPLATE_FAST. Models [ALL] hits
                                or only the hits for the TEMPLATE (the template
                                is given with --pdb_code / --pdb_chain).
                                A faster option if TEMPLATE_FAST, where only the profile
                                of the input template is used, thus effectively disabling
                                meaningful evalue statistics.
                                For the fast option, the shortened PDB95 files should be
                                specified in the configuration file. The option just
                                disables the computation of statistics.
      --score_by_tsvmod         [ON],OFF (set off when tsvmod is not installed)
      --cleanup                 Flag to clean up the temporary directory
                                after all operations. Can be OFF or ON.
                                Default: ON

EOF
}

