#!/usr/bin/perl
# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

use strict;
use Getopt::Long;
use File::Basename;

use MPLib::Version;
use PLLib::Utils;
use PLLib::Sequence;
use PLLib::Alignment;

# -----------------------------------------------------------------------
# Script body: parse the command line, validate the inputs, collect the
# alignment files named in the list file, merge them against the anchor
# alignment with BatchMergeAlignments() and write the result to the
# output file.
# -----------------------------------------------------------------------

# -- Option storage, pre-loaded with the defaults
my $seqfile = '';          # --anchor_alignment
my $tgtcode = '';          # --target_code
my $seqfmt  = 'FASTA';     # --anchor_aliformat
my $alilist  = '';         # --alignments_list
my @alifiles = ();         # existing files read from $alilist
my $alifmt = 'PIR';        # --alignments_format
my $outfile = '';          # --output_filename
my $outfmt = 'PIR';        # --output_format
my $help = '';             # --help

# -- Get command line options
# GetOptions returns false when it meets an unknown or malformed option;
# keep the result so the script can stop instead of running with a
# partially parsed command line.
my $opts_ok = GetOptions (
           "anchor_alignment=s"      => \$seqfile,
           "target_code=s"           => \$tgtcode,
           "anchor_aliformat=s"      => \$seqfmt,
           "alignments_list=s"       => \$alilist,
           "alignments_format=s"     => \$alifmt,
           "output_filename=s"       => \$outfile,
           "output_format=s"         => \$outfmt,
           "help"                    => \$help,
           "version"                 => sub { VersionMessage() },
           );

# -- Print usage information
if ( $help ) {
   usage();
   exit 0;
}

# --- Get Program name
my $subrname = GetSubrName();

# -- Refuse to continue after a command line parsing failure
unless ( $opts_ok ){
   warn "${subrname}__E> Could not parse command line options\n\n";
   die "Try $subrname --help for usage information\n";
}

# -- Check input arguments (diagnostics go to STDERR like all the others)
unless ( $seqfile && $tgtcode && $alilist ){
   warn "${subrname}__E> Missing mandatory options\n\n";
   die "Try $subrname --help for usage information\n";
}

# -- Set the various default parameters
# Anything that does not mention PIR is treated as FASTA.
$seqfmt  = 'FASTA' unless ( $seqfmt =~ /\bPIR\b/i);

# Default output name: anchor file name without directory and without
# everything from the first dot onwards, plus ".mul".
$outfile ||= fileparse($seqfile, '\..*') . ".mul";

# --- Check for existence of the input file
die "${subrname}__E> Could not find anchor alignment file: $seqfile\n"
   unless ( -e $seqfile );

# --- Check for existence of the file with alignment filenames
die "${subrname}__E> Could not find file with alignment filenames: $alilist\n"
   unless ( -e $alilist );

# -- Check alignment formats
die "${subrname}__E> This script currently handles ONLY PIR formats for alignments\n"
  unless ( $alifmt =~ /\bPIR\b/i && $outfmt =~ /\bPIR\b/i );

# -- Get the architecture of the machine
# On the failure path the value returned by GetArch() is false, so it
# cannot be used in the message; report the operating system name that
# perl itself knows ($^O) instead.
my $arch = GetArch()
  or die "${subrname}__E> Cannot run on this architecture: $^O\n";

# -- Read in the list of alignments to merge
# Loop on definedness of the line that was read, not on the return value
# of chomp: chomp returns the number of characters removed, which is 0
# for a final line without a trailing newline and would drop that entry.
my $fh_alilist = OpenFile($alilist);
while ( defined(my $alifile = <$fh_alilist>) ){
  $alifile =~ s/\r?\n?\z//;          # strip LF or CRLF line ending
  next unless length $alifile;       # ignore empty lines

  if ( -e $alifile ){
    push @alifiles, $alifile;
  } else {
    warn "${subrname}__E> Could not find alignment file $alifile\n";
  }
}
close($fh_alilist);

# -- Ensure there is at least one file to process
die "${subrname}__E> No alignment files to merge\n"
  unless ( scalar(@alifiles) > 0 );

# -- Merge alignments
my $outali = BatchMergeAlignments($seqfile, $tgtcode, $seqfmt,
             \@alifiles, $alifmt);

# -- Write out the final alignment
# Buffered write errors (e.g. a full disk) may only surface at close,
# so both print and close are checked.
my $fh_out = OpenNewFile( $outfile );
print {$fh_out} $outali
  or die "${subrname}__E> Could not write to output file $outfile: $!\n";
close($fh_out)
  or die "${subrname}__E> Could not close output file $outfile: $!\n";

exit 0;

# --- Usage
# Print the command line help to STDOUT.  The text starts with the
# program name as found in $0 and then describes every option accepted
# by this script.  Takes no arguments.
sub usage {
   my $help_text = <<"END_OF_USAGE";
${0}:

      --anchor_alignment        Input alignment containing the anchor
                                sequence to be used as a reference during
                                merge. Can contain a single sequence.
                                Will not proceed without this option.

      --target_code             Code of the sequence (in the anchor alignment)
                                to be used as a reference during merge.
                                Mandatory option.

      --anchor_aliformat        Format of the anchor alignment file. 
                                Can be in FASTA or PIR.
                                Default: FASTA

      --alignments_list         File containing the list of alignments that have
                                to get merged. Should contain full paths.
                                Mandatory option.

      --alignments_format       Format of the alignments going to be merged.
                                Currently handles only PIR.
                                Default: PIR

      --output_filename         Name of the output file that will contain
                                the assembled multiple sequence alignment.
                                Default: <basename-of-anchor>.mul

      --output_format           Format of the output assembled multiple 
                                sequence alignment. Currently only PIR.
                                Default: PIR

      --version                 Report version number of this program.

      --help                    This help.

END_OF_USAGE

   print $help_text;
}

