# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

# Package preamble: declares the module namespace and its export list.
# These statements appear before 'use strict' takes effect further down,
# so @ISA and @EXPORT are plain package globals here.
package MPLib::MPSelectHits;
require Exporter;
# Inherit import() from Exporter so that 'use MPLib::MPSelectHits' works.
@ISA    = qw(Exporter);
# SelectHits is exported into the caller's namespace by default.
@EXPORT = qw( SelectHits );


use strict;
use Cwd;
use File::Basename;

use PLLib::Utils;
use PLLib::Sequence;
use PLLib::Alignment;
use MPLib::MPInit;
use MPLib::MPUtils;


# SelectHits( $seqid, $hits )
#
# Clusters the alignments that belong to a set of hits and reports, for
# every cluster, the representative hit and the hits that are its members.
#
# Arguments:
#    $seqid - sequence identifier; passed on to CopyAlignmentMP and
#             ClusterAlignments
#    $hits  - hit collection handed to SortMPDataByLength; each element of
#             the sorted result must support ->alignment->id
#
# Returns:
#    On success a two-element list ( \@representatives, \@clustermembers ).
#    The arrays are parallel: $clustermembers[$i] is a reference to the
#    array of hits in the cluster represented by $representatives[$i].
#    Returns nothing (empty list / undef) when at most one alignment could
#    be copied or when clustering failed.
#
# Dies if not called with exactly two arguments.
sub SelectHits {

   use ExtLib::Tie::IxHash;

   # --- Get subroutine name
   my $subname = GetSubrName();

   # --- Check arguments
   my $nargs = 2;

   die "${subname}__D> Insufficient arguments\n"
      unless ( scalar(@_) == $nargs );

   # --- Reassign input arguments
   my ($seqid, $hits) = @_;

   # -- Sort the models by length (descending)
   my $hitl = SortMPDataByLength( $hits );

   # -- Copy the alignment files locally. The hash is tied to Tie::IxHash
   #    so that the alignments keep the order of the sorted hits.
   my %alifiles = ();
   tie %alifiles, "Tie::IxHash";

   # -- First hit (in sorted order) for every alignment id. Built once here
   #    so that the cluster loop below does not have to scan all the hits
   #    again for every representative and every member.
   my %hit_for = ();

   foreach my $hit ( @$hitl ){

      # -- Get the alignment id
      my $aliid = $hit->alignment->id;

      # -- Remember only the first hit seen for this alignment
      $hit_for{$aliid} = $hit unless ( exists $hit_for{$aliid} );

      # -- Copy the alignment file over
      my $aliloc = undef;
      unless( $aliloc = CopyAlignmentMP($seqid, $aliid) ){
         warn "${subname}__E> Failed copying alignment: $aliid\n";
         next;
      }

      # -- Record the local filename for this alignment id
      $alifiles{$aliid} = $aliloc;
   }

   # -- Report number of alignments
   my $alicnt = keys %alifiles;
   warn "${subname}__M> Alignments found for clustering: $alicnt\n";

   # -- Stop unless there are at least two alignments to cluster
   return if ( $alicnt <= 1 );

   # -- Create ali objects from all alignment files
   my @aliobjects = ();
   foreach my $ali ( values %alifiles ){
      my $aliobj = undef;
      $aliobj = ReadAlignment($aliobj, $ali, 'PIR', 'VERTICAL');

      # -- NOTE(review): assumes ReadAlignment yields an object reference on
      #    success; anything else is treated as a failed read and skipped
      #    rather than being handed to the clustering step.
      unless ( ref($aliobj) ){
         warn "${subname}__E> Failed reading alignment: $ali\n";
         next;
      }

      push @aliobjects, $aliobj;
   }

   # -- Cluster Alignments
   my ( $repstack, $clusters ) = ClusterAlignments( \@aliobjects, $seqid,
                                                    $init::aliclust_ovlp,
                                                    $init::aliclust_pcovlp,
                                                    $init::aliclust_nonovlp,
                                                    $init::aliclust_pcnonovlp,
                                                    $init::aliclust_idcol,
                                                    $init::aliclust_pcidcol);

   # -- Check clustering results. The ref() tests come first: dereferencing
   #    an undefined return value under 'use strict' would die before the
   #    failure could be reported.
   unless ( ref($repstack) && ref($clusters) &&
            scalar(@$repstack) == scalar(@$clusters) &&
            scalar(@$repstack) > 0 ){
      warn "${subname}__E> Clustering failed\n";
      return;
   }

   warn "${subname}__M> Clustering produced ", scalar(@$repstack), " representatives\n";

   # -- Map local alignment filenames back to alignment ids
   my %reverse_alifiles = reverse %alifiles;

   # -- Translate the clustered alignment objects back into hits
   my @representatives = ();
   my @clustermembers  = ();
   foreach my $i ( 0 .. $#$repstack ){

      # -- Get the hit that owns the representative alignment
      my $alifile   = $repstack->[$i]->filename;
      my $rep_aliid = defined $alifile   ? $reverse_alifiles{$alifile} : undef;
      my $rep_hit   = defined $rep_aliid ? $hit_for{$rep_aliid}        : undef;

      # -- Skip the whole cluster when its representative cannot be traced
      #    back to a hit, instead of returning an undefined representative
      unless ( defined $rep_hit ){
         warn "${subname}__E> No hit found for representative alignment: ",
              ( defined $alifile ? $alifile : '(unknown file)' ), "\n";
         next;
      }

      # -- Fetch the members of the cluster
      my @members = ();
      foreach my $aliobj ( @{ $clusters->[$i] } ){
         my $membfile   = $aliobj->filename;
         my $memb_aliid = defined $membfile   ? $reverse_alifiles{$membfile} : undef;
         my $memb_hit   = defined $memb_aliid ? $hit_for{$memb_aliid}        : undef;

         unless ( defined $memb_hit ){
            warn "${subname}__E> No hit found for member alignment: ",
                 ( defined $membfile ? $membfile : '(unknown file)' ), "\n";
            next;
         }

         push @members, $memb_hit;
      }

      # -- Push both together so that the two arrays stay parallel, even
      #    for a cluster without members
      push @representatives, $rep_hit;
      push @clustermembers,  \@members;
   }

   # -- Return relevant arrays of hit lines
   return ( \@representatives, \@clustermembers );
}
