#!/usr/bin/perl
#fpd040823

# purpose: sample script for marc demosntrate how:
# given: PDB id (and chain?)
# get:
#  1. domain assignments (SCOP & CATH)
#  2. domains in contact
#  3. domain conatct residues

#use strict;
#use warnings;

use lib '/alto2/home/fred/sali/projects/domain_interfaces/work/PiBase/progs/perl_package' ;
use DBI;
use PiBase ;
main() ;

sub main {

   my $label   = "PATCH_IT>";
   my $pdb_id  = $ARGV[0];
   my $min_id  = 75.0;		# Minimum of common residues (%) between two patches in the same chain to join them
   my $min_res = 5;		# Minimum of residues for considering a patch

   my ($dbh) = PiBase::connect_PiBase() ;

#i usually preload tables that i query a lot into a hash to speed things up:
# loads a hash pointing from pdb_id to bdp_id
   my ($pdb_2_bdp) = PiBase::load_bdp_ids($dbh, qw/pdb_id_2_bdp_id/) ;

# loads a hash pointing from bdp_id to interface_contacts_tables file path
   my $bdp_intcont_fn ;
   {
      my ($t_bdpid, $t_sourcefile) = PiBase::mysql_fetchcols($dbh,
         "SELECT bdp_id, source_file FROM interface_contacts_tables") ;

      foreach my $j ( 0 .. $#{$t_bdpid}) {
         $bdp_intcont_fn->{$t_bdpid->[$j]} = $t_sourcefile->[$j] ; }
   }


# make sure pdb is in PiBase

   if (!exists $pdb_2_bdp->{$pdb_id}) {
      die "Error: pdb $pdb_id not found in PiBase\n" ; }

   my $bdp_id = $pdb_2_bdp->{$pdb_id} ;

   my $int_res = get_int_res($bdp_intcont_fn->{$bdp_id}) ;
   my @chains = ();
   my @patches = ();
   my (%ninter,%inter);
   foreach my $set (keys %{$int_res}) {
      if ($#{$int_res->{$set}} <= $min_res) { next; }
      my ($da,$db,$chain) = split(/\||:/,$set);
      my @res = Unique(@{$int_res->{$set}});
      push(@chains,$chain);
      push(@patches,$set);
      $ninter{$chain}++;
      $inter{$chain}{$ninter{$chain}} = join(":",$da,$db,@res);
      $g{$chain}{$ninter{$chain}} = 0;
      $name = "partners_".$chain;
      push(@{$name},$db);
   }
   # PRINT RESULTS
   print "$label General data for PDB $ARGV[0]\n";
   @chains = &Unique(@chains);
   foreach my $chain (@chains) {
      print "$label Chain: $chain\n";
      print "$label Interactions: $ninter{$chain}\n";
      for my $i (1..$ninter{$chain}) {
         print "$label \t$i --> $inter{$chain}{$i}\n";
      }
      $name = "partners_".$chain;
      print "$label Partners: @{$name}\n";
   }
   # CLUSTER PATCHES
   foreach my $chain (@chains) {
      $patch_num{$chain} = 0; 
      for my $i (1..$ninter{$chain}) { 
         if ($g{$chain}{$i}) { next; }
         $patch_num{$chain}++;
         $g{$chain}{$i} = 1;
         $patchid = $ARGV[0].$chain.":".get_num($patch_num{$chain},3); $patchid =~ s/\s+/_/;
         ($d1a,$d1b,@res1) = split(/:/,$inter{$chain}{$i});
         push(@{$patchid},$d1a.$d1b);
         push(@{"res".$patchid},@res1);
         push(@{"par".$patchid},$d1b.$d1a);
         for my $j ($i+1..$ninter{$chain}) {
            ($d2a,$d2b,@res2) = split(/:/,$inter{$chain}{$j});
            if ($d1a eq $d2a) {
               if ($#res1 > $#res2) { $min = $#res2+1; } else { $min = $#res1+1; }
               ($ru,$ri,$rd) = &ArrayUniIntDiff(\@res1,\@res2);
               @union_ab = @$ru;
               @isect_ab = @$ri;
               @diffe_ab = @$rd;
               $grp_id = 100*($#isect_ab+1)/($min);
               if ($grp_id >= $min_id) {
                  push(@{$patchid},$d2a.$d2b);
                  push(@{"res".$patchid},@res2);
                  push(@{"par".$patchid},$d2b.$d2a);
                  $g{$chain}{$j} = 1;
               }
            }
         }
      }
   }
   # PRINT CLUSTERED PATCHES
   # PRINT OUTPUT FILES
   foreach my $chain (@chains) {
      my $subdir = substr($ARGV[0],0,2);
      if (!-e $subdir) { system("mkdir $subdir"); }
      $output = $subdir."/".$ARGV[0].$chain.".pat"; $output =~ s/\s+/_/;
      open(OUT,">$output"); 
      print  OUT "#Data from PiBase for PDB file $ARGV[0] [@chains]\n";
      print  OUT "#                      Chain:      $chain\n";
      printf OUT "# Number of patches in chain: %6i\n",$patch_num{$chain};
      printf OUT "# Patch definitions\n";
      for my $p (1..$patch_num{$chain}) {
         $patchid = $ARGV[0].$chain.":".get_num($p,3); $patchid =~ s/\s+/_/;
         print OUT "PATCH\t$patchid\t".join(",",&Unique(@{'res'.$patchid}))."\n";
      }
      printf OUT "# Patch partners\n";
      for my $p (1..$patch_num{$chain}) {
         $patchid = $ARGV[0].$chain.":".get_num($p,3); $patchid =~ s/\s+/_/;
         print OUT "PARTN\t$patchid\t";
         print "$label Chain $chain. Patch $patchid\n";
         print "$label\tOther names:@{$patchid}\n";
         print "$label\tResidues:@{'res'.$patchid}\n";
         print "$label\tPartners:@{'par'.$patchid}\n";
         print "$label\tInteracts with: ";
         foreach my $pair (@{'par'.$patchid}) {
            foreach my $chain2 (@chains) {
               for my $p2 (1..$patch_num{$chain2}) {
                  $patchid2 = $ARGV[0].$chain2.":".get_num($p2,3); $patchid2 =~ s/\s+/_/;
                  if (grep /$pair/, @{$patchid2}) { 
                     print "$patchid2\t"; 
                     print OUT "$patchid2 "; 
                  }
               }
            }
         }
         print "\n$label\n";
         print OUT "\n";
      }
      close(OUT);
   }
}


sub get_domains {

   my $dbh = shift ;
   my $bdp_id = shift ;

# get all domain assignments

   my ($subset_id, $class, $source) = PiBase::mysql_fetchcols($dbh,
      "SELECT subset_id, class, subset_source_id FROM subsets ".
      "WHERE bdp_id = $bdp_id AND subset_source_id = 1") ;

   my $subsets ;
   my $subset_def ;
   foreach my $j ( 0 .. $#{$subset_id}) {
      $subsets->{$subset_id->[$j]} =
         {class => $subset_id->[$j], source => $source->[$j]} ;

      my ($chain_id, $start, $end) = PiBase::mysql_fetchcols($dbh,
         "SELECT chain_id, start_resno, end_resno FROM subsets_details ".
	 "WHERE subset_id = \"$subset_id->[$j]\"") ;
      foreach my $k ( 0 .. $#{$chain_id}) {
         push @{$subset_def->{$subset_id->[$j]}},
	    {chain => $chain_id->[$k], start => $start->[$k], end => $end->[$k]};
      }

   }
   return ($subsets, $subset_def) ;

}


sub get_interfaces {

   my $dbh = shift ;
   my $bdp_id = shift ;

   my ($subset_id_1, $subset_id_2) = PiBase::mysql_fetchcols($dbh,
      "SELECT a.subset_id_1, a.subset_id_2 FROM intersubset_contacts as a, ".
      "interface_sasa as b WHERE a.bdp_id = $bdp_id AND ".
      "a.subset_id_1 = b.subset_id_1 AND a.subset_id_2 = b.subset_id_2 AND ".
      "dsasa_all >= 500") ;

   my $interfaces ;
   foreach my $j ( 0 .. $#{$subset_id_1}) {
      $interfaces->{$subset_id_1->[$j]}->{$subset_id_2->[$j]}++; }

   return $interfaces ;
}


sub get_int_res {
   my $int_res_fn = shift ;
   my ($subset_id_1, $subset_id_2, $chain_1, $res_1, $chain_2, $res_2) =
      PiBase::rawselect_metatod($int_res_fn,
      "SELECT subset_id_1, subset_id_2, chain_id_1, resno_1, ".
      "chain_id_2, resno_2 FROM $int_res_fn") ;
   my $intres ;
   foreach my $j ( 0 .. $#{$subset_id_1}) {
     if ($subset_id_1->[$j] =~ /SCOP/) {
      #print "($subset_id_1->[$j]) ".$res_1->[$j].":".$chain_1->[$j]." -- ".
      #      "($subset_id_2->[$j]) ".$res_2->[$j].":".$chain_2->[$j]."\n";
      push @{$intres->{$subset_id_1->[$j]."|".$subset_id_2->[$j].":".$chain_1->[$j]}},$res_1->[$j];
      push @{$intres->{$subset_id_2->[$j]."|".$subset_id_1->[$j].":".$chain_2->[$j]}},$res_2->[$j];
      #push @{$intres->{$subset_id_1->[$j]."|".$subset_id_2->[$j].":".$chain_2->[$j]}},$res_2->[$j];
     }
   }
   return $intres;
}


sub Unique {
   @in = @_;
   undef %saw;
   @saw{@in} = ();
   @out = sort by_num keys %saw;
   return(@out);
}
1;

sub by_num { $a <=> $b; }

sub ArrayUniIntDiff {
   my $ra = shift; @a = @$ra;
   my $rb = shift; @b = @$rb;
   my ($e,%count,@union,@isect,@diff);

   foreach $e (@a, @b) { $count{$e}++ }
   foreach $e (keys %count) {
      push(@union, $e);
      if ($count{$e} == 2) {
         push @isect, $e;
      } else {
         push @diff, $e;
      }
   }
   # Return
   return(\@union,\@isect,\@diff);
}
1;

sub get_num {
   my $n = shift;
   my $l = shift;
   my $len = length($n);
   for my $i ($len..$l) { $n = "0".$n; }
   return($n);
}
1;
