#!/usr/bin/perl -w


$save_aln_sep = $/;


#Open the alignment file 
print "Enter Alignment filename (PIR format) [Example: TvLDH-1bdmA.ali] : ";
$alnfname = <STDIN>;
open(ALNFNAME, $alnfname) or die "Alignment file not found. Please check if file exists or if filename is correct\n";



#Get the Target Code
print "Enter code of the Target (Model) sequence [Example: TvLDH]: ";
$targcode = <STDIN>;
chomp $targcode;



#Get the Template Code
print "Enter code of the Template structure [Example: 1bdmA]: ";
$tempcode = <STDIN>; 
chomp $tempcode;


$/ = undef;


#Store alignment data
$alignment = <ALNFNAME>;
close ALNFNAME;



#Here we split the alignment using the >P1 found in all the PIR sequences, so that
#the two sequences can be obtained in individual #variables. check_aln performs this job.
#Once obtained, the sequence alone is extracted using the parse_aln subroutine.
@alnseqs = split(/>P1;/, $alignment);



#extract TARGET sequence
$dtargseq = check_aln(\@alnseqs,\$targcode);
@ftargseq = parse_aln($dtargseq, $targcode);



#extract TEMPLATE sequence
$dtempseq = check_aln(\@alnseqs,\$tempcode);
@ftempseq = parse_aln($dtempseq, $tempcode);



#extract TARGET gap positions
@targgaps = calc_gaps(@ftargseq);
print "Target gaps are at @targgaps\n";



#extract TEMPLATE gap positions
@tempgaps = calc_gaps(@ftempseq);
print "Template gaps are at @tempgaps\n";



#The energy profiles contain some extra information. only the energy values of the
#residues are required. The subroutine profiletodat() performs this function 
$/ = $save_aln_sep;



#Check if the TARGET profile files are already present. Else ask for a filename.
$targprofile = "$targcode.profile";
if(!open(TARGPROFILE, $targprofile))
{
        print "Enter name of Target profile [Example: TvLDH.profile]: ";
        $targprofile = <STDIN>;
}
open(TARGPROFILE, $targprofile) or die "Can't open Target profile: $targprofile\n";
@targprofile = <TARGPROFILE>;
close TARGPROFILE;



#Check if the TEMPLATE profile files are already present. Else ask for a filename.
$tempprofile = "$tempcode.profile";
if(!open(TEMPPROFILE, $tempprofile))
{
        print "Enter name of Template profile [Example: 1bdmA.profile]: ";
        $tempprofile = <STDIN>;
}
open(TEMPPROFILE, $tempprofile) or die "Can't open Template profile: $tempprofile\n";
@tempprofile = <TEMPPROFILE>;
close TEMPPROFILE;



#Parse TARGET Profile file
@targdat = profiletodat(@targprofile);



#Parse TEMPLATE Profile file
@tempdat = profiletodat(@tempprofile);



#The gaps in the sequences do not contain energy values. Here we modify the dat file(previous step) to 
#Correct TARGET energy data for gaps (insert ? in gapped positions) => these sites won't be plotted
@mtargdat_old = modify_dat(\@targdat, \@targgaps);
@mtargdat = map {$_."\t".$mtargdat_old[$_]."\n"} (0 .. $#mtargdat_old-1);



#Correct TEMPLATE energy data for gaps (insert ? in gapped positions) => these sites won't be plotted
@mtempdat_old = modify_dat(\@tempdat, \@tempgaps);
@mtempdat = map {$_."\t".$mtempdat_old[$_]."\n"} (0 .. $#mtempdat_old-1);



##GnuPlot does not accept arrays. Hence we save the two dat files for plotting
#Write TARGET dat (see previous block) to file
$gtargdat = "$targcode.dat";
open(GTARGDAT, ">$gtargdat");
print GTARGDAT @mtargdat;



#Write TEMPLATE dat (see previous block) to file
$gtempdat = "$tempcode.dat";
open(GTEMPDAT, ">$gtempdat");
print GTEMPDAT "@mtempdat";



#Finally plot these values. plot_dope calls gnuplot and plots the data. 
$dopeout = "dope.png";
open(DOPEOUT, $dopeout);
if( -e DOPEOUT)
{
        print "$dopeout exists\n";
        $dopeout = check_dope();
}
plot_dope($gtargdat, $gtempdat, $dopeout);




###############################SUBROUTINES###################
#check alignment and send sequences for parsing
sub check_aln
{
        my($alnseqs, $code)= @_;
        
        foreach $seq(@$alnseqs)
        {
                if($seq =~ /$$code/)
                {
                        $return_seq = $seq;
                }
        }
	if($return_seq eq '') {die "Can't find code: $$code in alignment; subroutine check_aln failed!\n";}
	return $return_seq;
}



#Parse alignment files to obtain sequence alone
sub parse_aln
{
        
        my($seq, $code) = @_;

        my @seq = $seq;
        my @parseq;

        foreach my $line(@seq)
        {
                $line =~ s/\n//;

                if($line =~ /^$code/)
                {
                        $line =~ s/$code.*//;
                        $line =~ s/(?:sequence|structureX):.*//;
                        $line =~ s/\n//s;
                        
                        push(@parseq, $line);
                }
        }

	if (!@parseq) { die "Check your alignment (PIR) file; subroutine parse_aln failed!\n"}
        return @parseq;
}



#calculates gaps given the sequence alone 
sub calc_gaps
{
        my ($sequence) = @_;
        $sequence =~ s/\n//;
        my @gaps;

        foreach my $position(0 .. length($sequence)-1)
        {
                my $gap = substr($sequence, $position, 1);

                if($gap eq '-')
                {
                        push(@gaps, $position+1);
                }
        }
	if(!@gaps) { die "subroutine calc_gaps failed\n"}
        return @gaps;

}

#To convert the draft profile to working dat file
sub profiletodat
{
        my(@profile) = @_;
        my @modprofile;

        foreach my $line(@profile)
        {
                
                if($line =~ /^\s*\#/)
                {
                        next;
                }
                else
                {                       
			my @a = split ' ', $line;
			my $energy = $a[42];                        
			#my $energy = substr($line, 436, 10);
			#print $energy,"\n";
                        push(@modprofile,$energy);
                }
        }
	if (!@modprofile) { die "Something is wrong with an Energy Profile file; subroutine
                                 profiletodat failed\n"}     
	shift @modprofile;
	return @modprofile;
}       



#To modify the dat files. Receives reference dat file and gap file containg positions of gaps

sub modify_dat
{
        my($dat, $gaps) = @_;

        foreach $position(@$gaps)
        {
                if($position<@$dat)
                {
                        splice(@$dat, $position, 0, "?");
                }
        }

        return @$dat;
}



#Check for output file dope.png. If it exists, ask for a new filename
sub check_dope
{

        print "Should I overwrite (y/n): ";
        $check = <STDIN>;
        chomp $check;

        if($check =~ /y/i)
        {               
		return $dopeout;
        }
        elsif($check =~ /n/i)
        {
                print "Enter new filename : ";
                $dopeout=<STDIN>;
                chomp $dopeout;
                return $dopeout;
        }
        else
        {
                print "Please enter y/n\n";
		$dopeout = check_dope();
        }
        
}



#Plotting the values using gnuplot
sub plot_dope
{
        my($targdat, $tempdat, $dopeout) = @_;
        
        my $gnuplot = "/usr/bin/gnuplot";
        
        my $terminal = "set terminal png\;\n";
        my $xlabel = "set xlabel \"Residue Index\"\;\n";
        my $ylabel = "set ylabel \"DOPE Score\"\;\n";
        my $missing = "set datafile missing \"?\"\;\n";
        my $output = "set output \"$dopeout\"\;\n";
        
        my $plot = "plot \"$targdat\" using 1:2 with lines, \"$tempdat\" using 1:2 with lines\;\n";

        my @plotset = ($terminal,$xlabel,$ylabel,$missing,$output,$plot);
#       print "@plotset\n";

        my $plotscript = "plotscript.txt";
        
        open(PLOTSCRIPT, ">$plotscript") or die "Can't open file $plotscript for writing\n";
        
        print PLOTSCRIPT @plotset;

        close (PLOTSCRIPT);
        
        my $finalscript = "plotscript.txt";
        
#       open(FINALSCRIPT, $finalscript);
#       
#       @gnuplotscript = <FINALSCRIPT>;
        
        `$gnuplot $finalscript`;

        print "The DOPE plot is in $dopeout\n";
}
