# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.


from modeller import *
from modpipe.sequtils import *
from modpipe.resutils import *
import copy

def make_alignment_copy(env, aln):
    """Build a new alignment object holding a simple copy of aln.

    For every entry of aln, its aligned sequence (as produced by
    get_alignment_positions) is appended to a fresh alignment created
    in env, and the code, prottyp, atom_file, range and name fields are
    shallow-copied onto the new entry. This is not a sophisticated
    copy: anything else stored on the original is not transferred."""
    copied_fields = ('code', 'prottyp', 'atom_file', 'range', 'name')
    duplicate = alignment(env)
    for index, entry in enumerate(aln):
        residues = get_alignment_positions(aln.positions, entry)
        duplicate.append_sequence(''.join(residues))

        # The entry just appended sits at the same index as its source.
        for field in copied_fields:
            value = copy.copy(getattr(entry, field))
            setattr(duplicate[index], field, value)
    return duplicate

def create_modeller_alignment(env, comment, seq1, seq2, code1, type1,
                              file1, range1, code2, type2, file2, range2):
    """Create and return an alignment object holding two sequences.

    seq1 and seq2 are lists of residues; each is joined into a string
    and appended, in that order, to a new alignment created in env.
    comment is inserted at the front of the alignment's comments, and
    the code, prottyp, atom_file and range of the two entries are set
    from (code1, type1, file1, range1) and (code2, type2, file2,
    range2) respectively."""
    pair = alignment(env)
    for residues in (seq1, seq2):
        pair.append_sequence(''.join(residues))
    pair.comments.insert(0, comment)

    headers = ((code1, type1, file1, range1),
               (code2, type2, file2, range2))
    for index, (code, prottyp, atom_file, span) in enumerate(headers):
        pair[index].code = code
        pair[index].prottyp = prottyp
        pair[index].atom_file = atom_file
        pair[index].range = span
    return pair


def fix_aligned_sequence(modenv, alnseq, refseq):
    """Patch an aligned sequence so that it agrees with a reference.

    This will take a sequence from an alignment (alnseq, gaps included)
    and the actual sequence of the same entry (refseq, as read by, say,
    Modeller) and patch alnseq according to refseq. This is mainly to
    handle situations where alignment programs tend to read and
    interpret the residues in a PDB file in different ways.

    modenv is only passed on to quick_align(). alnseq is modified in
    place (insert() and item assignment are used on it, so it has to be
    a mutable sequence such as a list of 1-letter codes) and the same
    object is returned."""

    # Remove gaps from aligned sequence
    alnseq_ng = remove_gaps(alnseq)

    # Create an alignment object for the first sequence along
    # with reference (as read by Modeller) and generate a simple
    # alignment using the identity matrix.
    # refalna is the aligned reference sequence, refalnb the aligned
    # gap-free copy of alnseq (both as lists of alignment positions).
    refalna, refalnb = quick_align(modenv, refseq, alnseq_ng)

    # Do the 3-way comparison that will bring all three aligned
    # sequences (original, reference, and aligned original) to
    # the same length.
    #
    # Each pass of the while loop scans from position 0. As soon as one
    # of the editing branches below changes a list, the scan is
    # abandoned (break) and restarted from the beginning. 'match' counts
    # the positions at which refalnb and alnseq agree during the current
    # pass; the loop ends (y = 0) once that count reaches len(alnseq).
    #
    # NOTE(review): a position that satisfies none of the branches (for
    # example two different non-gap residues) neither edits nor counts
    # as a match, so a pass could finish without progress and the while
    # loop would repeat unchanged - confirm that quick_align() can never
    # produce such a column. Likewise alnseq[x]/refalnb[x] are indexed
    # up to the longer of the two lengths; confirm this cannot run past
    # the end of the shorter list.
    y = 1
    while y > 0:
        match = 0
        for x in range(0, max(len(refalnb), len(alnseq))):

            # Kickstart match if starting position is a gap (overhang)
            # -ACDEFG  (alnseq)
            # ACDDEFG  (refalna)
            # ACD-EFG  (refalnb)
            # The gap from alnseq is inserted into both reference lists.
            if match == 0 and is_gap(alnseq[x]) and not is_gap(refalnb[x]):
                refalna.insert(x, alnseq[x])
                refalnb.insert(x, alnseq[x])
                break

            # Adjust for length if there are trailing gaps (overhangs)
            # -ACD-EFG-  (alnseq)
            # -ACDDEFG   (refalna)
            # -ACD-EFG   (refalnb)
            # x is one past the end of refalnb here, so the element of
            # alnseq at x is appended to both reference lists.
            elif match > 0 and x == len(refalnb) and x < len(alnseq):
                refalna.append(alnseq[x])
                refalnb.append(alnseq[x])
                break

            # Match condition
            # ACD-EFG  (alnseq)
            # ACDDEFG  (refalna)
            # ACD-EFG  (refalnb)
            elif refalnb[x] == alnseq[x]:
                match += 1
                # Every position of alnseq agrees: stop the outer loop.
                if match == len(alnseq):
                    y = 0
                    break

            # Leading gaps in reference alignment
            # ACDEFG  (alnseq)
            # ACDDEFG (refalna)
            # -ACDEFG (refalnb)
            # The column is dropped from both reference lists.
            elif match == 0 and is_gap(refalnb[x]) and not is_gap(alnseq[x]):
                refalna.pop(x)
                refalnb.pop(x)
                break

            # Insert gaps from the original alnseq (internal)
            # ACD-EF-G  (alnseq)
            # ACDDEFG   (refalna)
            # ACD-EFG   (refalnb)
            elif match > 0 and is_gap(alnseq[x]) and not is_gap(refalnb[x]):
                refalna.insert(x, alnseq[x])
                refalnb.insert(x, alnseq[x])
                break

            # Deletion wrt the ref sequence
            # ACDEFG  (alnseq)
            # ACDDEFG (refalna)
            # ACD-EFG (refalnb)
            # The gap from refalnb is inserted into alnseq itself.
            elif match > 0 and is_gap(refalnb[x]) and not is_gap(alnseq[x]):
                alnseq.insert(x, refalnb[x])
                break


    # Remove trailing gaps from reference alignment
    # (everything beyond the length of alnseq is cut off)
    del refalna[len(alnseq):]
    del refalnb[len(alnseq):]

    # Now simply fix the aligned sequence using the reference sequence
    for x in range(0, len(refalna)):
        # Residue in alnseq where the reference has a gap: blank it out.
        if is_gap(refalna[x]) and not is_gap(alnseq[x]):
            alnseq[x] = '-'

        # Gap in alnseq where the reference has a residue: fill it in.
        elif not is_gap(refalna[x]) and is_gap(alnseq[x]):
            alnseq[x] = refalna[x]

        # Two different residues: the reference residue wins.
        elif refalna[x] != alnseq[x] and \
           not is_gap(refalna[x]) and not is_gap(alnseq[x]):
            alnseq[x] = refalna[x]

    return alnseq


def quick_align(env, seq1, seq2):
    """Produce a very simple pairwise alignment of seq1 and seq2.

    Both sequences are joined into strings, placed in a new alignment
    created in env and aligned using the '$(LIB)/id.sim.mat' matrix
    (the identity matrix) with local alignment switched on and 1D gap
    penalties of (-3000, -1000). The result is a pair of lists, one for
    seq1 and one for seq2, holding their alignment positions as
    returned by get_alignment_positions."""
    pair = alignment(env)
    for residues in (seq1, seq2):
        pair.append_sequence(''.join(residues))

    pair.align(rr_file='$(LIB)/id.sim.mat', local_alignment=True,
               matrix_offset=0, gap_penalties_1d=(-3000, -1000))

    aligned_first = get_alignment_positions(pair.positions, pair[0])
    aligned_second = get_alignment_positions(pair.positions, pair[1])
    return aligned_first, aligned_second


def parse_modeling_alignment(env, alnfile):
    """Read a PIR modeling alignment and pick out templates and target.

    All entries of alnfile are read into an alignment created in env.
    Returns a 2-tuple: a tuple with the codes of every entry whose
    prottyp starts with 'structure', and the code of the entry whose
    prottyp starts with 'sequence' (the last one if there are several,
    or an empty string if there is none)."""
    entries = alignment(env, file=alnfile, align_codes='all',
                        alignment_format='pir')
    template_codes = []
    target_code = ''
    for entry in entries:
        kind = entry.prottyp
        if kind.startswith('structure'):
            template_codes.append(entry.code)
        elif kind.startswith('sequence'):
            target_code = entry.code
    return tuple(template_codes), target_code


def get_percent_gaps(aln):
    """This takes an alignment object and calculates the
    percentage of gaps - defined as the percent ratio
    between the number of residues in the target aligned
    to a gap versus the number of residues in the target
    aligned to a standard residue. If a multiple alignment
    the lowest such percentage is returned after all
    pairwise comparisons. This assumes that the alignment
    is in modeling format, ie., the last sequence is the
    target (reference)."""
    p = len(aln) - 1
    s = aln[p]
    pcref = 100
    for x in range(0, p):
        gaps = 0
        alip = 0
        for p in aln.positions:
            r0 = p.get_residue(s)
            r1 = p.get_residue(aln[x])
            if r0 is None and r1 is None:
                continue
            elif r0 is None or r1 is None:
                gaps += 1
            elif r0 is not None and r1 is not None:
                alip += 1
        pcgaps = 100*gaps/alip
        if pcgaps < pcref:
            pcref = pcgaps
    return pcref


def get_highest_sequence_identity(aln):
    """Return the highest sequence identity between the target and the rest.

    The last sequence in the alignment is assumed to be the target. It
    is compared, via its get_sequence_identity() method, with every
    other sequence of the alignment and the largest value found is
    returned. 0 is returned when there is no other sequence or when no
    identity is greater than 0."""
    last = len(aln) - 1
    target = aln[last]
    highest = 0
    for index in range(0, last):
        identity = target.get_sequence_identity(aln[index])
        if identity > highest:
            highest = identity
    return highest

def get_alignment_sequence(alnpos, alnseq):
    """Return the residues of alnseq, without gaps, as a list.

    alnpos is an alignment/profile position object (an iterable of
    positions) and alnseq is an alignment/profile sequence object.
    Positions at which get_residue() returns None are skipped; for all
    other positions the residue's code is added to the list."""
    residues = []
    for position in alnpos:
        residue = position.get_residue(alnseq)
        # Compare with None explicitly, as get_alignment_positions does,
        # so that a residue object that evaluates as false is not dropped.
        if residue is not None:
            residues.extend(residue.code)
    return residues


def get_alignment_positions(alnpos, alnseq):
    """Return the aligned form of alnseq as a list of 1-letter codes.

    alnpos is an alignment/profile position object (an iterable of
    positions) and alnseq is an alignment/profile sequence object. For
    each position the residue's code is added to the list, or '-' when
    get_residue() returns None for that position."""
    columns = []
    for position in alnpos:
        residue = position.get_residue(alnseq)
        columns.extend('-' if residue is None else residue.code)
    return columns


def get_alnseq_by_prottyp(aln, typ):
    """Collect the entries of an alignment that have a given prottyp.

    Returns a list, in alignment order, of every entry of aln whose
    prottyp is equal to typ. The list is empty if nothing matches."""
    return [entry for entry in aln if entry.prottyp == typ]


def get_alnseq_by_code(aln, code):
    """Look up an entry of an alignment/profile by its code.

    Returns the first entry (alnsequence object) of aln whose code is
    equal to the given code, or an empty string if there is none."""
    for entry in aln:
        if entry.code == code:
            return entry
    return ''
