# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.


from modeller import *
from modpipe.alnutils import *
from modpipe.sequtils import *
import sys, os
import gzip
import bz2

def get_pdb_repository(include_local=False):
    """Return the default location(s) to search for PDB files.

       By default a single directory path is returned as a string. If
       include_local is set, a list is returned instead, with the current
       directory listed ahead of the default repository."""
    # New-style PDB repository structure (1abc in an 'ab' directory, gzipped)
    repository = '/netapp/database/pdb/remediated/pdb/'
    if not include_local:
        return repository
    return ['./', repository]

def get_secstr_list(mdl):
    """Take a model and return a list of secstr codes
    at each residue position.

    The per-residue conformation codes from get_list_conformation() are
    scanned with a four-residue window:
      'H' - the residue belongs to a run of four consecutive 'R' codes
      'B' - the residue belongs to a run of four consecutive 'E' codes
      'L' - the residue itself has conformation 'L'
      'U' - none of the above
    The returned list has one entry per residue of the model."""
    c = list(get_list_conformation(mdl))
    # Every position starts out unassigned ('X'); anything still 'X' when
    # the scan reaches it becomes 'U'.
    secstr = ['X'] * len(c)
    for i, conf in enumerate(c):
        # Slicing never raises near the end of the list (the window is
        # simply shorter than four and so cannot match), unlike indexing
        # c[i+1]..c[i+3] directly.
        window = c[i:i+4]
        if window == ['R'] * 4:
            secstr[i:i+4] = ['H'] * 4
        elif window == ['E'] * 4:
            secstr[i:i+4] = ['B'] * 4
        elif conf == 'L':
            secstr[i] = 'L'
        elif secstr[i] == 'X':
            # Positions already marked 'H'/'B' by an earlier window are
            # left untouched.
            secstr[i] = 'U'
    return secstr

def get_list_conformation(mdl):
    """Return the conformation code of every residue in a model.

    The result is a list with one entry per item of mdl.residues, in the
    same order, each computed by get_residue_conformation()."""
    conformations = []
    for residue in mdl.residues:
        conformations.append(get_residue_conformation(residue))
    return conformations

def get_residue_conformation(res):
    """Return the conformation code for a single model residue.

    A residue for which either the phi or the psi dihedral is unavailable
    (None) is reported as 'C'; otherwise the values of the two dihedrals
    are classified by get_phipsi_bin()."""
    has_both = res.phi is not None and res.psi is not None
    if not has_both:
        return 'C'
    return get_phipsi_bin(res.phi.value, res.psi.value)

def get_phipsi_bin(phi, psi):
    """Classify a (phi,psi) pair into a one-letter conformation code.

    The regions are tested in this order (all bounds inclusive; the
    angles are presumably in degrees):
      'R' - phi in [-140, -30] and psi in [-90, 45]
      'E' - phi in [-180, -30] and psi in [60, 180] or [-180, -150]
      'L' - phi in [20, 125] and psi in [-45, 90]
      'C' - anything else"""
    if -140 <= phi <= -30 and -90 <= psi <= 45:
        return 'R'
    if -180 <= phi <= -30 and (60 <= psi <= 180 or -180 <= psi <= -150):
        return 'E'
    if 20 <= phi <= 125 and -45 <= psi <= 90:
        return 'L'
    return 'C'


def get_PDB_range(env, aln, alnseq, code, chain):
    """Locate an aligned sequence within a PDB chain.

    Takes an alignment, an alnsequence object, a PDB code and a chain
    identifier, and returns a (beg, end) pair holding the PDB residue
    labels (as produced by fetch_PDB_num) of the first and last residues
    of the sequence in the specified PDB chain."""

    # Read the requested chain from the PDB file
    pdb_chain = fetch_PDB_chain(env, code, chain)

    # Residue codes of the whole chain
    chain_residues = get_chain_seq(pdb_chain)

    # Residue codes of the sequence taken from the alignment
    query_residues = get_alignment_sequence(aln.positions, alnseq)

    # Where the aligned sequence starts and stops within the chain
    first, last = find_seq_in_seq(query_residues, chain_residues)

    # Translate those positions into PDB residue labels
    first, last = fetch_PDB_num(pdb_chain, [first, last])

    return first, last


def get_chain_seq(chain):
    """Return the sequence of a chain object as a list holding the
    'code' attribute of each of its residues, in order."""
    sequence = []
    for residue in chain.residues:
        sequence.append(residue.code)
    return sequence


def fetch_PDB_num(chain, pos):
    """Return PDB-style labels for selected residues of a chain.

    pos is a sequence of indices into chain.residues; each is turned into
    a string of the form 'num:chainname', built from the residue's 'num'
    attribute and the chain's 'name'. The labels are returned as a list,
    in the order given by pos."""
    labels = []
    for index in pos:
        labels.append('%s:%s' % (chain.residues[index].num, chain.name))
    return labels


def locate_PDB(code, rep):
    """Take a PDB code and a list of directory locations
    and return the path to the first eligible file.

    rep is either a sequence of directories or a single colon-separated
    string of directories. For each directory (in order), and for each
    extension '', '.gz', '.bz2', '.Z' (in that order), the candidate
    names '<code>', '<code>.pdb' and 'pdb<code>.ent' are tried, first
    directly in the directory and then in the PDB-style subdirectory
    named after characters 2-3 of the code.

    If no file is found, a message is written to standard error and the
    process exits with status 1 (SystemExit is raised)."""

    if isinstance(rep, str):
        rep = rep.split(':')

    # Handle PDB-style subdirectories (e.g. 1abc in 'ab' subdirectory)
    subdir = code[1:3]

    for loc in rep:
        for ext in ('', '.gz', '.bz2', '.Z'):
            for pdbname in (code + ext, code + '.pdb' + ext,
                            'pdb' + code + '.ent' + ext):
                for fullname in (os.path.join(loc, pdbname),
                                 os.path.join(loc, subdir, pdbname)):
                    if os.path.isfile(fullname):
                        return fullname

    # Report the failure and exit if no PDB file was found. The values are
    # wrapped in one-element tuples so that a tuple-valued 'rep' is printed
    # as a whole rather than being unpacked as format arguments.
    sys.stderr.write("PDB file not found: %s\n" % (code,))
    sys.stderr.write("   Searched repositories: %s\n" % (rep,))
    sys.exit(1)

def get_uncompressed_pdb(code, rep, localdir):
    """Find the given PDB code in the repositories, and if it is compressed,
       make an uncompressed copy in localdir. Return the full path to
       the file.

       Files ending in '.gz' or '.bz2' are decompressed into a file named
       after the code inside localdir, and the path of that copy is
       returned. Files without a recognized compression suffix are not
       copied; the path found in the repository is returned as is.
       NotImplementedError is raised for '.Z' files."""
    pdbname = locate_PDB(code, rep)
    if pdbname.endswith('.gz'):
        opener = gzip.GzipFile
    elif pdbname.endswith('.bz2'):
        opener = bz2.BZ2File
    elif pdbname.endswith('.Z'):
        raise NotImplementedError("Sorry: cannot uncompress .Z files")
    else:
        # Not compressed: use the repository file directly
        return pdbname

    # Close both handles explicitly rather than relying on garbage
    # collection to release them.
    src = opener(pdbname)
    try:
        pdb = src.read()
    finally:
        src.close()

    pdbname = os.path.join(localdir, code)
    # The decompressed data are raw bytes, so write them in binary mode
    # to avoid any newline translation or text encoding.
    dst = open(pdbname, 'wb')
    try:
        dst.write(pdb)
    finally:
        dst.close()
    return pdbname


def fetch_PDB_chain(env, code, chain):
    """Read the specified PDB file into a new model and return one of
    its chains: the chain named by 'chain' when that is a single
    character, otherwise the first chain in the model."""
    structure = model(env)
    structure.read(file=code, model_segment=('FIRST:@', 'END:'))
    # A one-character identifier selects that chain by name; anything
    # else (e.g. an empty string) falls back to the first chain.
    selector = chain if len(chain) == 1 else 0
    return structure.chains[selector]
