#!/usr/bin/python
# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

from optparse import OptionParser
import modpipe.version
import modpipe.sequence
import os
import sys

def get_options():
    """Parse the command line and set up the sequence file handlers.

    Returns a 7-tuple of (input file name, output directory, input parser
    instance, output parser instance, ID type, directory structure type,
    output file extension).  If the number of positional arguments is not
    exactly two, the problem is reported through parser.error().
    """
    usage_text = """
 Add sequences from an alignment file to a directory.

 Usage: %prog [options] infile dir

 infile is the input sequence file, and dir is the directory to which
 sequences will be added.

 Run `%prog -h` for help information
"""
    parser = OptionParser(version=modpipe.version.message())
    parser.set_usage(usage_text)

    formats = {'FASTA': modpipe.sequence.FASTAFile,
               'PIR': modpipe.sequence.PIRFile,
               'SPTR': modpipe.sequence.SPTRFile}
    extensions = {'FASTA': '.fsa', 'PIR': '.pir'}

    # A format is offered for input if its class has a read() method, and
    # for output if its class has a write() method.
    readers = dict((name, cls) for (name, cls) in formats.items()
                   if hasattr(cls, 'read'))
    writers = dict((name, cls) for (name, cls) in formats.items()
                   if hasattr(cls, 'write'))
    reader_names = readers.keys()
    writer_names = writers.keys()

    input_help = "Format of the input file (%s, default PIR)" \
                 % ", ".join(reader_names)
    output_help = "Format of the output file (%s, default PIR)" \
                  % ", ".join(writer_names)

    parser.add_option("-f", "--input_format", dest="infmt",
                      metavar="FORMAT", type="choice",
                      choices=reader_names, default="PIR",
                      help=input_help)
    parser.add_option("-g", "--output_format", dest="outfmt",
                      metavar="FORMAT", type="choice",
                      choices=writer_names, default="PIR",
                      help=output_help)
    parser.add_option("-c", "--id_type", dest="id", metavar="ID",
                      type="choice", choices=('CODE', 'MD5'),
                      default='CODE',
                      help="Type of ID codes for the sequences (CODE, MD5)")
    parser.add_option("-s", "--dirstructure", dest="dirstruc",
                      metavar="TYPE", type="choice",
                      choices=('SIMPLE', 'PDB', 'MODPIPE'),
                      default='SIMPLE',
                      help="Type of directory structure "
                           "(SIMPLE, PDB, MODPIPE)")
    opts, args = parser.parse_args()

    if len(args) != 2:
        parser.error("You must specify an input file name and a directory")
    infile, outdir = args
    # The output handler is instantiated first, then the input handler.
    writer = writers[opts.outfmt]()
    reader = readers[opts.infmt]()
    return (infile, outdir, reader, writer, opts.id, opts.dirstruc,
            extensions[opts.outfmt])

def create_directory(dirstruc, seq, outdir):
    """Create (if necessary) and return the directory for one sequence.

    dirstruc selects the layout below outdir, where <code> is seq.code:
      'SIMPLE'       - outdir/<code>
      'PDB'          - outdir/<code[1:3]>/<code>
      anything else  - outdir/<code[:3]>/<code>  (used for 'MODPIPE')

    Returns the path of the directory.  Raises OSError if the directory
    cannot be created and the path does not exist afterwards.
    """
    code = seq.code
    if dirstruc == 'SIMPLE':
        path = os.path.join(outdir, code)
    elif dirstruc == 'PDB':
        path = os.path.join(outdir, code[1:3], code)
    else:
        path = os.path.join(outdir, code[:3], code)
    # Attempt the creation and only then look at the filesystem: checking
    # for existence first leaves a window in which another process can
    # create the directory, making os.makedirs() fail spuriously.
    try:
        os.makedirs(path)
    except OSError:
        # An already-existing path is fine (as before); anything else,
        # e.g. a permissions problem, is a real error.
        if not os.path.exists(path):
            raise
    return path

def main():
    """Split the input sequence file into one output file per sequence.

    Every sequence read from the input file is cleaned, optionally given
    the ID returned by seq.get_id() as its code (when the ID type is
    'MD5'), and written to its own file in a directory created below the
    output directory.  The (ID, original code) pairs are collected in a
    modpipe.sequence.UniqueFile, which is written at the end to the file
    name it derives from the input file name.
    """
    infile, outdir, in_parser, out_parser, idtype, dirstruc, ext = get_options()

    unq = modpipe.sequence.UniqueFile()
    # All files are closed explicitly (try/finally) rather than left to the
    # garbage collector, so descriptors are released and output is flushed
    # deterministically even when an error interrupts processing.
    in_fh = open(infile)
    try:
        for (count, seq) in enumerate(in_parser.read(in_fh)):
            seq.clean()
            if count > 0 and count % 1000 == 0:
                # Parenthesised single argument: prints the same text
                # whether print is a statement or a function.
                print("AddSeq.py__M> Processed %d sequences" % count)
            code = seq_id = seq.code
            # Calculate ModPipe sequence ID if requested
            if idtype == 'MD5':
                seq.code = seq_id = seq.get_id()
            unq.add_sequence(seq_id, code)
            seq_dir = create_directory(dirstruc, seq, outdir)
            outfile = os.path.join(seq_dir, seq.code + ext)
            out_fh = open(outfile, 'w')
            try:
                out_parser.write(out_fh, seq)
            finally:
                out_fh.close()
    finally:
        in_fh.close()

    unq_fh = open(unq.file_name_from_seqfile(infile), 'w')
    try:
        unq.write(unq_fh)
    finally:
        unq_fh.close()


# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
