#!/usr/bin/python
# This file is part of ModPipe, Copyright 1997-2010 Andrej Sali
#
# ModPipe is free software: you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ModPipe.  If not, see <http://www.gnu.org/licenses/>.

from optparse import OptionParser
import modpipe.version
import modpipe.config
import modpipe.filesystem
import modpipe.sequence

def get_options():
    """Parse the command line and return the parsed option values.

    Two options are recognised, and both must be supplied:

      -c / --conf_file      stored as the 'conffile' attribute
      -f / --sequence_file  stored as the 'seqfile' attribute

    If either is missing or empty, parser.error() is called with a
    message naming the missing file; the configuration file is checked
    first.  Positional arguments are parsed but not returned.
    """
    usage_text = """
 Populates the ModPipe filesystem with a FASTA file containing sequences.

 Run `%prog -h` for help information
"""
    cmdline = OptionParser(usage=usage_text,
                           version=modpipe.version.message())

    cmdline.add_option(
        "-c", "--conf_file", dest="conffile", type="string", default=None,
        help="""ModPipe configuration file. Cannot proceed
                           without this option.""")
    cmdline.add_option(
        "-f", "--sequence_file", dest="seqfile", type="string", default=None,
        help="""Input file with one or more sequences in FASTA
                              format. Cannot proceed without this option.""")

    # Only the option values are of interest; leftover arguments are ignored
    options = cmdline.parse_args()[0]

    # Mandatory options, in the order in which they are verified
    mandatory = (
        (options.conffile, "Cannot proceed without configuration file"),
        (options.seqfile, "Cannot proceed without sequence file"),
    )
    for supplied, complaint in mandatory:
        if not supplied:
            cmdline.error(complaint)

    return options


def main():
    """Add every sequence of the input FASTA file to the ModPipe filesystem.

    The configuration file given on the command line is read and used to
    construct a modpipe.filesystem.FileSystem.  Each sequence read from
    the FASTA file is cleaned and added to that filesystem; the value
    returned by add_sequence() is recorded, together with the sequence's
    code, in a modpipe.sequence.UniqueFile.  Finally the UniqueFile is
    written to the file named by its file_name_from_seqfile() method.

    A progress line is printed to standard output every 1000 sequences.

    Every file is opened in a 'with' block so that it is closed (and, for
    the output file, flushed) deterministically, even when an error
    occurs part way through.
    """
    opts = get_options()
    # Read in the configuration file and set up filesystem
    # NOTE(review): assumes read_file() consumes the file before returning,
    # since the handle is closed as soon as the call completes - confirm
    with open(opts.conffile, 'r') as conf_fh:
        config = modpipe.config.read_file(conf_fh)
    fs = modpipe.filesystem.FileSystem(config)

    unq = modpipe.sequence.UniqueFile()
    f = modpipe.sequence.FASTAFile()
    # Keep the input open for the whole loop; the sequences are consumed
    # one at a time as f.read() is iterated
    with open(opts.seqfile) as seq_fh:
        for (count, seq) in enumerate(f.read(seq_fh)):
            seq.clean()
            # count is zero-based, so at this point exactly 'count'
            # sequences have already been handled
            if count > 0 and count % 1000 == 0:
                # Parenthesised single argument: same output whether print
                # is a statement or a function
                print("AddSeqMP__M> Processed %d sequences" % count)
            # Take the code before the sequence is handed to the filesystem
            code = seq.code
            seq_id = fs.add_sequence(seq, f)
            unq.add_sequence(seq_id, code)
    with open(unq.file_name_from_seqfile(opts.seqfile), 'w') as unq_fh:
        unq.write(unq_fh)

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
