import unittest
import os
import modpipe.test
import modpipe.binaries
import subprocess

class CDHitTests(modpipe.test.TestCase):
    """Tests that run the CD-HIT binary returned by modpipe.binaries."""

    def test_cd_hit(self):
        """Check CD-HIT

        Runs the binary on '../db/test-pdb.fsa' (a path relative to the
        current working directory) and checks three things: the leading
        lines printed on stdout, the '>' header codes written to the output
        file, and the number of lines in the two '.clstr' files written
        alongside it. All output files are removed afterwards, whether or
        not the checks pass.
        """
        expected_codes = ['1bwyA', '1cbiA', '1ecsA', '1f9zA', '1fdqA',
                          '1fljA', '1ftpA', '1g7nA', '1gglA', '1h0zA',
                          '1hcbA', '1itkA', '1iynA', '1j4wA', '1jd0A',
                          '1keqA', '1kllA', '1kopA', '1kqwA', '1llpA',
                          '1lpjA', '1lugA', '1mwvA', '1n8yC', '1o8vA',
                          '1oafA', '1opaA', '1pmpA', '1qipA', '1rj5A',
                          '1sj2A', '1u2kA', '1ub2A', '1v9eA', '1vyfA',
                          '1zbyA', '1zncA', '1b56A', '1mdcA', '1hmrA',
                          '1cbsA', '1crbA']
        expected_out = ['total seq: 49', 'longest and shortest : 717 and 68',
                        'Total letters: 12148', 'Sequences have been sorted', '',
                        '49 finished\t42 clusters']

        binary = modpipe.binaries.get_cd_hit()
        base = 'mp-cdhit'
        # (file name, expected number of lines) for each cluster file.
        cluster_files = ((base + '.bak.clstr', 49), (base + '.clstr', 91))
        p = subprocess.Popen([binary, '-i', '../db/test-pdb.fsa',
                              '-o', base, '-n', '5', '-c', '0.90', '-B', '1',
                              '-M', '1000'],
                             stdout=subprocess.PIPE, universal_newlines=True)
        try:
            # Read stdout to EOF rather than zip()-ing it against the
            # expected lines: zip() stops at the shorter input, so a run
            # that printed too few lines would pass unnoticed. Draining the
            # pipe also means the child cannot block on a full pipe buffer
            # before its exit status is checked.
            output = [line.rstrip('\r\n') for line in p.stdout]
            # Only the leading lines are pinned; anything printed after
            # them is ignored, as before.
            self.assertEqual(output[:len(expected_out)], expected_out)
            # require_clean_exit comes from the base class (not visible
            # here); stdout is left open until after it has run in case it
            # reads from the pipe.
            self.require_clean_exit(p)

            with open(base) as fh:
                codes = [line[1:].rstrip('\r\n') for line in fh
                         if line.startswith('>')]
            self.assertEqual(codes, expected_codes)

            for fname, nlines in cluster_files:
                with open(fname) as fh:
                    wc = sum(1 for _ in fh)
                self.assertEqual(wc, nlines)
        finally:
            p.stdout.close()
            # Remove whatever output exists, even after a failed assertion,
            # so a failing run does not leave files behind.
            for fname in (base,) + tuple(f for f, _ in cluster_files):
                if os.path.exists(fname):
                    os.unlink(fname)

# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
