import unittest
import os
import modpipe.test
import modpipe.binaries
import subprocess

class CDHitTests(modpipe.test.TestCase):
    """Tests that run the external CD-HIT binary on a small sequence file."""

    def test_cd_hit(self):
        """Check CD-HIT

        Runs the binary returned by ``modpipe.binaries.get_cd_hit()`` on
        ``../db/test-pdb.fsa`` (relative to the current working directory),
        then checks the leading lines of its standard output, the sequence
        codes written to the output file, and the line counts of the two
        cluster files. All output files are removed afterwards, whether or
        not the checks passed.
        """
        expected_codes = ['1apxA', '1ecsA', '1llp', '1n8yC', '1sj2A']
        expected_out = ['total seq: 10', 'longest and shortest : 717 and 120',
                        'Total letters: 3605', 'Sequences have been sorted', '',
                        '10 finished\t5 clusters']

        binary = modpipe.binaries.get_cd_hit()
        base = 'mp-cdhit'
        # (file name, expected number of lines) for each cluster file
        cluster_files = ((base + '.bak.clstr', 10), (base + '.clstr', 15))
        try:
            # universal_newlines=True gives text (not bytes) lines on both
            # Python 2 and Python 3, so they compare against str literals.
            p = subprocess.Popen([binary, '-i', '../db/test-pdb.fsa',
                                  '-o', base, '-n', '5', '-c', '0.90',
                                  '-B', '1', '-M', '1000'],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
            # Read stdout to EOF before checking anything: this keeps the
            # child from blocking on a full pipe and lets us notice output
            # that is shorter than expected (zip() would silently truncate).
            out_lines = [line.rstrip('\r\n') for line in p.stdout]
            # Only the leading lines are pinned; any further output is
            # deliberately ignored.
            self.assertEqual(out_lines[:len(expected_out)], expected_out)
            self.require_clean_exit(p)

            # Sequence codes are the FASTA header lines minus the '>'
            with open(base) as fh:
                codes = [line[1:].rstrip('\r\n') for line in fh
                         if line.startswith('>')]
            self.assertEqual(codes, expected_codes)

            for fname, nlines in cluster_files:
                with open(fname) as fh:
                    wc = sum(1 for _ in fh)
                self.assertEqual(wc, nlines)
        finally:
            # Clean up whatever was written, even if a check above failed,
            # so a failing run does not leave files behind.
            for fname in [base] + [name for name, _ in cluster_files]:
                if os.path.exists(fname):
                    os.unlink(fname)

# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
