#!/usr/bin/perl

use strict;
use DBI;
use FileHandle;


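# Gets all modbase seq ids for the schisto run hard-coded in the query below
# (run = 'schisto_410') and, for each one, all (database_name, database_id)
# pairs listed for it in the nr table.
# Writes one tab-separated line per mapping to <outputFile>:
#   seq_id <TAB> database_name <TAB> database_id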

# Path of the mapping file to write; taken from the first command-line argument.
my $outputFile = $ARGV[0];

# Check for a defined, non-empty argument rather than a true one, so that an
# output file literally named "0" is accepted instead of being treated as a
# missing argument.
if (!defined $outputFile || $outputFile eq ''){
    print STDERR "usage: perl makeSmpModbaseMap.pl <outputFile>\n";
    exit(1);
}

# Connect to the synonyms database. getSynonymsDbh() creates the handle with
# RaiseError set, so a failing DBI call below dies instead of returning an
# error status that would have to be checked by hand.
my $dbh = getSynonymsDbh();

# Step 1: collect the distinct seq_ids recorded for the schisto_410 run.
my $query = "select distinct seq_id from modbase.newmodels where run = 'schisto_410'";

my $sth = $dbh->prepare($query);
$sth->execute();

# Set of seq_ids (seq_id => 1). Starts as an empty hash so that the later
# dereference is well-defined even when the query returns no rows.
my $allSeqIds = {};

while (my ($seqId) = $sth->fetchrow_array()){
    $allSeqIds->{$seqId} = 1;
}

# Step 2: for every seq_id, look up its (database_name, database_id) pairs in
# nr. The statement is prepared once with a placeholder and re-executed per
# seq_id.
my $nrQuery = "select database_name, database_id from nr where seq_id = ?";
my $nrSth = $dbh->prepare($nrQuery);

# Nested lookup: seq_id => database_name => database_id => 1.
# A seq_id with no rows in nr never gets an entry here.
my $allDatabaseIds = {};
foreach my $seqId (keys %$allSeqIds){
    $nrSth->execute($seqId);

    while (my ($dbName, $dbId) = $nrSth->fetchrow_array()){
        $allDatabaseIds->{$seqId}->{$dbName}->{$dbId} = 1;
    }
}

# Both statement handles have been read to exhaustion and nothing after this
# point touches the database, so release the connection explicitly.
$dbh->disconnect();

# Open the output file with the (filename, mode) form of the constructor.
# Gluing ">" onto the name goes through two-argument open, which interprets
# mode characters and surrounding whitespace that happen to be in the
# filename. The system error is included so a failure explains itself.
my $outFh = FileHandle->new($outputFile, "w")
    or die "could not open $outputFile for writing: $!\n";

# $seqCount tallies the seq_ids that have at least one nr entry; it is counted
# but not included in the summary message below.
my $seqCount = 0;
my $missingGiCount = 0;
my $missingSmpCount = 0;

# Keys are sorted at every level so that repeated runs over the same data
# produce identical files (plain hash order differs from run to run).
foreach my $seqId (sort keys %$allDatabaseIds){
    $seqCount++;
    my $dbNames = $allDatabaseIds->{$seqId};

    # A seq_id counts as missing a GI / smp id when it has no entry under the
    # "GI" / "CU" database name respectively.
    $missingGiCount++ unless exists $dbNames->{"GI"};
    $missingSmpCount++ unless exists $dbNames->{"CU"};

    # One output line per (seq_id, database_name, database_id) mapping.
    foreach my $dbName (sort keys %$dbNames){
        my $dbIds = $dbNames->{$dbName};
        foreach my $dbId (sort keys %$dbIds){
            print {$outFh} "$seqId\t$dbName\t$dbId\n";
        }
    }
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so close explicitly and treat a failure as fatal.
$outFh->close()
    or die "could not close $outputFile: $!\n";

print STDERR "all done; missing gi: $missingGiCount missing smp: $missingSmpCount\n";


# Opens a connection to the modbase_synonyms MySQL database on host "modbase".
#
# Takes no arguments and returns the connected DBI database handle. The handle
# is created with RaiseError enabled, so a failed connect, or a later failing
# call on the handle, dies instead of returning an error status.
#
# NOTE(review): the user name and password are hard-coded here; consider
# moving them to configuration kept outside the source file.
sub getSynonymsDbh{

    my $dbString = "DBI:mysql:database=modbase_synonyms:hostname=modbase";
    my $username = "modbase";
    my $password = "modbasesecret";

    print STDERR "connecting\n";

    # PrintError defaults to on. With RaiseError also on, DBI would report
    # each failing call on the handle twice (a warning followed by the die),
    # so PrintError is switched off.
    my $dbh = DBI->connect( $dbString, $username, $password,
                            {RaiseError => 1, PrintError => 0});

    return $dbh;
}
