#!/usr/bin/perl

use strict;
use FileHandle;

# Command-line arguments: the file to read and the file to write.
my ($inputFile, $outputFile) = @ARGV;

# Both arguments must be present and true (so "" and "0" count as missing);
# otherwise print the usage line and exit with a failure status.
if (!$inputFile || !$outputFile) {
    print STDERR "usage: perl makeSmpToGiMap.pl <inputFile> <outputFile>\n";
    exit(1);
}

# Open the input read-only.  The mode is passed as its own argument so that
# characters in the file name (a leading '>', a trailing '|', surrounding
# whitespace) can never be interpreted as part of the open mode.
my $inputFh = FileHandle->new($inputFile, "r")
    or die "could not open input $inputFile: $!\n";

# $allInfo->{record number}->{id from header}->{line index} = detail line text
# Line indexes restart at 1 after every header line.
my $allInfo = {};
my $smpId;
my $lineCount = 1;
my $recordNumber;

while (my $line = <$inputFh>){
    chomp $line;

    # Drop the CR left behind by CRLF input; otherwise the exact string
    # comparison done later on stored lines can never match.
    $line =~ s/\r$//;

    next if ($line =~ /Designations/);

    if ($line =~ /^(\d+)\:\s(\S+)/){
        # Header line "<record number>: <id>" starts a new record.
        $recordNumber = $1;
        $smpId = $2;
        $lineCount = 1;
    }
    elsif (defined $recordNumber){
        # Detail line of the current record.  Lines seen before the first
        # header belong to no record and are ignored.
        $allInfo->{$recordNumber}->{$smpId}->{$lineCount} = $line;
        $lineCount++;
    }
}

$inputFh->close();

# Build $smpToGeneIdMap->{smp id}->{gene id} = "valid" or "discontinued"
# from the detail lines collected per record in $allInfo.
# Progress and parse problems are reported on STDERR.
my $smpToGeneIdMap = {};

# Records and ids are visited in sorted order so that, when two records
# resolve to the same id / gene id pair, the status that ends up in the map
# is the same on every run.  Non-numeric record keys cannot carry a usable
# id and are left out.
foreach my $recordNumber (sort { $a <=> $b } grep { /^\d+$/ } keys %$allInfo){
    my $smpIds = $allInfo->{$recordNumber};
    foreach my $headerId (sort keys %$smpIds){

        next unless ($headerId =~ /\S/);
        my $lineNumbers = $smpIds->{$headerId};

        # The id used for the map starts as the header id and may be replaced
        # by the alias below.
        my $smpId = $headerId;

        # Line 1 is the name line; it is not used for the map.

        # alias line
        my $otherAliasLine = defined $lineNumbers->{2} ? $lineNumbers->{2} : '';
        if ($otherAliasLine =~ /^\sOther\sAliases\:\s(\S+)/){
            my $alias = $1;
            unless ($smpId eq $alias){
                if ($alias =~ /^Smp/){
                    # Prefer the Smp-style alias over the header id.
                    $smpId = $alias;
                }
                else {
                    # NOTE(review): the message says "skip", but processing
                    # continues and the header id is still mapped below.
                    # Behaviour kept as-is -- confirm whether a real skip
                    # was intended.
                    print STDERR "skip $smpId entirely\n";
                }
            }
        }
        else {
            print STDERR "did not parse smp id $smpId other alias $otherAliasLine\n";
        }

        # gene id line
        # $geneId is scoped to this record so that a record whose GeneID line
        # does not parse can never reuse the gene id of an earlier record.
        my $geneId;
        my $geneIdLine = defined $lineNumbers->{3} ? $lineNumbers->{3} : '';
        if ($geneIdLine =~ /^\sGeneID\:\s(\d+)/){
            $geneId = $1;
            $smpToGeneIdMap->{$smpId}->{$geneId} = "valid";
            print STDERR "map smp id $smpId to gene id $geneId\n";
        }
        else {
            print STDERR "did not parse smp id $smpId gene id line $geneIdLine\n";
        }

        # Line 4, when present and non-empty, is expected to be the
        # discontinued notice.
        my $discontinuedLine = $lineNumbers->{4};
        if ($discontinuedLine){
            if ($discontinuedLine eq " This record was discontinued."){
                if (defined $geneId){
                    $smpToGeneIdMap->{$smpId}->{$geneId} = "discontinued";
                    print STDERR "\tsubsequently discontinued\n";
                }
                else {
                    # Without a parsed gene id there is no map entry to flag.
                    print STDERR "discontinued record for smp $smpId has no gene id; not recorded\n";
                }
            }
            else {
                print STDERR "did not get expected discontinued string for smp $smpId (discontinued $discontinuedLine)\n";
            }
        }

        # Anything on line 5 is unexpected; report it.
        my $bonusLine = $lineNumbers->{5};
        if ($bonusLine){
            print STDERR "bonus line for smp $smpId: $bonusLine\n";
        }
    }
}
 
# Write the map as tab-separated lines: <smp id> <gene id> <status>.
# The mode is passed separately from the file name so the name is never
# parsed for mode characters.
my $outFh = FileHandle->new($outputFile, "w")
    or die "could not open $outputFile for writing: $!\n";

# Sorted keys give the same row order on every run.
foreach my $smpId (sort keys %$smpToGeneIdMap){
    my $geneIds = $smpToGeneIdMap->{$smpId};
    foreach my $geneId (sort { $a <=> $b } keys %$geneIds){
        my $status = $geneIds->{$geneId};
        print $outFh "$smpId\t$geneId\t$status\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface at close, so check it.
$outFh->close()
    or die "could not finish writing $outputFile: $!\n";
    
	
			    

	
