#!/usr/bin/perl

###########################################################
#   MIFS                                                  # 
#   implementation of Battiti algorithm                   #
#   written by Rachel Karchin 2004                        #
#   rachelk@salilab.org                                   #
###########################################################      


# Sentinel used as the initial "best objective" in the greedy search below.
$MINUS_INFINITY = -100000;

# Fill the option globals ($feature_class_mi_dir, $feature_feature_mi_dir,
# $nselect, $beta, $objectiveflag) from @ARGV; the routine prints the usage
# text and exits when one of them is missing.
process_command_line();

# Echo the settings that drive the selection.
print "Using beta=$beta and Objective function $objectiveflag\n";

#get candidate features and put in the f set
# One feature name is expected per line of STDIN.  Every name is stored in
# two structures: %feathash is the pool of candidates that have not been
# selected yet, and @featlist keeps the names in input order for the
# pairwise loop further down.
# Surrounding whitespace (including the CR of DOS line endings) is removed,
# and blank or repeated lines are skipped, so they cannot turn into bogus
# file names or cause the same files to be read twice.
while (defined($line = <STDIN>)) {
    chomp $line;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    next if $line eq '';                 # blank line: no feature name
    next if exists $feathash{$line};     # already seen this feature
    $feathash{$line} = 1;
    push @featlist, $line; #we'll use both of these data structures
}

#get the MI score of each feature with the requested mutation class description
#also get the Feature entropy for use with Kwak/Choi objective
# For every candidate F the file "<feature_class_mi_dir>/<F>-MI" is scanned:
#   line starting with "FEATURE entropy" -> $featHhash{F}   (H(F))
#   line starting with "Excess"          -> $featmihash{F}  (I(C,F))
# In both cases the value is the fourth whitespace-separated token.
foreach my $candf (keys %feathash) {
    my $file = $feature_class_mi_dir . "/" . $candf . "-MI";

    # Three-argument open with a lexical handle: the feature name was read
    # from STDIN, so it must never be interpreted as an open mode or pipe.
    open(my $fh, '<', $file) or die "Can't open $file for reading.\n";
    while (my $line = <$fh>) {
        if ($line =~ /^FEATURE\s+entropy/) {
            my @toks = split(/\s+/, $line);
            $featHhash{$candf} = $toks[3];    #H(F)
        }
        if ($line =~ /^Excess/) {
            my @toks = split(/\s+/, $line);
            $featmihash{$candf} = $toks[3];   #I(C,F)
        }
    }
    close $fh;

    # Without an "Excess" line the feature has no I(C,F) value; it would be
    # left out of the ranking and count as 0 in the objective, so say so.
    warn "Warning: no line starting with 'Excess' in $file; "
       . "no I(C,F) score stored for $candf\n"
        unless exists $featmihash{$candf};
}

#get the MI score of each feature with the other features
# For every unordered pair of distinct candidates the pairwise score is read
# from "<feature_feature_mi_dir>/<A>-<B>-MI"; if that file does not exist the
# name with the two features swapped is tried instead.  The value is the
# fourth whitespace-separated token of the line starting with "Excess" and is
# stored under both key orders of %featpairmihash.
for my $i (0 .. $#featlist) {
    for my $j (0 .. $i) {
        my ($feat_i, $feat_j) = ($featlist[$i], $featlist[$j]);
        next if ($feat_i eq $feat_j);

        my $file1 = $feature_feature_mi_dir . "/" . $feat_i . "-" . $feat_j . "-MI";
        my $file2 = $feature_feature_mi_dir . "/" . $feat_j . "-" . $feat_i . "-MI";

        # Both opens are checked: an existing but unreadable file used to be
        # skipped silently, which left the pair's I(F,F) undefined (i.e. 0).
        my $fh;
        if (-e $file1) {
            open($fh, '<', $file1)
                or die "Can't open $file1 for reading: $!\n";
        }
        else {
            open($fh, '<', $file2)
                or die "Can't open $file1 or $file2 for reading.\n";
        }

        while (my $line = <$fh>) {
            if ($line =~ /^Excess/) {
                my @toks = split(/\s+/, $line);
                my $pair_mi = $toks[3];
                $featpairmihash{$feat_i}{$feat_j} = $pair_mi;  #I(F,F)
                #for convenient retrieval
                $featpairmihash{$feat_j}{$feat_i} = $pair_mi;  #I(F,F)
            }
        }
        close $fh;
    }
}


#run greedy selection algorithm
# Rank the features that have an I(C,F) score, highest score first.
@sortedfeatures = sort hashValueDescendingNum (keys(%featmihash));

# With no scored feature there is nothing meaningful to print.
die "No candidate feature with an I(C,F) score is available; nothing to select.\n"
    unless @sortedfeatures;

%selectedfeathash = ();

#choice of first feature -- one with max I(C,F)
$selectedfeathash{$sortedfeatures[0]} = 1;
print "$sortedfeatures[0]\n";
delete($feathash{$sortedfeatures[0]});

# Each further round moves the remaining candidate with the highest objective
# value from %feathash into %selectedfeathash and prints its name.
for (my $round = 0; $round < ($nselect - 1); $round++) {
    # Stop once the pool is exhausted; otherwise the previous winner would be
    # printed again for every remaining round.
    last unless %feathash;

    my $best_feature;
    my $best_objective = $MINUS_INFINITY;

    # Sorted keys make the choice among equal objective values reproducible
    # instead of dependent on hash ordering.
    foreach my $cf (sort keys %feathash) {
        my $objective;
        if ($objectiveflag == 1) {
            $objective = compute_objective1($cf);
        }
        elsif ($objectiveflag == 2) {
            $objective = compute_objective2($cf);
        }
        else {
            die "Unknown objective function '$objectiveflag' (expected 1 or 2).\n";
        }

        # The first candidate is always accepted so that a winner exists even
        # if every objective value is at or below the sentinel.
        if (!defined $best_feature || $objective > $best_objective) {
            $best_objective = $objective;
            $best_feature   = $cf;
        }
    }

    $selectedfeathash{$best_feature} = 1;
    print "$best_feature\n";
    delete($feathash{$best_feature});
}

# Objective 1 for a single candidate feature:
#     I(C,F) - beta * (sum of I(F,S) over all selected features S)
# Argument: name of the candidate feature.
# Reads the globals %featmihash, %featpairmihash, %selectedfeathash, $beta.
# Returns the objective value.
sub compute_objective1 {
    my ($candidate_f) = @_;

    # Accumulated I(F,S) against everything selected so far.
    my $redundancy = 0;
    $redundancy += $featpairmihash{$candidate_f}{$_}
        for keys %selectedfeathash;

    return $featmihash{$candidate_f} - ($beta * $redundancy);
}

# Objective 2 (the Kwak and Choi variant) for a single candidate feature:
#     I(C,F) - beta * (sum over selected S of (I(C,S) / H(S)) * I(F,S))
# Argument: name of the candidate feature.
# Reads the globals %featmihash, %featHhash, %featpairmihash,
# %selectedfeathash and $beta.
# Returns the objective value.
# Dies with an explanatory message if a selected feature has no entropy
# value at all (previously this surfaced as "Illegal division by zero").
sub compute_objective2 {
    my ($candidate_f) = @_;
    my $I_C_F     = $featmihash{$candidate_f};
    my $I_S_F_sum = 0;

    foreach my $s (keys %selectedfeathash) {
        my $H_S = $featHhash{$s};
        die "No entropy value H(S) available for selected feature '$s'.\n"
            unless defined $H_S;

        # The weight I(C,S)/H(S) cannot be formed for H(S) == 0; such a
        # term is left out of the sum instead of aborting the whole run.
        next if $H_S == 0;

        my $w = $featmihash{$s} / $H_S;
        $I_S_F_sum += $w * $featpairmihash{$candidate_f}{$s};
    }

    return $I_C_F - ($beta * $I_S_F_sum);
}


# Comparator for sort(): orders feature names by their value in %featmihash,
# compared numerically, largest value first.  Operates on sort's $a and $b.
sub hashValueDescendingNum {
    my ($score_a, $score_b) = @featmihash{$a, $b};
    return $score_b <=> $score_a;
}

# Parse @ARGV into the option globals used by the rest of the script:
#   -feature_class_mi_dir   DIR -> $feature_class_mi_dir
#   -feature_feature_mi_dir DIR -> $feature_feature_mi_dir
#   -nselect N                  -> $nselect
#   -beta    B                  -> $beta
#   -objective 1|2              -> $objectiveflag
# Every option consumes the argument that follows it; arguments that match
# no option are ignored.  Option names are matched as prefixes of the
# argument, exactly as before.
# Prints the usage text and exits with status -1 when an option is missing
# or when -objective is neither 1 nor 2 (the only values the selection loop
# knows how to handle).
sub process_command_line {
    my $argc = 0;

    while ($argc <= $#ARGV) {
        my $arg = $ARGV[$argc++];

        if ($arg =~ /^-feature_class_mi_dir/) {
            $feature_class_mi_dir = $ARGV[$argc++];
        }
        elsif ($arg =~ /^-feature_feature_mi_dir/) {
            $feature_feature_mi_dir = $ARGV[$argc++];
        }
        elsif ($arg =~ /^-nselect/) {
            $nselect = $ARGV[$argc++];
        }
        elsif ($arg =~ /^-beta/) {
            $beta = $ARGV[$argc++];
        }
        elsif ($arg =~ /^-objective/) {
            $objectiveflag = $ARGV[$argc++];
        }
    }

    if (   !defined $feature_class_mi_dir
        || !defined $feature_feature_mi_dir
        || !defined $nselect
        || !defined $beta
        || !defined $objectiveflag)
    {
        usage();
        exit(-1);
    }

    # Same numeric comparisons the selection loop uses to pick the objective.
    unless ($objectiveflag == 1 || $objectiveflag == 2) {
        print STDERR "Invalid -objective value '$objectiveflag': must be 1 or 2.\n";
        usage();
        exit(-1);
    }
}

# Print the command-line synopsis and the meaning of the two -objective
# values to STDOUT.  Takes no arguments.
sub usage {
    print "Usage: MIFS -feature_class_mi_dir /alto2/home/rachelk/SNP-features/data/feature-class/MI -feature_feature_mi_dir /alto2/home/rachelk/SNP-features/correlation -nselect 7  -beta 0.5 -objective <1,2> < /alto2/home/rachelk/SNP-features/data/all-features\n";
    print "   objective=1 is the original Battiti objective function\n";
    # The final line used to end in "\b" (a backspace) instead of a newline.
    print "   objective=2 is a modified objective function from Kwak and Choi\n";
}
