#!/usr/bin/perl

# filter_blast.pl
# 
# The purpose of this script is to filter the results in a tabular BLAST report
# so that only the top hit for each query/subject pair is given: the first line
# reported for a pair is kept and any later lines for the same pair are dropped.
# The program takes as its command line input:
# -in  - the input tabular BLAST report
# -out - the output BLAST report
#
# Author : Brittney (Hinds) Keel
#
# Date created :  27 June 2013
# Last modified : 3 July 2013

use strict;
use warnings;
use Getopt::Long;


# First retrieve the command line options.
#
#   -in   path of the tabular BLAST report to read   (required)
#   -out  path of the filtered report to write       (required)
#   -help print the usage message and exit
#
# Running the script without any arguments, or with -help, prints the usage
# message on standard output and exits with status 0.  An unrecognised
# option or a missing file name prints the usage message on standard error
# and exits with status 1.

my $infile;
my $outfile;
my $help;

# Usage text, shared by every code path that has to print it.
my $usage = "Usage of this script:\n"
          . "Please supply command line options.\n"
          . "Example : ./filter_blast.pl -in inblast -out outblast\n";

if (!@ARGV) { # no command line options were supplied
  print $usage;
  exit;
}

# GetOptions reports unknown or malformed options on standard error itself
# and returns false; do not carry on with a half-parsed command line.
if (!GetOptions('help!', \$help,
                'in:s',  \$infile,
                'out:s', \$outfile)) {
  print STDERR $usage;
  exit 1;
}

if ($help) { # if the user requests help
  print $usage;
  exit;
}

# Both file names are needed.  The ':s' specifiers make the value optional,
# so an option given without a value arrives here as an empty string and an
# option that was not given at all is still undefined.
for my $required (['-in', $infile], ['-out', $outfile]) {
  my ($option, $value) = @$required;
  if (!defined $value || $value eq '') {
    print STDERR "Missing file name for required option $option\n", $usage;
    exit 1;
  }
}


# Read the input BLAST report line by line and copy to the output file only
# the first line seen for each query/subject pair (columns 1 and 2); later
# lines for a pair that has already been written are dropped.  Blank lines,
# comment lines (starting with '#') and lines with fewer than two fields are
# not hits and are copied through unchanged.

# A three-argument open whose file name is undefined silently opens an
# anonymous temporary file, so insist on real names before opening anything.
for my $file_option (['-in', $infile], ['-out', $outfile]) {
  my ($option, $path) = @$file_option;
  die "No file name given for $option\n"
    unless defined $path && length $path;
}

# Three-argument open keeps characters such as '<', '>', '|' or surrounding
# blanks in a file name from being interpreted as part of the open mode.
open(my $INPUT, '<', $infile)
  or die "Can't open input file '$infile': $!";
open(my $OUTPUT, '>', $outfile)
  or die "Can't open output file '$outfile': $!";

my %pairs;  # $pairs{query}{subject} is true once that pair has been written
while (defined(my $line = readline($INPUT))) {
  my @elements = split /\s+/, $line;

  # Not a hit line: keep it exactly as it is.
  if (@elements < 2 || $line =~ /^\s*#/) {
    print {$OUTPUT} $line
      or die "Can't write to output file '$outfile': $!";
    next;
  }

  my ($query_prot, $subject_prot) = @elements[0, 1];

  # The post-increment is false only the first time a pair is seen.
  next if $pairs{$query_prot}{$subject_prot}++;

  print {$OUTPUT} $line
    or die "Can't write to output file '$outfile': $!";
}

close($INPUT)  or die "Can't close input file '$infile': $!";
# Buffered write errors only surface when the output handle is closed.
close($OUTPUT) or die "Can't close output file '$outfile': $!";

