#!/usr/bin/perl -w

use strict;
use Getopt::Long;
use Cwd;

my $version = 0.2;

my $q = 20;
my $time = 0;
my $force = 0;
my $noNorm = 0;
my $normCov = 50;
my $species = "";
my $single = "";
my $reads1 = "";
my $reads2 = "";
my $baseDir = getcwd;
my $verFlag;
my $help = 0;
my $h = 0;
my $pe = 0;

GetOptions(	"assemblyName=s" => \$species,
			"q=i" => \$q,
			"time" => \$time,
			"force" => \$force,
			"noNorm" => \$noNorm,
			"normCov" => \$normCov,
			"single=s" => \$single,
			"reads1=s" => \$reads1,
			"reads2=s" => \$reads2,
			"outputDir=s" => \$baseDir,
			"help" => \$help,
			"h" => \$h,
			"version" => \$verFlag
);

if ($h or $help or not ($single or ($reads1 and $reads2))) {
	print "Please specify either a single merged file of single end reads (using --single) or two merged files for paired-end reads (using --reads1 and --read2)\n";
	print "\nOther options:\n--species\t\tSpecify the species being assembled to help organize output files\n";
	print "--q\t\t\tChange q-value threshold for reads filtering (default q20)\n";
	print "--baseDir\t\tChange base directory for output files (default is current working directory)\n";
	print "--force\t\tIgnore the log file and always run entire pipeline\n";
	print "--noNorm\t\tSkip the digital normalization of filtered reads\n";
	print "--normCov\t\tChange the max kmer coverage for digital normalization. Lowering coverage speeds up assembly time, but increases risk of artifacts (default 50)\n";
	print "--time\t\tRecords the amount of time each stage of the pipeline takes\n";
	print "--version\t\tPrints pipeline version and dies\n";
	print "--h or --help\tPrints this message and quits\n\n";
	die;
}

if ($verFlag) {
	print "Pipeline version $version\n";
	die;
}

print "Checking read length\n";
my $readLength = 0;
if ($single) {
	print "Checking single end reads\n";
	$pe = 0;
	my $S;
	open ($S, $single) or die "Can't open single end reads at $single, please check the file location: $!\n";
	while (my $line = readline($S)) {
		next unless $line =~ /^@/;
		my $r = readline($S);
		chomp $r;
		$readLength = length($r);
		print "Detected read length of $readLength\n";
		last;
	}
	close $S;
}

if ($reads1 and $reads2) {
	print "Checking paired-end reads\n";
	$pe = 1;
	my ($P1, $P2);
	open ($P1, $reads1) or die "Can't open paired end reads 1 at $reads1, please check the file location: $!\n";
	open ($P2, $reads2) or die "Can't open paired end reads 2 at $reads2, please check the file location: $!\n";
	my ($rl1, $rl2);
	while (my $line = readline($P1)) {
		next unless $line =~ /^@/;
		my $r = readline($P1);
		chomp $r;
		$rl1 = length($r);
		print "Detected read length of $rl1 for reads1\n";
		last;
	}
	while (my $line = readline($P2)) {
		next unless $line =~ /^@/;
		my $r = readline($P2);
		chomp $r;
		$rl2 = length($r);
		print "Detected read length of $rl2 for reads2\n";
		last;
	}
	die "Can't determine read length, please check the input files" unless ($rl1 and $rl2);
	if ($rl1 != $rl2) {
		warn "Read lengths in $reads1 and $reads2 do not match.  Please check the files\n";
	}
	$readLength = $rl1;
	close $P1;
	close $P2;
}

my @q = split /,/, $q;

my $filReads = "$baseDir/FilteredReads/$species";
my $normReads = "$baseDir/NormalizedReads/$species";
my $asReads = "$baseDir/AssembledReads/$species";
my $runDir = getcwd;
my $log = "$asReads/log.txt";
my ($LOG, $TIME);
my %status;

print "Creating output directories\n";

if (-d "$baseDir") {
	print "Found $baseDir\n";
} else {
	mkdir ("$baseDir");
	print "Made $baseDir\n";
}

if (-d "$baseDir/AssembledReads/") {
	print "Found $baseDir/AssembledReads/\n";
} else {
	mkdir ("$baseDir/AssembledReads/");
	print "Made $baseDir/AssembledReads/\n";
}

if (-d "$baseDir/AssembledReads/$species/") {
	print "Found $baseDir/AssembledReads/$species/\n";
} else {
	mkdir ("$baseDir/AssembledReads/$species/");
	print "Made $baseDir/AssembledReads/$species/\n";
}

print "Opening log\n";
if (-e $log and $force != 1) {
	open ($LOG, "$log") or die "$log exists, but can't be read: $!\n";
	while (my $line = readline($LOG)) {
		chomp $line;
		next unless $line;
		
		my ($pr, $st) = split /\t/, $line;
		$status{$pr} = $st;
	}
	close ($LOG) or die "Can't close $log: $!\n";
	open ($LOG, ">>$log") or die "Can't append $log: $!\n";
} else {
	open ($LOG, ">$log") or die "Can't write to $log: $!\n";
}

my %finished;
print "Checking previously completed jobs\n";
foreach my $pr (sort {$a cmp $b} keys %status) {
	my @info = split /\-/, $pr;
	my $con;
	if ($info[2]) {
		$con = "$info[1]-$info[2]";
	} else {
		$con = $info[1];
	}
	
	if ($finished{$info[0]}) {
		$finished{$info[0]} .= ",$con";
	} else {
		$finished{$info[0]} = $con;
	}
}

if ($time) {
	open ($TIME, ">$asReads/time.txt");
}

my $efStartTime = time;
foreach my $q (@q) {
	if ($status{"erneFilter-q$q"}) {
		print "Previously finished erne-filter-q$q, skipping this run\n";
	} else {	
		if (-d "$filReads") {
			print "Found $filReads\n";
		} else {
			mkdir ("$filReads");
			print "Made $filReads\n";
		}
		if (-d "$filReads/q$q") {
			print "Found $filReads/q$q\n";
		} else {
			mkdir ("$filReads/q$q");
			print "Made $filReads/q$q\n";
		}
		if ($pe) {
			system("erne-filter --query1 $reads1 --query2 $reads2 --output-prefix $filReads/q$q/merged --ultra-sensitive --force-standard");
		} else {
			system("erne-filter --query1 $single --output-prefix $filReads/q$q/merged --ultra-sensitive --force-standard");
		}
		print $LOG "erneFilter-q$q\t1\n";
	}
}
my $efEndTime = time;
my $efRunTime = $efEndTime - $efStartTime;
print $TIME "ErneFilter:\t$efRunTime\n" if $time;

my $khStartTime = time;
my $finK = $finished{"khmer"};
$finK = "none" unless $finK;
foreach my $q (@q) {		
	if (-d "$normReads") {
		print "Found $normReads\n";
	} else {
		mkdir ("$normReads");
		print "Made $normReads\n";
	}
}

if ($finK =~ /q$q/) {
	print "Previously finished khmer, skipping this run\n";
} else {
	if ($pe) {
		print "Interleaving reads\n";
		system("interleave-reads.py  -o $filReads/q$q/mergedI.fastq --force $filReads/q$q/merged_1.fastq $filReads/q$q/merged_2.fastq");	
		print "Normalizing reads\n";
		if ($noNorm) {
			system("cp $filReads/q$q/mergedI.fastq $normReads/mergedI.fq");	
		} else {
			system("normalize-by-median.py -o $normReads/mergedI.fq -x 1000000000 -C $normCov --n_tables 99 --force --ksize 32 $filReads/q$q/mergedI.fastq");	
		}
		print "Splitting reads\n";
		chdir("$normReads/");
		system("extract-paired-reads.py $normReads/mergedI.fq");
		system("split-paired-reads.py --force $normReads/mergedI.fq.pe");
		system("mv $normReads/mergedI.fq.pe.1 $normReads/merged.fq.1");
		system("mv $normReads/mergedI.fq.pe.2 $normReads/merged.fq.2");
		system("mv $normReads/mergedI.fq.pe $normReads/mergedI.fq");
		system("fastq-to-fasta.py -o mergedI.fa mergedI.fq");
		system("fastq-to-fasta.py -o merged.fa.1 merged.fq.1");
		system("fastq-to-fasta.py -o merged.fa.2 merged.fq.2");
	} else {
		if ($noNorm) {
			system("cp $filReads/q$q/merged_1.fastq $normReads/merged.fq");
		} else {
			system("normalize-by-median.py -o $normReads/merged.fq -x 1000000000 -C 50 --n_tables 99 --force $filReads/q$q/merged_1.fastq");
		}
		system("fastq-to-fasta.py -o $normReads/merged.fa $normReads/merged.fq");
	}
	print $LOG "khmer-q$q\t1\n";
}
my $khEndTime = time;
my $khRunTime = $khEndTime - $khStartTime;
print $TIME "Khmer:\t$khRunTime\n" if $time;
chdir("$runDir/");

my $soStartTime = time;

my $fq = "$normReads/merged.fq";
my $fqI = "$normReads/mergedI.fq";
my $kMin = 15;
my $kMax;
if ($readLength < 127) {
	$kMax = $readLength;
} else {
	$kMax = 127;
}

my $CON;

if (-d "$asReads/SOAP/") {
	print "Found $asReads/SOAP/\n";
} else {
	mkdir ("$asReads/SOAP/");
	print "Made $asReads/SOAP/\n";
}

my $finS = $finished{"soap"};
$finS = "none" unless $finS;
for (my $k = $kMin; $k <= $kMax; $k += 4) {
	if ($finS =~ /q$q-k$k/) {
		print "Previously finished soap-q$q-k$k, skipping this run\n";
	} else {
		if (-d "$asReads/SOAP/k$k/") {
			print "Found $asReads/SOAP/$k/\n";
		} else {
			mkdir ("$asReads/SOAP/k$k/");
			print "Made $asReads/SOAP/$k/\n";
		}

		open ($CON, ">SOAP_Conf.txt") or die "Can't open configuration file: $!\n";
		print $CON "#maximal read length\nmax_rd_len=$readLength\n[LIB]\n#maximal read length in this lib\nrd_len_cutof=150\n#average insert size\navg_ins=150\n#if sequence needs to be reversed \nreverse_seq=0\n#in which part(s) the reads are used\nasm_flags=3\n#minimum aligned length to contigs for a reliable read location (at least 32 for short insert size)\nmap_len=32\n";
		if ($pe) {
			print $CON "#fastq file for read 1\nq1=$fq.1\nq2=$fq.2\n";
		} else {
			print $CON "#fastq file for read 1\nq=$fq\n";
		}
		close ($CON);

		system ("SOAPdenovo-Trans-127mer all -s SOAP_Conf.txt -K $k -o $asReads/SOAP/k$k -F -p 4");
		
		print $LOG "soap-q$q-k$k\t1\n";
	}
}
my $soEndTime = time;
my $soRunTime = $soEndTime - $soStartTime;
print $TIME "SOAP:\t$soRunTime\n" if $time;

my $spStartTime = time;

$kMin = 19;
if ($readLength < 71) {
	$kMax = $readLength;
} else {
	$kMax = 71;
}

if (-d "$asReads/SPAdes/") {
	print "Found $asReads/SPAdes/\n";
} else {
	mkdir ("$asReads/SPAdes/");
	print "Made $asReads/SPAdes/\n";
}


my $finSp = $finished{"SPAdes"};
$finSp = "none" unless $finSp;
for (my $k = $kMin; $k <= $kMax; $k += 4) {
	if ($finSp =~ /q$q-k$k/) {
		print "Previously finished rnaspades-q$q-k$k, skipping this run\n";
	} else {
		if ($pe) {
			system("rnaspades.py -k $k -o $asReads/SPAdes/k$k --12 $fqI -m 500 -t 4");
		} else {
			system("rnaspades.py -k $k -o $asReads/SPAdes/k$k -s $fq -m 500 -t 4")
		}
		
		print $LOG "SPAdes-q$q-k$k\t1\n";
	}
}

my $spEndTime = time;
my $spRunTime = $spEndTime - $spStartTime;
print $TIME "SPAdes:\t$spRunTime\n" if $time;

foreach my $q (@q) {
	my $trStartTime = time;
	if ($status{"trinity-q$q"}) {
		print "Previously finished trinity-q$q, skipping this run\n";
	} else {
		if ($pe) {
			system("Trinity --no_version_check --seqType fq --max_memory 250G --CPU 4 --output $asReads/Trinity/trinity --full_cleanup --left $fq.1 --right $fq.2");
		} else {
			system("Trinity --no_version_check --seqType fq --max_memory 250G --CPU 4 --output $asReads/Trinity/trinity --full_cleanup --single $fq");
		}
		print $LOG "trinity-q$q\t1\n";
	}
	my $trEndTime = time;
	my $trRunTime = $trEndTime - $trStartTime;
	print $TIME "Trinity:\t$trRunTime\n" if $time;
}
	
my $idStartTime = time;
if ($status{"idba-q$q"}) {
	print "Previously finished idba_trans, skipping this run\n";
} else {
	if ($pe) {
		system("idba_tran -o $asReads/idba/ -l $normReads/mergedI.fa --num_threads 4 --max_isoforms 50");
	} else {
		system("idba_tran -o $asReads/idba/ -l $normReads/merged.fa --num_threads 4 --max_isoforms 50");
	}
	print $LOG "idba-q$q\t1\n";
}
my $idEndTime = time;
my $idRunTime = $idEndTime - $idStartTime;
print $TIME "idbaTrans:\t$idRunTime\n" if $time;

my ($IN, $OUT, $OUTAA, $TMP, $SPEC);
my $meStartTime = time;
if ($status{"merged"}) {
	print "Previously finished merging transcrispts, skipping this run\n";
} else {

	open ($OUT, ">$asReads/mergedTranscripts.fa\n") or die "Can't open output: $!\n";
	system("touch mergedTranscripts.fa.faa");

	my $t;
	my %spec;

	my @idbaK = (20,30,40,50,60);
	foreach my $k (@idbaK) {
		if (-e "$asReads/idba/transcript-$k.fa") {
			open ($TMP, ">$asReads/mergingTmp.fa") or die "Can't open tmp file: $!\n";
			print "Found $asReads/idba/transcript-$k.fa, adding to merged transcripts\n";
			open ($IN, "$asReads/idba/transcript-$k.fa") or warn "Can't open $asReads/idba/transcripts-$k.fa";
			my $lt = 0;
			my ($id, $seq);
			while (my $line = readline($IN)) {
				if ($line =~ /^>/) {
					$line =~ s/^>/>idbaK$k-q$q-/;
					if (length($line) > 50) {
						$line = substr($line, 0, 50);
						$line .= "\n";
					}
					$spec{"idba"}{$id} = $seq if $seq;
					$id = $line;
					$seq = "";
					print $OUT $line;
					print $TMP $line;
					$lt++;
					$t++;
				} else {
					$seq .= $line;
					print $OUT $line;
					print $TMP $line;
				}
			}
			$spec{"idba"}{$id} = $seq if $seq;
			close($IN);
			close($TMP);
			
		 	chdir($asReads);
			system("ORFfinder -in mergingTmp.fa -out mergingTmpORFs.fa");
			 	
			getLongestORF();
				
		 	system("cat mergingTmpORFs.fa.faa >> mergedTranscripts.fa.faa");
		 	chdir($runDir);
		 	
			print "Successfully added $lt contigs\n";
		}
	}

	if (-e "$asReads/Trinity/trinity.Trinity.fasta") {
		open ($TMP, ">$asReads/mergingTmp.fa") or die "Can't open tmp file: $!\n";
		print "Found $asReads/Trinity/trinity.Trinity.fasta, adding to merged transcripts\n";
		open ($IN, "$asReads/Trinity/trinity.Trinity.fasta") or warn "Can't open $asReads/Trinity/trinity.Trinity.fasta: $!\n";
		my $lt = 0;
		my ($id, $seq);
		while (my $line = readline($IN)) {
			if ($line =~ /^>/) {
				# $line =~ s/^>/>Trinity-q$q/;
				if (length($line) > 50) {
					$line = substr($line, 0, 50);
					$line .= "\n";
				}
				$spec{"Trinity"}{$id} = $seq if $seq;
				$id = $line;
				$seq = "";
				print $OUT $line;
				print $TMP $line;
				$lt++;
				$t++;
			} else {
				$seq .= $line;
				print $OUT $line;
				print $TMP $line;
			}
		}
		close $IN;
		close($TMP);
		$spec{"Trinity"}{$id} = $seq if $seq;
					
		chdir($asReads);
		system("ORFfinder -in mergingTmp.fa -out mergingTmpORFs.fa");
		
		getLongestORF();
		
		system("cat mergingTmpORFs.fa.faa >> mergedTranscripts.fa.faa");
		chdir($runDir);
		
		print "Successfully added $lt contigs\n";
	}


	for (my $k = $kMin; $k <= $kMax; $k += 4) {
		

		if (-e "$asReads/SOAP/k$k.scafSeq") {
			open ($TMP, ">$asReads/mergingTmp.fa") or die "Can't open tmp file: $!\n";
			print "Found $asReads/SOAP/k$k.scafSeq, adding to merged transcripts\n";
			open ($IN, "$asReads/SOAP/k$k.scafSeq") or warn "Can't open $asReads/SOAP/k$k.scafSeq: $!\n";
			my $lt = 0;
			my ($id, $seq);
			while (my $line = readline($IN)) {
				if ($line =~ /^>/) {
					$line =~ s/^>/>SOAPdenovoK$k-q$q-/;
					if (length($line) > 50) {
						$line = substr($line, 0, 50);
						$line .= "\n";
					}
					$spec{"SOAPdenovo"}{$id} = $seq if $seq;
					$id = $line;
					$seq = "";
					print $OUT $line;
					print $TMP $line;
					$lt++;
					$t++;
				} else {
					$seq .= $line;
					print $OUT $line;
					print $TMP $line;
				}
			}
			$spec{"SOAPdenovo"}{$id} = $seq if $seq;
			close $IN;
			close($TMP);
			
		 	chdir($asReads);
		 	system("ORFfinder -in mergingTmp.fa -out mergingTmpORFs.fa");
		 	
			getLongestORF();
			
		 	system("cat mergingTmpORFs.fa.faa >> mergedTranscripts.fa.faa");
		 	chdir($runDir);
		 	
			print "Successfully added $lt contigs\n";
		}
	
		if (-e "$asReads/SPAdes/k$k/transcripts.fasta") {
			open ($TMP, ">$asReads/mergingTmp.fa") or die "Can't open tmp file: $!\n";
			print "Found $asReads/SPAdes/k$k/transcripts.fasta, adding to merged transcripts\n";
			open ($IN, "$asReads/SPAdes/k$k/transcripts.fasta") or warn "Can't open $asReads/SPAdes/k$k/transcripts.fasta: $!\n";
			my $lt = 0;
			my ($id, $seq);
			while (my $line = readline($IN)) {
				if ($line =~ /^>/) {
					$line =~ s/^>/>SPAdesK$k-q$q-/;
					if (length($line) > 50) {
						$line = substr($line, 0, 50);
						$line .= "\n";
					}
					$spec{"SPAdes"}{$id} = $seq if $seq;
					$id = $line;
					$seq = "";
					print $OUT $line;
					print $TMP $line;
					$lt++;
					$t++;
				} else {
					$seq .= $line;
					print $OUT $line;
					print $TMP $line;
				}
			}
			$spec{"SPAdes"}{$id} = $seq if $seq;
			close $IN;
			close($TMP);
			
		 	chdir($asReads);
			system("ORFfinder -in mergingTmp.fa -out mergingTmpORFs.fa");
			
			getLongestORF();
				
		 	system("cat mergingTmpORFs.fa.faa >> mergedTranscripts.fa.faa");
		 	chdir($runDir);
		 	
			print "Successfully added $lt contigs\n";
		}
	}

	print "Printing assembler specific results:\n";
	foreach my $as (sort {$a cmp $b} keys %spec) {
		print "Printing merged results for $as:\n";
		open ($SPEC, ">$asReads/$as.fasta") or warn "Can't open $as output: $!\n";
		foreach my $g (sort {$a cmp $b} keys %{$spec{$as}}) {
			print $SPEC "$g\n$spec{$as}{$g}\n"
		}
	}

	print "Added a total of $t contigs to $asReads/mergedTranscripts.fa\n";
	print $LOG "merged\t1\n";
	close $OUT;
}

my $meEndTime = time;
my $meRunTime = $meEndTime - $meStartTime;
print $TIME "Merging:\t$meRunTime\n" if $time;# 
# 
# if ($status{"gm-q$q"}) {
# 	print "Previously finished GeneMarkS, skipping this run\n";
# } else {
# 	print "Running GeneMarkS to predict amino acid sequences:\n";
# 	chdir($asReads);
# 	system("gmsn.pl --faa --euk mergedTranscripts.fa");
# 	chdir($runDir);
# 	print $LOG "gm-q$q\t1\n";
# }


my ($FASTA, $PROT, $POUT, $LOUT, $SOUT, $SOUT4, $LOUT4,  $POUT4);

my $dir = "$asReads/";

open ($FASTA, "$dir/mergedTranscripts.fa") or die "Can't open $dir/mergedTranscripts.fa file: $!\n";
open ($PROT, "$dir/mergedTranscripts.fa.faa") or die "Can't open $dir/mergedTranscripts.fa.faa file: $!\n";
open ($SOUT, ">$dir/ConSemble3shortest.fasta") or die "Can't open nt output: $!\n";
open ($LOUT, ">$dir/ConSemble3longest.fasta") or die "Can't open nt output: $!\n";
open ($POUT, ">$dir/ConSemble3.fa.faa") or die "Can't open aa output: $!\n";
open ($SOUT4, ">$dir/ConSemble4shortest.fasta") or die "Can't open nt output: $!\n";
open ($LOUT4, ">$dir/ConSemble4longest.fasta") or die "Can't open nt output: $!\n";
open ($POUT4, ">$dir/ConSemble4.fa.faa") or die "Can't open aa output: $!\n";

my %fasta;
my ($seq, $id);
print "Finding consensus between assemblers:\n";
while (my $line = readline($FASTA)) {
	chomp $line;
	next unless $line;
	
	if ($line =~ /^>/) {
		$fasta{$id} = $seq if $seq;
		$seq = "";
		$line =~ s/^>//;
		my @info = split /\s+/, $line;
		$id = $info[0];
	} else {
		$seq .= $line;
	}
}
$fasta{$id} = $seq if $seq;

my (%prot, %match);
($seq, $id) = ("", "");
while (my $line = readline($PROT)) {
	chomp $line;
	next unless $line;
	
	if ($line =~ /^>/) {
		$prot{$id} = $seq if $seq;
		$line =~ s/^>//;
		my @info = split /\s+/, $line;
		$info[0] =~ s/^>//;
		if ($seq) {
			if ($match{$seq}) {
				$match{$seq} .= ",$id" unless $match{$seq} =~ /$id/;
			} else { 
				$match{$seq} = $id 
			}
		}
		$id = $info[0];
		$seq = "";
	} else {
		$seq .= $line;
	}
}
$fasta{$id} = $seq if $seq;

if ($seq) {
	if ($match{$seq} and $id) {
		$match{$seq} .= ",$id" unless $match{$seq} =~ /$id/;
	} else { 
		$match{$seq} = $id; 
	}
}

my (%fm_count, %counted);
my $t = 0;
my $c = 1;
foreach my $hit (sort {$a cmp $b} keys %match) {
	next if $counted{$hit};
	$counted{$hit} = 1;
	
	my $hits = $match{$hit};
	my @list = split /,/, $hits;
	
	my %as;
	foreach my $h (@list) {	
		my @ask = split /-/, $h;
		my @as = split /K/, $ask[0];
		$as{$as[0]} = 1;
	}
	my $as_list = join ",",  sort({$a cmp $b} keys %as);
	$fm_count{$as_list}++;
	
	my $denovoCount = 0;
	foreach my $a (sort {$a cmp $b} keys %as) {
		$denovoCount++ if ($a eq "SOAPdenovo" or $a eq "idba" or $a eq "SPAdes" or $a eq "Trinity");
	}
	if ($denovoCount >= 3) {
		my $longestLength = 0;
		my $shortestLength = 999999;
		my $longest = "";
		my $shortest = "";
		foreach my $h (@list) {
			next if $counted{$h};
			$counted{$h} = 1;
			my $clen = length($fasta{$h});
			if (length($fasta{$h}) > $longestLength) {
				$longestLength = length($fasta{$h});
				$longest = $h;
			}
			if (length($fasta{$h}) < $shortestLength) {
				$shortestLength = length($fasta{$h});
				$shortest = $h;
			}
		}
		next unless $longest;
		$t++;
		print $POUT ">contig_$c\n$prot{$longest}\n";
		print $POUT4 ">contig_$c\n$prot{$longest}\n" if $denovoCount >= 4;
		if ($longest) {
			print $LOUT ">contig_$c\n$fasta{$longest}\n";
			print $LOUT4 ">contig_$c\n$fasta{$longest}\n" if $denovoCount >= 4;
		}
		if ($shortest) {
			print $SOUT ">contig_$c\n$fasta{$shortest}\n";
			print $SOUT4 ">contig_$c\n$fasta{$shortest}\n" if $denovoCount >= 4;
		}
		$c++;
	}
}

#Uncomment this block to see the number of contigs shared between each assembler
#######################
# foreach my $al (sort {$a cmp $b} keys %fm_count) {
# 	print "$al\t$fm_count{$al}\n";
# }
#######################

print "Number of contigs in the final assembly: $t\n";

sub getLongestORF {
	my %tmpGenes;
	my ($tmpId, $tmpSeq);
	my ($TMP, $TMPOUT);
	open ($TMP, "mergingTmpORFs.fa") or die "Can't open mergingTmpORFs.fa\n";
	open ($TMPOUT, ">mergingTmpORFs.fa.faa") or die "Can't open mergingTmpORFs.fa.faa\n";

	while (my $line = readline($TMP)) {
		chomp $line;
		next unless $line;
		if ($line =~ /^>/) {
			if ($tmpSeq) {
				$tmpGenes{$tmpId} = "" unless $tmpGenes{$tmpId};
				$tmpGenes{$tmpId} = $tmpSeq if (length($tmpSeq) > length($tmpGenes{$tmpId}));
			}
			my @info = split /ORF\d+_/, $line;
			my @gn = split /:/, $info[1];
			$tmpId = $gn[0];
			$tmpSeq = "";
		} else {
			$tmpSeq .= $line;
		}
	}
	$tmpGenes{$tmpId} = $tmpSeq if (length($tmpSeq) > length($tmpGenes{$tmpId}));

	foreach my $g (sort {$a cmp $b} keys %tmpGenes) {
		print $TMPOUT ">$g\n$tmpGenes{$g}\n";
	}

	close ($TMP);
	close ($TMPOUT);
}
