#!/usr/bin/perl -w
# Command line usage:
#   perl Integration.pl original_sequence_file direction_file output_file
#
# original_sequence_file is the output file produced by ESTNameConversion.pl.
# direction_file is the file made from the Phrap output file, with the
# following format:
#   Contigxxx
#   ESTxxxx.xx
#   ESTxxxx.xx
#   ESTxxxx.xx
# The function of this script is to integrate the sequences into the correct
# positions under each entry of the direction file.
# The output file looks like:
#   Contigxxx
#   >ESTxxxx.xx
#   sequence data
#   >ESTxxxx.xx
#   sequence data
# The output file is ready for the standalone BLAST search against a
# downloaded NCBI nr database.

use strict;
use warnings;

die "Usage: perl $0 original_sequence_file direction_file output_file\n"
    unless @ARGV == 3;

my ($sequence_path, $direction_path, $output_path) = @ARGV;

# ---------------------------------------------------------------------------
# Pass 1: load the sequence file.
# Every line starting with '>' opens a new record; the lines that follow it
# (up to the next '>' line) are that record's sequence data.  Keeping header
# and data together in one record replaces the former parallel arrays of line
# numbers and lengths, which could drift out of step with each other.
# ---------------------------------------------------------------------------
my @records;

open(my $sequence_fh, '<', $sequence_path)
    or die "Cannot open first input file: $!";
while (my $line = <$sequence_fh>) {
    chomp $line;
    if ($line =~ /^>/) {
        push @records, { header => $line, lines => [] };
    }
    elsif (@records) {
        # Lines that appear before the first header belong to no record and
        # were never written to the output, so they are simply dropped.
        push @{ $records[-1]{lines} }, $line;
    }
}
close $sequence_fh or die "Cannot close first input file: $!";

# ---------------------------------------------------------------------------
# Pass 2: load the direction file.
# It is read completely before the output file is opened, so the output is
# only created once both inputs have been read successfully.
# ---------------------------------------------------------------------------
my @direction_lines;

open(my $direction_fh, '<', $direction_path)
    or die "Cannot open second input file: $!";
while (my $line = <$direction_fh>) {
    chomp $line;
    push @direction_lines, $line;
}
close $direction_fh or die "Cannot close second input file: $!";

# ---------------------------------------------------------------------------
# Pass 3: write the output.
# Each EST line of the direction file is replaced by the header and sequence
# data of the matching record; every other line (e.g. "Contigxxx") is copied
# through unchanged.
# ---------------------------------------------------------------------------
my %record_for;    # numeric EST id => matching record (undef when not found)

open(my $output_fh, '>', $output_path)
    or die "Cannot open output file: $!";

for my $line (@direction_lines) {
    # A single test decides whether a line is an EST entry.  Previously the
    # lookup used a case-sensitive /^EST(\d+)/ while the output loop used
    # /^EST/i, so some lines were counted as EST entries without ever having
    # been looked up.
    if ($line =~ /^EST(\d+)/i) {
        my $id = $1;

        if (!exists $record_for{$id}) {
            # The id must match a whole run of digits in the header, so that
            # id 12 does not pick up ">EST123.xx".
            my $id_pattern = qr/(?<!\d)$id(?!\d)/;
            my $found;
            for my $record (@records) {
                if ($record->{header} =~ $id_pattern) {
                    # First match wins: exactly one record per EST line keeps
                    # every sequence under its own entry.
                    $found = $record;
                    last;
                }
            }
            $record_for{$id} = $found;
        }

        my $record = $record_for{$id};
        if (defined $record) {
            # The stored header already begins with '>', so it is printed
            # as is (prefixing another '>' produced ">>ESTxxxx.xx").
            print {$output_fh} "$record->{header}\n";
            print {$output_fh} "$_\n" for @{ $record->{lines} };
        }
        else {
            # No sequence for this entry: report it and keep the direction
            # line visible in the output rather than shifting later entries.
            warn "No sequence found for direction file entry '$line'\n";
            print {$output_fh} "$line\n";
        }
    }
    else {
        print {$output_fh} "$line\n";
    }
}

close $output_fh or die "Cannot close output file: $!";