#!/usr/bin/perl -w
use strict;
use BPlite;
use FAlite;
use Getopt::Std;
use vars qw(
$opt_c
$opt_b $opt_B
$opt_s $opt_S
$opt_p $opt_P
$opt_i $opt_I
$opt_r
);
############
# defaults #
############
my $MASK = '';
my $MIN_BITS = 1;
my $MAX_BITS = 1e200;
my $MIN_SCORE = 1;
my $MAX_SCORE = 1e200;
my $MIN_P = 0;
my $MAX_P = 1;
my $MIN_IDENTITY = 1;
my $MAX_IDENTITY = 100;
#########
# usage #
#########
my $usage = "
usage: xblast.pl [options] <
options: [default]
-c [$MASK]
-b [$MIN_BITS]
-B [$MAX_BITS]
-s [$MIN_SCORE]
-S [$MAX_SCORE]
-p [$MIN_P]
-P [$MAX_P]
-i [$MIN_IDENTITY]
-I [$MAX_IDENTITY]
-r switch to reverse-mask the sequence
";
###########################
# command line processing #
###########################
getopts('c:rb:b:s:S:p:P:v:');
die $usage unless @ARGV == 1;
$MASK = $opt_c if $opt_c;
$MIN_BITS = $opt_b if $opt_b;
$MAX_BITS = $opt_B if $opt_B;
$MIN_SCORE = $opt_s if $opt_s;
$MAX_SCORE = $opt_S if $opt_S;
$MIN_P = $opt_p if $opt_p;
$MAX_P = $opt_P if $opt_P;
$MIN_IDENTITY = $opt_i if $opt_i;
$MAX_IDENTITY = $opt_I if $opt_I;
my $REVERSE = $opt_r;
my ($query) = @ARGV;
#############################
# BPlite and FAlite objects #
#############################
open(FASTA, $query) or die;
my $BPlite = new BPlite::Multi(\*STDIN);
my $FAlite = new FAlite(\*FASTA);
##########################################
# iterate through both BPlite and FAlite #
##########################################
while(my $blast = $BPlite->nextReport, my $fasta = $FAlite->nextEntry) {
my $seq = $fasta->seq;
###############
# parse BLAST #
###############
while (my $sbjct = $blast->nextSbjct) {
while (my $hsp = $sbjct->nextHSP) {
###########
# filters #
###########
next if $hsp->bits < $MIN_BITS;
next if $hsp->bits > $MAX_BITS;
next if $hsp->score < $MIN_SCORE;
next if $hsp->score > $MAX_SCORE;
next if $hsp->P < $MIN_P;
next if $hsp->P > $MAX_P;
next if $hsp->percent < $MIN_IDENTITY;
next if $hsp->percent > $MAX_IDENTITY;
###########
# masking #
###########
my ($begin, $end) = ($hsp->queryBegin, $hsp->queryEnd);
($begin, $end) = ($end, $begin) if $begin > $end;
my $len = $end - $begin + 1;
substr($seq, $begin-1, $len) = lc(substr($seq, $begin-1, $len));
}
}
#######################
# output FASTA format #
#######################
if ($REVERSE) {$seq =~ tr/a-zA-Z/A-Za-z/}
if ($MASK) {$seq =~ s/[a-z]/$MASK/g}
print $fasta->def, "\n";
for(my $i=0;$i