# Lookup tables, filled in at compile time.
#
# %MODEMAP : maps the container element names 'BlastOutput', 'Hit' and 'Hsp'
#            to the short mode names 'result', 'hit' and 'hsp'.
# %MAPPING : maps every leaf element name either to a plain field name
#            (a string) or to a one-pair hash ref whose key is 'param' or
#            'stat' and whose value is the parameter / statistic name.
#
# NOTE(review): the code that consumes these tables is not part of this
# section (presumably the element handlers) - confirm against the callers.
BEGIN {
    %MODEMAP = (
        'BlastOutput' => 'result',
        'Hit'         => 'hit',
        'Hsp'         => 'hsp',
    );

    %MAPPING = (

        # --- per-HSP fields -------------------------------------------
        'Hsp_bit-score'   => 'bits',
        'Hsp_score'       => 'score',
        'Hsp_evalue'      => 'evalue',
        'Hsp_pvalue'      => 'pvalue',
        'Hsp_query-from'  => 'querystart',
        'Hsp_query-to'    => 'queryend',
        'Hsp_hit-from'    => 'hitstart',
        'Hsp_hit-to'      => 'hitend',
        'Hsp_positive'    => 'conserved',
        'Hsp_identity'    => 'identical',
        'Hsp_gaps'        => 'gaps',
        'Hsp_hitgaps'     => 'hitgaps',
        'Hsp_querygaps'   => 'querygaps',
        'Hsp_qseq'        => 'queryseq',
        'Hsp_hseq'        => 'hitseq',
        'Hsp_midline'     => 'homolseq',
        'Hsp_align-len'   => 'hsplen',
        'Hsp_query-frame' => 'queryframe',
        'Hsp_hit-frame'   => 'hitframe',

        # --- per-hit fields -------------------------------------------
        'Hit_id'        => 'hitname',
        'Hit_len'       => 'hitlen',
        'Hit_accession' => 'hitacc',
        'Hit_def'       => 'hitdesc',
        'Hit_signif'    => 'hitsignif',
        'Hit_score'     => 'hitscore',

        # --- per-report fields ----------------------------------------
        # ('BlastOutput_query-acc' used to be listed twice with the same
        #  value; the redundant second entry has been dropped.)
        'BlastOutput_program'   => 'programname',
        'BlastOutput_version'   => 'programver',
        'BlastOutput_query-def' => 'queryname',
        'BlastOutput_query-len' => 'querylen',
        'BlastOutput_query-acc' => 'queryacc',
        'BlastOutput_querydesc' => 'querydesc',
        'BlastOutput_db'        => 'dbname',
        'BlastOutput_db-len'    => 'dbsize',
        'BlastOutput_db-let'    => 'dblets',

        'Iteration_iter-num' => 'iternum',

        # --- search parameters (tagged 'param') -----------------------
        'Parameters_matrix'      => { 'param' => 'matrix' },
        'Parameters_expect'      => { 'param' => 'expect' },
        'Parameters_include'     => { 'param' => 'include' },
        'Parameters_sc-match'    => { 'param' => 'match' },
        'Parameters_sc-mismatch' => { 'param' => 'mismatch' },
        'Parameters_gap-open'    => { 'param' => 'gapopen' },
        'Parameters_gap-extend'  => { 'param' => 'gapext' },
        'Parameters_filter'      => { 'param' => 'filter' },
        'Parameters_allowgaps'   => { 'param' => 'allowgaps' },

        # --- run statistics (tagged 'stat') ---------------------------
        'Statistics_db-len'            => { 'stat' => 'dbentries' },
        'Statistics_db-let'            => { 'stat' => 'dbletters' },
        'Statistics_hsp-len'           => { 'stat' => 'hsplength' },
        'Statistics_query-len'         => { 'stat' => 'querylength' },
        'Statistics_eff-space'         => { 'stat' => 'effectivespace' },
        'Statistics_eff-spaceused'     => { 'stat' => 'effectivespaceused' },
        'Statistics_eff-dblen'         => { 'stat' => 'effectivedblength' },
        'Statistics_kappa'             => { 'stat' => 'kappa' },
        'Statistics_lambda'            => { 'stat' => 'lambda' },
        'Statistics_entropy'           => { 'stat' => 'entropy' },
        'Statistics_framewindow'       => { 'stat' => 'frameshiftwindow' },
        'Statistics_decay'             => { 'stat' => 'decayconst' },
        'Statistics_T'                 => { 'stat' => 'T' },
        'Statistics_A'                 => { 'stat' => 'A' },
        'Statistics_X1'                => { 'stat' => 'X1' },
        'Statistics_X2'                => { 'stat' => 'X2' },
        'Statistics_S1'                => { 'stat' => 'S1' },
        'Statistics_S2'                => { 'stat' => 'S2' },
        'Statistics_DFA_states'        => { 'stat' => 'num_dfa_states' },
        'Statistics_DFA_size'          => { 'stat' => 'dfa_size' },
        'Statistics_search_cputime'    => { 'stat' => 'search_cputime' },
        'Statistics_total_cputime'     => { 'stat' => 'total_cputime' },
        'Statistics_search_actualtime' => { 'stat' => 'search_actualtime' },
        'Statistics_total_actualtime'  => { 'stat' => 'total_actualtime' },
        'Statistics_noprocessors'      => { 'stat' => 'no_of_processors' },
        'Statistics_neighbortime'      => { 'stat' => 'neighborhood_generate_time' },
        'Statistics_starttime'         => { 'stat' => 'start_time' },
        'Statistics_endtime'           => { 'stat' => 'end_time' },
    );
}
# next_result
#
# Function: Reads one BLAST text report from the input, line by line, and
#           turns it into a stream of events on $self: start_document,
#           start_element / end_element for 'BlastOutput', 'Hit' and 'Hsp',
#           element() for scalar fields and characters() for the aligned
#           sequence strings.
# Args    : none (invoked on the parser object).
# Returns : whatever $self->end_document() returns.
#           NOTE(review): presumably the assembled result object - confirm
#           against the event handler, which is not visible here.
# Throws  : via $self->throw() when an alignment block contains a line that
#           is not a Query:/Sbjct: line before any such line has been seen.
#
# The function relies on these methods of $self, none of which is defined in
# this section: _readline, _pushback, start_document, end_document,
# start_element, end_element, element, characters, in_element, debug, throw.
#
# The order of the elsif branches matters: most patterns are unanchored and
# the first one that matches wins.
sub next_result {
    my ($self) = @_;

    my $seentop = 0;    # true once a program header line has been consumed
    my $reporttype;     # program name exactly as captured from the header
    my @hit_signifs;    # queue of [ last column, previous column ] pairs
                        # taken from the one-line-per-hit summary table

    # Close an Hsp and/or Hit element that is still open.  Same idiom as the
    # one used before opening a new Hit and before the trailer section.
    my $close_open_hit = sub {
        $self->in_element('hsp') && $self->end_element( { 'Name' => 'Hsp' } );
        $self->in_element('hit') && $self->end_element( { 'Name' => 'Hit' } );
        return;
    };

    $self->start_document();

    while ( defined( $_ = $self->_readline ) ) {
        next if /^\s+$/;
        next if /CPU time:/ || /^>\s*$/;

        if (/^([T]?BLAST[NPX])\s*(.+)$/i) {

            # ---- program header --------------------------------------
            if ($seentop) {

                # This header belongs to the NEXT report: hand the line
                # back, close whatever is still open and finish this one.
                $self->_pushback($_);
                $close_open_hit->();
                $self->end_element( { 'Name' => 'BlastOutput' } );
                return $self->end_document();
            }
            $self->start_element( { 'Name' => 'BlastOutput' } );
            $seentop    = 1;
            $reporttype = $1;
            $self->element(
                {
                    'Name' => 'BlastOutput_program',
                    'Data' => $reporttype
                }
            );
            $self->element(
                {
                    'Name' => 'BlastOutput_version',
                    'Data' => $2
                }
            );
        }
        elsif (/^Query=\s*(.+)$/) {

            # ---- query name / description / length -------------------
            # The description may continue over several lines; it ends at
            # the "(N letters)" line or at the first blank line.
            my $q    = $1;
            my $size = 0;
            $_ = $self->_readline;
            while ( defined($_) && $_ !~ /^\s+$/ ) {
                chomp;
                if (/\(([\d,]+)\s+letters\)/) {
                    $size = $1;
                    $size =~ s/,//g;
                    last;
                }
                else {
                    $q .= " $_";
                    $q =~ s/ +/ /g;
                    $q =~ s/^ | $//g;
                }
                $_ = $self->_readline;
            }
            chomp($q);
            my ( $nm, $desc ) = split( /\s+/, $q, 2 );
            $self->element(
                {
                    'Name' => 'BlastOutput_query-def',
                    'Data' => $nm
                }
            );
            $self->element(
                {
                    'Name' => 'BlastOutput_query-len',
                    'Data' => $size
                }
            );
            defined $desc && $desc =~ s/\s+$//;
            $self->element(
                {
                    'Name' => 'BlastOutput_querydesc',
                    'Data' => $desc
                }
            );

            # The accession is taken to be the last '|'-separated piece of
            # the name; if that piece is blank the one before it is used.
            if ( my @pieces = split( /\|/, $nm ) ) {
                my $acc = pop @pieces;
                $acc = pop @pieces if ( !defined $acc || $acc =~ /^\s+$/ );
                $self->element(
                    {
                        'Name' => 'BlastOutput_query-acc',
                        'Data' => $acc
                    }
                );
            }
        }
        elsif (/Sequences producing significant alignments:/) {

            # ---- hit summary table -----------------------------------
            # Skip the line after the heading, then queue the last two
            # columns of every row up to the next blank line.  They are
            # consumed, in order, by the '>' branch below.
            $_ = $self->_readline();
            while ( defined( $_ = $self->_readline() )
                && !/^\s+$/ )
            {
                my @line = split;
                push @hit_signifs, [ pop @line, pop @line ];
            }
        }
        elsif (/Sequences producing High-scoring Segment Pairs:/) {

            # ---- hit summary table, variant with one extra column ----
            # Same as above, except that the final column is discarded
            # before the two values are taken.
            $_ = $self->_readline();
            while ( defined( $_ = $self->_readline() )
                && !/^\s+$/ )
            {
                my @line = split;
                pop @line;
                push @hit_signifs, [ pop @line, pop @line ];
            }
        }
        elsif (/^Database:\s*(.+)$/) {

            # ---- database name and size ------------------------------
            # The name may wrap; every line before the
            # "N sequences; M total letters" line is appended to it.
            my $db = $1;
            while ( defined( $_ = $self->_readline ) ) {
                if (
/^\s+([\d\,]+)\s+sequences\;\s+([\d,]+)\s+total\s+letters/
                  )
                {
                    my ( $s, $l ) = ( $1, $2 );
                    $s =~ s/,//g;
                    $l =~ s/,//g;
                    $self->element(
                        {
                            'Name' => 'BlastOutput_db-len',
                            'Data' => $s
                        }
                    );
                    $self->element(
                        {
                            'Name' => 'BlastOutput_db-let',
                            'Data' => $l
                        }
                    );
                    last;
                }
                else {
                    chomp;
                    $db .= $_;
                }
            }
            $self->element(
                {
                    'Name' => 'BlastOutput_db',
                    'Data' => $db
                }
            );
        }
        elsif (/^>(\S+)\s*(.*)?/) {

            # ---- start of a new hit ----------------------------------
            chomp;
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->in_element('hit')
              && $self->end_element( { 'Name' => 'Hit' } );
            $self->start_element( { 'Name' => 'Hit' } );
            my $id         = $1;
            my $restofline = $2;
            $self->element(
                {
                    'Name' => 'Hit_id',
                    'Data' => $id
                }
            );
            my @pieces = split( /\|/, $id );
            my $acc = pop @pieces;
            $self->element(
                {
                    'Name' => 'Hit_accession',
                    'Data' => $acc
                }
            );

            # Pair this hit with the next queued summary-table row, if any.
            my $v = shift @hit_signifs;
            if ( defined $v ) {
                $self->element(
                    {
                        'Name' => 'Hit_signif',
                        'Data' => $v->[0]
                    }
                );
                $self->element(
                    {
                        'Name' => 'Hit_score',
                        'Data' => $v->[1]
                    }
                );
            }

            # Collect the (possibly wrapped) description up to the
            # "Length =" line.  A "Score" line reached first is handed back
            # so that the main loop can start the HSP.
            while ( defined( $_ = $self->_readline() ) ) {
                next if (/^\s+$/);
                chomp;
                if (/Length\s*=\s*([\d,]+)/) {
                    my $l = $1;
                    $l =~ s/\,//g;
                    $self->element(
                        {
                            'Name' => 'Hit_len',
                            'Data' => $l
                        }
                    );
                    last;
                }
                elsif (/Score/) {
                    $self->_pushback($_);
                    last;
                }
                else {
                    $restofline .= $_;
                }
            }
            $restofline =~ s/\s+/ /g;
            $self->element(
                {
                    'Name' => 'Hit_def',
                    'Data' => $restofline
                }
            );
        }
        elsif (/\s+(Plus|Minus) Strand HSPs:/i) {
            next;
        }
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && /Score\s*=\s*(\S+)\s*\(([\d\.]+)\s*bits\),\s*Expect\s*=\s*([^,\s]+),\s*(Sum)?\s*P(\(\d+\))?\s*=\s*([^,\s]+)/
          )
        {

            # ---- HSP score line: "Score = S (B bits), Expect = E, P = p"
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->element(
                {
                    'Name' => 'Hsp_score',
                    'Data' => $1
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_bit-score',
                    'Data' => $2
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_evalue',
                    'Data' => $3
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_pvalue',
                    'Data' => $6
                }
            );
        }
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && /Score\s*=\s*(\S+)\s*bits\s*\((\d+)\),\s*Expect(\(\d+\))?\s*=\s*(\S+)/
          )
        {

            # ---- HSP score line: "Score = B bits (S), Expect = E" ----
            # Note the swapped captures: $1 is the bit score, $2 the score.
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->element(
                {
                    'Name' => 'Hsp_score',
                    'Data' => $2
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_bit-score',
                    'Data' => $1
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_evalue',
                    'Data' => $4
                }
            );
        }
        elsif ( $self->in_element('hsp')
            && /Identities\s*=\s*(\d+)\s*\/\s*(\d+)\s*[\d\%\(\)]+\s*(,\s*Positives\s*=\s*(\d+)\/(\d+)\s*[\d\%\(\)]+\s*)?(\,\s*Gaps\s*=\s*(\d+)\/(\d+))?/i
          )
        {

            # ---- identities / positives / gaps -----------------------
            $self->element(
                {
                    'Name' => 'Hsp_identity',
                    'Data' => $1
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_align-len',
                    'Data' => $2
                }
            );
            if ( defined $3 ) {
                $self->element(
                    {
                        'Name' => 'Hsp_positive',
                        'Data' => $4
                    }
                );
            }
            if ( defined $6 ) {
                $self->element(
                    {
                        'Name' => 'Hsp_gaps',
                        'Data' => $7
                    }
                );
            }

            # Reset the coordinate trackers filled in by the alignment
            # branch further down.
            $self->{'_Query'} = { 'begin' => 0, 'end' => 0 };
            $self->{'_Sbjct'} = { 'begin' => 0, 'end' => 0 };

            # A "Frame = ..." that shares this line is handed back so the
            # Frame branch gets to see it on the next iteration.
            if (/(Frame\s*=\s*.+)$/) {
                $self->_pushback($1);
            }
        }
        elsif ( $self->in_element('hsp')
            && /Strand\s*=\s*(Plus|Minus)\s*\/\s*(Plus|Minus)/i )
        {
            next;
        }
        elsif ( $self->in_element('hsp')
            && /Frame\s*=\s*([\+\-][1-3])\s*(\/\s*([\+\-][1-3]))?/ )
        {

            # ---- reading frame(s) ------------------------------------
            my ( $first, $second ) = ( $1, $3 );

            # The header pattern is case-insensitive, so compare the
            # program name case-insensitively as well.
            my $program = uc( defined $reporttype ? $reporttype : '' );

            my ( $queryframe, $hitframe );
            if ( $program eq 'TBLASTX' ) {
                ( $queryframe, $hitframe ) = ( $first, $second );
            }
            elsif ( $program eq 'TBLASTN' ) {
                ( $hitframe, $queryframe ) = ( $first, 0 );
            }
            elsif ( $program eq 'BLASTX' ) {
                ( $queryframe, $hitframe ) = ( $first, 0 );
            }
            $self->element(
                {
                    'Name' => 'Hsp_query-frame',
                    'Data' => $queryframe
                }
            );
            $self->element(
                {
                    'Name' => 'Hsp_hit-frame',
                    'Data' => $hitframe
                }
            );
        }
        elsif (/^Parameters:/
            || /^\s+Database:\s+?/
            || ( $self->in_element('hsp') && ( /WARNING/ || /NOTE/ ) ) )
        {

            # ---- trailer: parameters and statistics ------------------
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->in_element('hit')
              && $self->end_element( { 'Name' => 'Hit' } );

            # A "Parameters:" line selects the 'wublast' set of patterns,
            # anything else the 'ncbi' set.
            my $blast = (/Parameters\:/) ? 'wublast' : 'ncbi';
            my $last = '';    # previous trailer line (after any edits to $_)

            # Emitted as 'yes' up front; a later "nogaps" line emits 'no'.
            $self->element(
                {
                    'Name' => 'Parameters_allowgaps',
                    'Data' => 'yes'
                }
            );
            while ( defined( $_ = $self->_readline ) ) {
                if (/^([T]?BLAST[NPX])\s*([\d\.]+)/i) {

                    # Header of the next report: leave it for the main loop.
                    $self->_pushback($_);
                    last;
                }
                if (   /Number of Sequences:\s+([\d\,]+)/i
                    || /of sequences in database:\s+([\d,]+)/i )
                {
                    my $c = $1;
                    $c =~ s/\,//g;
                    $self->element(
                        {
                            'Name' => 'Statistics_db-len',
                            'Data' => $c
                        }
                    );
                }
                elsif (/letters in database:\s+([\d,]+)/i) {
                    my $s = $1;
                    $s =~ s/,//g;
                    $self->element(
                        {
                            'Name' => 'Statistics_db-let',
                            'Data' => $s
                        }
                    );
                }
                elsif ( $blast eq 'wublast' ) {
                    if (/E=(\S+)/) {
                        $self->element(
                            {
                                'Name' => 'Parameters_expect',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/nogaps/) {
                        $self->element(
                            {
                                'Name' => 'Parameters_allowgaps',
                                'Data' => 'no'
                            }
                        );
                    }
                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Matrix name/i )
                    {

                        # Row directly below the matrix table heading: drop
                        # the first two columns, then read matrix, lambda,
                        # kappa and entropy.
                        s/^\s+//;
                        my @vals = split;
                        splice( @vals, 0, 2 );
                        my ( $matrix, $lambda, $kappa, $entropy ) = @vals;
                        $self->element(
                            {
                                'Name' => 'Parameters_matrix',
                                'Data' => $matrix
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Statistics_lambda',
                                'Data' => $lambda
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Statistics_kappa',
                                'Data' => $kappa
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Statistics_entropy',
                                'Data' => $entropy
                            }
                        );
                    }
                    elsif (/(\S+\s+\S+)\s+DFA:\s+(\S+)\s+\((.+)\)/) {
                        if ( $1 eq 'states in' ) {
                            $self->element(
                                {
                                    'Name' => 'Statistics_DFA_states',
                                    'Data' => "$2 $3"
                                }
                            );
                        }
                        elsif ( $1 eq 'size of' ) {
                            $self->element(
                                {
                                    'Name' => 'Statistics_DFA_size',
                                    'Data' => "$2 $3"
                                }
                            );
                        }
                    }
                    elsif (
/^\s+Time to generate neighborhood:\s+(\S+\s+\S+\s+\S+)/
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_neighbortime',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/processors\s+used:\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_noprocessors',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
/^\s+(\S+)\s+cpu\s+time:\s+(\S+\s+\S+\s+\S+)\s+Elapsed:\s+(\S+)/
                      )
                    {

                        # The first word of the line, lower-cased, becomes
                        # part of the element name.
                        my $cputype = lc($1);
                        $self->element(
                            {
                                'Name' => "Statistics_${cputype}_cputime",
                                'Data' => $2
                            }
                        );
                        $self->element(
                            {
                                'Name' => "Statistics_${cputype}_actualtime",
                                'Data' => $3
                            }
                        );
                    }
                    elsif (/^\s+Start:/) {

                        # The capturing split yields: leading text, 'Start',
                        # start time, 'End', end time.
                        my ( undef, undef, $stime, undef, $etime ) =
                          split( /\s+(Start|End)\:\s+/, $_ );
                        chomp($stime);
                        $self->element(
                            {
                                'Name' => 'Statistics_starttime',
                                'Data' => $stime
                            }
                        );
                        chomp($etime);
                        $self->element(
                            {
                                'Name' => 'Statistics_endtime',
                                'Data' => $etime
                            }
                        );
                    }
                }
                elsif ( $blast eq 'ncbi' ) {
                    if (/^Matrix:\s+(\S+)/i) {
                        $self->element(
                            {
                                'Name' => 'Parameters_matrix',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/Lambda/) {

                        # The values sit on the line below the heading.
                        $_ = $self->_readline;
                        s/^\s+//;
                        my ( $lambda, $kappa, $entropy ) = split;
                        $self->element(
                            {
                                'Name' => 'Statistics_lambda',
                                'Data' => $lambda
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Statistics_kappa',
                                'Data' => $kappa
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Statistics_entropy',
                                'Data' => $entropy
                            }
                        );
                    }
                    elsif (/effective\s+search\s+space\s+used:\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-spaceused',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/effective\s+search\s+space:\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-space',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
/Gap\s+Penalties:\s+Existence:\s+(\d+)\,\s+Extension:\s+(\d+)/
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Parameters_gap-open',
                                'Data' => $1
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Parameters_gap-extend',
                                'Data' => $2
                            }
                        );
                    }
                    elsif (/effective\s+HSP\s+length:\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_hsp-len',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/effective\s+length\s+of\s+query:\s+([\d\,]+)/) {
                        my $c = $1;
                        $c =~ s/\,//g;
                        $self->element(
                            {
                                'Name' => 'Statistics_query-len',
                                'Data' => $c
                            }
                        );
                    }
                    elsif (/effective\s+length\s+of\s+database:\s+([\d\,]+)/)
                    {
                        my $c = $1;
                        $c =~ s/\,//g;
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-dblen',
                                'Data' => $c
                            }
                        );
                    }
                    elsif (/^(T|A|X1|X2|S1|S2):\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => "Statistics_$1",
                                'Data' => $2
                            }
                        );
                    }
                    elsif (
/frameshift\s+window\,\s+decay\s+const:\s+(\d+)\,\s+([\.\d]+)/
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_framewindow',
                                'Data' => $1
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Statistics_decay',
                                'Data' => $2
                            }
                        );
                    }
                }
                $last = $_;
            }
        }
        elsif ( $self->in_element('hsp') ) {

            # ---- one alignment block ---------------------------------
            # Consumes up to three lines, starting with the current one:
            # "Query:", the match line and "Sbjct:".  One further line is
            # read at the end of the third pass and then discarded by the
            # outer loop's next read.
            #
            # 'Sbjct' is pre-seeded (the key the code below writes and
            # reads) so a truncated block yields '' rather than undef.
            my %data = (
                'Query' => '',
                'Mid'   => '',
                'Sbjct' => '',
            );
            my $len;    # width of the "Query: N " prefix; the match line
                        # is cut at this column
            for ( my $i = 0 ; defined($_) && $i < 3 ; $i++ ) {
                chomp;
                if (/^((Query|Sbjct):\s+(\d+)\s*)(\S+)\s+(\d+)/) {
                    $data{$2} = $4;
                    $len = length($1);

                    # Keep the first start coordinate seen for this HSP,
                    # always update the end coordinate.
                    $self->{"_$2"}->{'begin'} = $3
                      unless $self->{"_$2"}->{'begin'};
                    $self->{"_$2"}->{'end'} = $5;
                }
                else {
                    $self->throw("no data for midline $_")
                      unless ( defined $_ && defined $len );
                    $data{'Mid'} = substr( $_, $len );
                }
                $_ = $self->_readline();
            }
            $self->characters(
                {
                    'Name' => 'Hsp_qseq',
                    'Data' => $data{'Query'}
                }
            );
            $self->characters(
                {
                    'Name' => 'Hsp_hseq',
                    'Data' => $data{'Sbjct'}
                }
            );
            $self->characters(
                {
                    'Name' => 'Hsp_midline',
                    'Data' => $data{'Mid'}
                }
            );
        }
        else {
            $self->debug("unrecognized line $_");
        }
    }

    # End of input.  If a report was started, close anything left open (a
    # report without a trailer section never reaches the closing code in the
    # trailer branch) and then the report element itself.
    if ($seentop) {
        $close_open_hit->();
        $self->end_element( { 'Name' => 'BlastOutput' } );
    }
    return $self->end_document();
}
The rest of the documentation details each of the object methods.
Internal methods are usually prefixed with an underscore (_).