# Compile-time initialisation of the package-level lookup tables and
# defaults.
#
#   %MODEMAP  maps an element-name prefix to a mode name.
#   %MAPPING  maps an element name either to a plain string
#             ('HSP-bits', 'HIT-name', ...) or to a one-pair hash
#             reference of the form { 'RESULT-parameters' => FIELD } or
#             { 'RESULT-statistics' => FIELD }.
#
# The hash-reference entries are generated from small name tables below
# instead of being spelled out one by one.
BEGIN {
    %MODEMAP = (
        'BlastOutput' => 'result',
        'Iteration'   => 'iteration',
        'Hit'         => 'hit',
        'Hsp'         => 'hsp',
    );

    # Elements whose value is stored directly under a single key.
    %MAPPING = (
        'Hsp_bit-score'   => 'HSP-bits',
        'Hsp_score'       => 'HSP-score',
        'Hsp_evalue'      => 'HSP-evalue',
        'Hsp_pvalue'      => 'HSP-pvalue',
        'Hsp_query-from'  => 'HSP-query_start',
        'Hsp_query-to'    => 'HSP-query_end',
        'Hsp_hit-from'    => 'HSP-hit_start',
        'Hsp_hit-to'      => 'HSP-hit_end',
        'Hsp_positive'    => 'HSP-conserved',
        'Hsp_identity'    => 'HSP-identical',
        'Hsp_gaps'        => 'HSP-hsp_gaps',
        'Hsp_hitgaps'     => 'HSP-hit_gaps',
        'Hsp_querygaps'   => 'HSP-query_gaps',
        'Hsp_qseq'        => 'HSP-query_seq',
        'Hsp_hseq'        => 'HSP-hit_seq',
        'Hsp_midline'     => 'HSP-homology_seq',
        'Hsp_align-len'   => 'HSP-hsp_length',
        'Hsp_query-frame' => 'HSP-query_frame',
        'Hsp_hit-frame'   => 'HSP-hit_frame',
        'Hsp_links'       => 'HSP-links',
        'Hsp_group'       => 'HSP-hsp_group',

        'Hit_id'        => 'HIT-name',
        'Hit_len'       => 'HIT-length',
        'Hit_accession' => 'HIT-accession',
        'Hit_def'       => 'HIT-description',
        'Hit_signif'    => 'HIT-significance',
        'Hit_score'     => 'HIT-score',
        'Hit_bits'      => 'HIT-bits',

        'Iteration_iter-num'  => 'ITERATION-number',
        'Iteration_converged' => 'ITERATION-converged',

        'BlastOutput_program'             => 'RESULT-algorithm_name',
        'BlastOutput_version'             => 'RESULT-algorithm_version',
        'BlastOutput_query-def'           => 'RESULT-query_name',
        'BlastOutput_query-len'           => 'RESULT-query_length',
        'BlastOutput_query-acc'           => 'RESULT-query_accession',
        'BlastOutput_querydesc'           => 'RESULT-query_description',
        'BlastOutput_db'                  => 'RESULT-database_name',
        'BlastOutput_db-len'              => 'RESULT-database_entries',
        'BlastOutput_db-let'              => 'RESULT-database_letters',
        'BlastOutput_inclusion-threshold' => 'RESULT-inclusion_threshold',
    );

    # Parameter elements whose field name differs from the element suffix.
    my %parameter_field_for = (
        'Parameters_matrix'      => 'matrix',
        'Parameters_expect'      => 'expect',
        'Parameters_include'     => 'include',
        'Parameters_sc-match'    => 'match',
        'Parameters_sc-mismatch' => 'mismatch',
        'Parameters_gap-open'    => 'gapopen',
        'Parameters_gap-extend'  => 'gapext',
        'Parameters_filter'      => 'filter',
        'Parameters_allowgaps'   => 'allowgaps',
        'Parameters_full_dbpath' => 'full_dbpath',
    );
    foreach my $element ( keys %parameter_field_for ) {
        $MAPPING{$element} =
          { 'RESULT-parameters' => $parameter_field_for{$element} };
    }

    # Statistics elements whose field name differs from the element suffix.
    my %statistic_field_for = (
        'Statistics_db-len'             => 'dbentries',
        'Statistics_db-let'             => 'dbletters',
        'Statistics_hsp-len'            => 'effective_hsplength',
        'Statistics_query-len'          => 'querylength',
        'Statistics_eff-space'          => 'effectivespace',
        'Statistics_eff-spaceused'      => 'effectivespaceused',
        'Statistics_eff-dblen'          => 'effectivedblength',
        'Statistics_kappa'              => 'kappa',
        'Statistics_lambda'             => 'lambda',
        'Statistics_entropy'            => 'entropy',
        'Statistics_gapped_kappa'       => 'kappa_gapped',
        'Statistics_gapped_lambda'      => 'lambda_gapped',
        'Statistics_gapped_entropy'     => 'entropy_gapped',
        'Statistics_framewindow'        => 'frameshiftwindow',
        'Statistics_decay'              => 'decayconst',
        'Statistics_hit_to_db'          => 'Hits_to_DB',
        'Statistics_num_suc_extensions' => 'num_successful_extensions',
        'Statistics_DFA_states'         => 'num_dfa_states',
        'Statistics_DFA_size'           => 'dfa_size',
        'Statistics_noprocessors'       => 'no_of_processors',
        'Statistics_neighbortime'       => 'neighborhood_generate_time',
        'Statistics_starttime'          => 'start_time',
        'Statistics_endtime'            => 'end_time',
    );
    foreach my $element ( keys %statistic_field_for ) {
        $MAPPING{$element} =
          { 'RESULT-statistics' => $statistic_field_for{$element} };
    }

    # Per-frame statistics: one entry for every combination of strand
    # (+/-), frame (0..3) and field name.
    my @frame_fields =
      qw(length efflength E S W T X X_gapped E2 E2_gapped S2);
    foreach my $quantity (qw(lambda kappa entropy)) {
        push @frame_fields,
          map { "${quantity}_$_" } qw(used computed gapped);
    }
    foreach my $frame ( 0 .. 3 ) {
        foreach my $strand ( '+', '-' ) {
            my $label = "$strand$frame";
            foreach my $field (@frame_fields) {
                $MAPPING{"Statistics_frame${label}_$field"} =
                  { 'RESULT-statistics' => "Frame${label}_$field" };
            }
        }
    }

    # Statistics and parameters whose field name equals the element suffix.
    my @plain_statistics = qw(
      T A X1 X2 X3 S1 S2 X1_bits X2_bits X3_bits
      S1_bits S2_bits num_extensions
      num_successful_extensions
      seqs_better_than_cutoff
      posted_date
      search_cputime total_cputime
      search_actualtime total_actualtime
      no_of_processors ctxfactor
    );
    foreach my $name (@plain_statistics) {
        $MAPPING{"Statistics_$name"} = { 'RESULT-statistics' => $name };
    }

    my @plain_parameters = qw(
      span span1 span2 links warnings notes hspsepsmax
      hspsepqmax topcomboN topcomboE postsw cpus wordmask
      filter sort_by_pvalue sort_by_count sort_by_highscore
      sort_by_totalscore sort_by_subjectlength noseqs gi qtype
      qres V B Z Y M N
    );
    foreach my $name (@plain_parameters) {
        $MAPPING{"Parameters_$name"} = { 'RESULT-parameters' => $name };
    }

    # Scalar defaults.
    $DEFAULT_BLAST_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
    $MAX_HSP_OVERLAP            = 2;
    $DEFAULTREPORTTYPE          = 'BLASTP';
}
# next_result
#
# Reads one report from the input, line by line via $self->_readline, and
# reports what it finds through the event-style methods on $self:
# start_document / end_document, start_element / end_element, element and
# characters.  Lines belonging to a following report are handed back with
# $self->_pushback before returning.
#
# Args    : none (invoked as a method)
# Returns : the return value of $self->end_document()
#
# Layout of the main loop: each input line is tested against a chain of
# patterns, in this order: report banner, PSI-BLAST round marker, "Query=",
# hit description tables (NCBI and WU flavours), "Database:", feature
# notes, hit header (">"), the HSP score-line variants, Identities, Strand,
# Links, Frame, the trailing parameters/statistics section and finally the
# alignment rows of the current HSP.
#
# The multi-line /x patterns below MUST stay multi-line: under /x a '#'
# starts a comment that runs to the end of the line, so joining the lines
# silently truncates the pattern at the first comment.
sub next_result {
    my ($self) = @_;
    my $v      = $self->verbose;
    my $flavor = '';
    $self->{'_seentop'} = 0;
    my ( $reporttype, $seenquery, $reportline );
    my $seeniteration;
    my $incl_threshold = $self->inclusion_threshold;

    # Set when the program name had to be guessed from Strand/Frame lines
    # because the report carried no banner; the guessed name is then
    # emitted just before the BlastOutput element is closed.
    my $bl2seq_fix;
    $self->start_document();

    # Rows collected from the description table:
    # [ significance, score, id, description ].
    my (@hit_signifs);
    my $gapped_stats = 0;
    local $_ = "\n";

    while ( defined( $_ = $self->_readline ) ) {
        next if (/^\s+$/);
        next if (/CPU time:/);
        next if (/^>\s*$/);

        # ---- Report banner: program name and version -------------------
        if (   /^([T]?BLAST[NPX])\s*(.+)$/i
            || /^(PSITBLASTN)\s+(.+)$/i
            || /^(RPS-BLAST)\s*(.+)$/i
            || /^(MEGABLAST)\s*(.+)$/i
            || /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i )
        {

            # Copy the captures now; the RPS-BLAST test below is itself a
            # successful match and would otherwise reset them.
            my ( $program, $version ) = ( $1, $2 );
            $self->debug("blast.pm: Start of new report: $program $version\n");
            if ( $self->{'_seentop'} ) {

                # A second banner: this line starts the next report, so
                # hand it back, close everything that is open and return.
                $self->_pushback($_);
                $self->in_element('hsp')
                  && $self->end_element( { 'Name' => 'Hsp' } );
                $self->in_element('hit')
                  && $self->end_element( { 'Name' => 'Hit' } );
                $self->within_element('iteration')
                  && $self->end_element( { 'Name' => 'Iteration' } );
                $self->end_element( { 'Name' => 'BlastOutput' } );
                return $self->end_document();
            }
            $self->_start_blastoutput;
            $reporttype = $program;
            if ( $reporttype =~ /RPS-BLAST/ ) {
                $reporttype .= '(BLASTP)';
            }
            $reportline = $_;
            $self->element(
                { 'Name' => 'BlastOutput_program', 'Data' => $reporttype } );
            $self->element(
                { 'Name' => 'BlastOutput_version', 'Data' => $version } );
            $self->element(
                {
                    'Name' => 'BlastOutput_inclusion-threshold',
                    'Data' => $incl_threshold
                }
            );
        }

        # ---- PSI-BLAST round marker -------------------------------------
        elsif (/^(Searching|Results from round)/) {
            next unless $1 =~ /Results from round/;
            $self->debug("blast.pm: Possible psi blast iterations found...\n");
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->in_element('hit')
              && $self->end_element( { 'Name' => 'Hit' } );
            if ( defined $seeniteration ) {
                $self->within_element('iteration')
                  && $self->end_element( { 'Name' => 'Iteration' } );
            }
            $self->_start_iteration;
            $seeniteration = 1;
        }

        # ---- Query name, description and length -------------------------
        elsif (/^Query=\s*(.*)$/) {
            $self->debug("blast.pm: Query= found...$_\n");
            my $q    = $1;
            my $size = 0;
            if ( defined $seenquery ) {

                # A second query ends this result.  The banner line is
                # pushed back as well so the next call sees it first.
                $self->_pushback($reportline) if $reportline;
                $self->_pushback($_);
                $self->in_element('hsp')
                  && $self->end_element( { 'Name' => 'Hsp' } );
                $self->in_element('hit')
                  && $self->end_element( { 'Name' => 'Hit' } );
                $self->within_element('iteration')
                  && $self->end_element( { 'Name' => 'Iteration' } );
                if ($bl2seq_fix) {
                    $self->element(
                        {
                            'Name' => 'BlastOutput_program',
                            'Data' => $reporttype
                        }
                    );
                }
                $self->end_element( { 'Name' => 'BlastOutput' } );
                return $self->end_document();
            }
            elsif ( !defined $reporttype ) {

                # No banner was seen before the first query.
                $self->_start_blastoutput;
                if ( defined $seeniteration ) {
                    $self->in_element('iteration')
                      && $self->end_element( { 'Name' => 'Iteration' } );
                }
                $self->_start_iteration;
                $seeniteration = 1;
            }
            $seenquery = $q;

            # The description may continue over several lines; it ends at
            # the "(N letters)" / "Length=N" line or at "Database:".
            $_ = $self->_readline;
            while ( defined($_) ) {
                if (/^Database:/) {
                    $self->_pushback($_);
                    last;
                }
                chomp;
                if (   /\((\-?[\d,]+)\s+letters.*\)/
                    || /^Length=(\-?[\d,]+)/ )
                {
                    $size = $1;
                    $size =~ s/,//g;
                    last;
                }
                else {
                    $q .= " $_";
                    $q =~ s/ +/ /g;
                    $q =~ s/^ | $//g;
                }
                $_ = $self->_readline;
            }
            chomp($q);
            my ( $nm, $desc ) = split( /\s+/, $q, 2 );
            $self->element(
                { 'Name' => 'BlastOutput_query-def', 'Data' => $nm } );
            $self->element(
                { 'Name' => 'BlastOutput_query-len', 'Data' => $size } );
            defined $desc && $desc =~ s/\s+$//;
            $self->element(
                { 'Name' => 'BlastOutput_querydesc', 'Data' => $desc } );
            my ( $acc, $version ) = &_get_accession_version($nm);
            $version =
              defined($version) && length($version) ? ".$version" : "";
            $acc = '' unless defined($acc);
            $self->element(
                {
                    'Name' => 'BlastOutput_query-acc',
                    'Data' => "$acc$version"
                }
            );
        }

        # ---- NCBI-style description table -------------------------------
        elsif (/Sequences producing significant alignments:/) {
            $self->debug("blast.pm: Processing NCBI-BLAST descriptions\n");
            $flavor = 'ncbi';
            if ( !$self->in_element('iteration') ) {
                $self->_start_iteration;
            }
          descline:
            while ( defined( $_ = $self->_readline() ) ) {
                if (   /^>/
                    || /^\s+Database:\s+?/
                    || /^Parameters:/
                    || /^\s+Subset/
                    || /^\s*Lambda/
                    || /^\s*Histogram/ )
                {
                    $self->_pushback($_);
                    last descline;
                }
                elsif (/([\d\.\+\-eE]+)\s+([\d\.\+\-eE]+)(\s+\d+)?\s*$/) {

                    # Copy all three captures before the substitution
                    # below, which resets them when it succeeds.
                    my ( $score, $evalue, $extra_column ) = ( $1, $2, $3 );
                    $evalue =~ s/^e/1e/i;
                    my @line = split;
                    pop @line, pop @line;
                    if ($extra_column) { pop @line }
                    push @hit_signifs,
                      [ $evalue, $score, shift @line, join( ' ', @line ) ];
                }
                elsif (/^CONVERGED/i) {
                    $self->element(
                        { 'Name' => 'Iteration_converged', 'Data' => 1 } );
                }

                # Kept sorted by significance after every row.
                @hit_signifs = sort { $a->[0] <=> $b->[0] } @hit_signifs;
            }
        }

        # ---- WU-style description table ---------------------------------
        elsif (/Sequences producing High-scoring Segment Pairs:/) {
            $self->debug("blast.pm: Processing WU-BLAST descriptions\n");
            $_      = $self->_readline();    # line after the heading is skipped
            $flavor = 'wu';
            if ( !$self->in_element('iteration') ) {
                $self->_start_iteration;
            }
            while ( defined( $_ = $self->_readline() )
                && !/^\s+$/ )
            {
                my @line = split;
                pop @line;                   # last column is discarded
                push @hit_signifs,
                  [ pop @line, pop @line, shift @line, join( ' ', @line ) ];
            }
        }

        # ---- Database name and size -------------------------------------
        elsif (/^Database:\s*(.+)$/) {
            $self->debug("blast.pm: Database: $1\n");
            my $db = $1;
            while ( defined( $_ = $self->_readline ) ) {
                if (
                    /^\s+(\-?[\d\,]+|\S+)\s+sequences\;
                      \s+(\-?[\d,]+|\S+)\s+
                      total\s+letters/ox
                  )
                {
                    my ( $s, $l ) = ( $1, $2 );
                    $s =~ s/,//g;
                    $l =~ s/,//g;
                    $self->element(
                        { 'Name' => 'BlastOutput_db-len', 'Data' => $s } );
                    $self->element(
                        { 'Name' => 'BlastOutput_db-let', 'Data' => $l } );
                    last;
                }
                else {

                    # Database name continued on the next line.
                    chomp;
                    $db .= $_;
                }
            }
            $self->element( { 'Name' => 'BlastOutput_db', 'Data' => $db } );
        }

        # ---- Feature notes preceding a score line: skipped --------------
        elsif (/^\sFeatures\s\w+\sthis\spart\sof\ssubject\ssequence:/) {

            # Stop at end of input as well, otherwise a truncated report
            # would loop forever on an undefined line.
            while ( defined($_) && $_ !~ /^\sScore\s=/ ) {
                $self->debug("Bypassing features line: $_");
                $_ = $self->_readline;
            }
            $self->_pushback($_) if defined $_;
        }

        # ---- Hit header -------------------------------------------------
        elsif (/^>\s*(\S+)\s*(.*)?/) {
            my ( $id, $restofline ) = ( $1, $2 );
            chomp;
            $self->debug("blast.pm: Hit: $id\n");
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->in_element('hit')
              && $self->end_element( { 'Name' => 'Hit' } );
            if ( !$self->within_element('result') ) {
                $self->_start_blastoutput;
                $self->_start_iteration;
            }
            elsif ( !$self->within_element('iteration') ) {
                $self->_start_iteration;
            }
            $self->start_element( { 'Name' => 'Hit' } );
            $self->debug("Starting a hit: $id $restofline\n");
            $self->element( { 'Name' => 'Hit_id', 'Data' => $id } );
            my ( $acc, $version ) = &_get_accession_version($id);
            $self->element( { 'Name' => 'Hit_accession', 'Data' => $acc } );

            # Pair this hit with the next row of the description table.
            my $v = shift @hit_signifs;
            if ( defined $v ) {
                $self->element(
                    { 'Name' => 'Hit_signif', 'Data' => $v->[0] } );
                $self->element(
                    { 'Name' => 'Hit_score', 'Data' => $v->[1] } );
            }

            # Description lines run until the "Length = N" line.
            while ( defined( $_ = $self->_readline() ) ) {
                next if (/^\s+$/);
                chomp;
                if (/Length\s*=\s*([\d,]+)/) {
                    my $l = $1;
                    $l =~ s/\,//g;
                    $self->element( { 'Name' => 'Hit_len', 'Data' => $l } );
                    last;
                }
                else {
                    $restofline .= $_;
                }
            }
            $restofline =~ s/\s+/ /g;
            $self->element( { 'Name' => 'Hit_def', 'Data' => $restofline } );
        }
        elsif (/\s+(Plus|Minus) Strand HSPs:/i) {
            next;
        }

        # ---- HSP score line: bits, raw score, Log-Length score ----------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*(\S+)\s*bits\s* # Bit score
                (?:\((\d+)\))?,               # Raw score
                \s+Log\-Length\sScore\s*=\s*(\d+) # Log-Length score
                /ox
          )
        {
            my ( $bits, $score, $evalue ) = ( $1, $2, $3 );
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->debug("Got paracel genewise HSP score=$bits\n");
            $evalue =~ s/^e/1e/i;
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_bit-score', 'Data' => $bits } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
        }

        # ---- HSP score line: raw score, Expect, P -----------------------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*([^,\s]+),       # Raw score
                \s*Expect\s*=\s*([^,\s]+),     # E-value
                \s*P(?:\(\S+\))?\s*=\s*([^,\s]+) # P-value
                /ox
          )
        {
            my ( $score, $evalue, $pvalue ) = ( $1, $2, $3 );
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->debug("Got paracel hframe HSP score=$score\n");
            $evalue = "1$evalue" if $evalue =~ /^e/;
            $pvalue = "1$pvalue" if $pvalue =~ /^e/;
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
            $self->element( { 'Name' => 'Hsp_pvalue', 'Data' => $pvalue } );
        }

        # ---- HSP score line: score (bits), Expect, [Sum] P, [Group] -----
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*(\S+)\s*         # Raw score
                \(([\d\.]+)\s*bits\),          # Bit score
                \s*Expect\s*=\s*([^,\s]+),     # E-value
                \s*(?:Sum)?\s*                 # SUM
                P(?:\(\d+\))?\s*=\s*([^,\s]+)  # P-value
                (?:\s*,\s+Group\s*\=\s*(\d+))? # HSP Group
                /ox
          )
        {
            my ( $score, $bits, $evalue, $pvalue, $group ) =
              ( $1, $2, $3, $4, $5 );
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->start_element( { 'Name' => 'Hsp' } );
            $evalue =~ s/^e/1e/i;
            $pvalue =~ s/^e/1e/i;
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_bit-score', 'Data' => $bits } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
            $self->element( { 'Name' => 'Hsp_pvalue', 'Data' => $pvalue } );
            if ( defined $group ) {
                $self->element(
                    { 'Name' => 'Hsp_group', 'Data' => $group } );
            }
        }

        # ---- HSP score line: bits [(raw score)], Expect -----------------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*(\S+)\s*bits\s* # Bit score
                (?:\((\d+)\))?,               # Missing for BLAT pseudo-BLAST fmt
                \s*Expect(?:\(\d+\+?\))?\s*=\s*(\S+) # E-value
                /ox
          )
        {
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            my ( $bits, $score, $evalue ) = ( $1, $2, $3 );
            $evalue =~ s/^e/1e/i;
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_bit-score', 'Data' => $bits } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
            $score = '' unless defined $score;
            $self->debug("Got NCBI HSP score=$score, evalue $evalue\n");
        }

        # ---- Identities / Positives / Gaps ------------------------------
        elsif (
            $self->in_element('hsp')
            && m/Identities\s*=\s*(\d+)\s*\/\s*(\d+)\s*[\d\%\(\)]+\s*
                (?:,\s*Positives\s*=\s*(\d+)\/(\d+)\s*[\d\%\(\)]+\s*)? # pos only valid for Protein alignments
                (?:\,\s*Gaps\s*=\s*(\d+)\/(\d+))? # Gaps
                /oxi
          )
        {
            my ( $identical, $align_len, $positive, $gaps, $gaps_of ) =
              ( $1, $2, $3, $5, $6 );
            $self->element(
                { 'Name' => 'Hsp_identity', 'Data' => $identical } );
            $self->element(
                { 'Name' => 'Hsp_align-len', 'Data' => $align_len } );

            # Without a Positives field the identity count is reported as
            # the positive count too.
            $self->element(
                {
                    'Name' => 'Hsp_positive',
                    'Data' => defined $positive ? $positive : $identical
                }
            );
            if ( defined $gaps_of ) {
                $self->element( { 'Name' => 'Hsp_gaps', 'Data' => $gaps } );
            }

            # Reset the coordinate trackers filled in by the alignment rows.
            $self->{'_Query'} = { 'begin' => 0, 'end' => 0 };
            $self->{'_Sbjct'} = { 'begin' => 0, 'end' => 0 };

            # A Frame field on the same line is re-queued so that the
            # Frame branch below handles it on the next pass.
            if (/(Frame\s*=\s*.+)$/) {
                $self->_pushback($1);
            }
        }

        # ---- Strand -----------------------------------------------------
        elsif ( $self->in_element('hsp')
            && /Strand\s*=\s*(Plus|Minus)\s*\/\s*(Plus|Minus)/i )
        {
            unless ($reporttype) {
                $self->{'_reporttype'} = $reporttype = 'BLASTN';
                $bl2seq_fix = 1;
            }
            next;
        }

        # ---- Links ------------------------------------------------------
        elsif ( $self->in_element('hsp')
            && /Links\s*=\s*(\S+)/ox )
        {
            $self->element( { 'Name' => 'Hsp_links', 'Data' => $1 } );
        }

        # ---- Frame ------------------------------------------------------
        elsif ( $self->in_element('hsp')
            && /Frame\s*=\s*([\+\-][1-3])\s*(\/\s*([\+\-][1-3]))?/ )
        {
            my ( $first_frame, $second_frame ) = ( $1, $2 );
            unless ( defined $reporttype ) {

                # No banner: two frames imply TBLASTX, one implies BLASTX.
                $bl2seq_fix = 1;
                $reporttype =
                  ( $first_frame && $second_frame ) ? 'TBLASTX' : 'BLASTX';
                $self->{'_reporttype'} = $reporttype;
            }
            my ( $queryframe, $hitframe );
            if ( $reporttype eq 'TBLASTX' ) {
                ( $queryframe, $hitframe ) = ( $first_frame, $second_frame );
                $hitframe =~ s/\/\s*//g;
            }
            elsif ($reporttype eq 'TBLASTN'
                || $reporttype eq 'PSITBLASTN' )
            {
                ( $hitframe, $queryframe ) = ( $first_frame, 0 );
            }
            elsif ($reporttype eq 'BLASTX'
                || $reporttype eq 'RPS-BLAST(BLASTP)' )
            {
                ( $queryframe, $hitframe ) = ( $first_frame, 0 );
                if ( $reporttype eq 'RPS-BLAST(BLASTP)' ) {

                    # A frame in an RPS-BLAST report: re-emit the program
                    # name as the translated variant.
                    $self->element(
                        {
                            'Name' => 'BlastOutput_program',
                            'Data' => 'RPS-BLAST(BLASTX)'
                        }
                    );
                }
            }
            $self->element(
                { 'Name' => 'Hsp_query-frame', 'Data' => $queryframe } );
            $self->element(
                { 'Name' => 'Hsp_hit-frame', 'Data' => $hitframe } );
        }

        # ---- Trailing parameters / statistics section -------------------
        elsif (/^Parameters:/
            || /^\s+Database:\s+?/
            || /^\s+Subset/
            || /^\s*Lambda/
            || /^\s*Histogram/
            || ( $self->in_element('hsp') && /WARNING|NOTE/ ) )
        {
            $self->debug("blast.pm: found parameters section\n ");
            $self->in_element('hsp')
              && $self->end_element( { 'Name' => 'Hsp' } );
            $self->in_element('hit')
              && $self->end_element( { 'Name' => 'Hit' } );

            # Description rows that never got a ">" section are emitted as
            # hits without HSPs.
            while ( my $v = shift @hit_signifs ) {
                $self->start_element( { 'Name' => 'Hit' } );
                my $id   = $v->[2];
                my $desc = $v->[3];
                $self->element( { 'Name' => 'Hit_id', 'Data' => $id } );
                my ( $acc, $version ) = &_get_accession_version($id);
                $self->element(
                    { 'Name' => 'Hit_accession', 'Data' => $acc } );
                $self->element(
                    { 'Name' => 'Hit_signif', 'Data' => $v->[0] } );
                $self->element(
                    { 'Name' => 'Hit_score', 'Data' => $v->[1] } );
                $self->element( { 'Name' => 'Hit_def', 'Data' => $desc } );
                $self->end_element( { 'Name' => 'Hit' } );
            }
            $self->within_element('iteration')
              && $self->end_element( { 'Name' => 'Iteration' } );
            next if /^\s+Subset/;

            # The line that opened the section selects the sub-parser.
            my $blast =
              (/^(\s+Database\:)|(\s*Lambda)/) ? 'ncbi' : 'wublast';
            if (/^\s*Histogram/) {
                $blast = 'btk';
            }
            my $last = '';

            # Default; the WU "nogaps" line below emits 'no' afterwards.
            $self->element(
                { 'Name' => 'Parameters_allowgaps', 'Data' => 'yes' } );
            while ( defined( $_ = $self->_readline ) ) {
                if (   /^(PSI)?([T]?BLAST[NPX])\s*(.+)/i
                    || /^MEGABLAST\s*(.+)/i
                    || /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i )
                {
                    $self->_pushback($_);
                    last;
                }
                elsif (/^Query=/) {
                    $self->_pushback($reportline) if $reportline;
                    $self->_pushback($_);
                    $self->in_element('hsp')
                      && $self->end_element( { 'Name' => 'Hsp' } );
                    $self->in_element('hit')
                      && $self->end_element( { 'Name' => 'Hit' } );
                    if ($bl2seq_fix) {
                        $self->element(
                            {
                                'Name' => 'BlastOutput_program',
                                'Data' => $reporttype
                            }
                        );
                    }
                    $self->end_element( { 'Name' => 'BlastOutput' } );
                    return $self->end_document();
                }

                if (   /Number of Sequences:\s+([\d\,]+)/i
                    || /of sequences in database:\s+(\-?[\d,]+)/i )
                {
                    my $c = $1;
                    $c =~ s/\,//g;
                    $self->element(
                        { 'Name' => 'Statistics_db-len', 'Data' => $c } );
                }
                elsif (/letters in database:\s+(\-?[\d,]+)/i) {
                    my $s = $1;
                    $s =~ s/,//g;
                    $self->element(
                        { 'Name' => 'Statistics_db-let', 'Data' => $s } );
                }
                elsif ( $blast eq 'btk' ) {
                    next;
                }

                # ........ WU-BLAST parameters and statistics ...........
                elsif ( $blast eq 'wublast' ) {
                    if (/E=(\S+)/) {
                        $self->element(
                            { 'Name' => 'Parameters_expect', 'Data' => $1 }
                        );
                    }
                    elsif (/nogaps/) {
                        $self->element(
                            {
                                'Name' => 'Parameters_allowgaps',
                                'Data' => 'no'
                            }
                        );
                    }
                    elsif (/ctxfactor=(\S+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_ctxfactor',
                                'Data' => $1
                            }
                        );
                    }

                    # NOTE(review): 'qype' looks like a misspelling of
                    # 'qtype'; left unchanged because correcting it would
                    # alter how "qtype=..." lines are reported - confirm.
                    elsif (
/(postsw|links|span[12]?|warnings|notes|gi|noseqs|qres|qype)/
                      )
                    {
                        $self->element(
                            { 'Name' => "Parameters_$1", 'Data' => 'yes' } );
                    }
                    elsif (/(\S+)=(\S+)/) {
                        $self->element(
                            { 'Name' => "Parameters_$1", 'Data' => $2 } );
                    }

                    # Table following a "Frame MatID Matrix name" heading.
                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Matrix name/i )
                    {
                        my $firstgapinfo = 1;
                        my $frame        = undef;
                        while ( defined($_) && !/^\s+$/ ) {
                            s/^\s+//;
                            s/\s+$//;
                            if (   $firstgapinfo
                                && s/Q=(\d+),R=(\d+)\s+//x )
                            {
                                $firstgapinfo = 0;
                                $self->element(
                                    {
                                        'Name' => 'Parameters_gap-open',
                                        'Data' => $1
                                    }
                                );
                                $self->element(
                                    {
                                        'Name' => 'Parameters_gap-extend',
                                        'Data' => $2
                                    }
                                );
                                my @fields = split;
                                for my $type (
                                    qw(lambda_gapped
                                    kappa_gapped
                                    entropy_gapped)
                                  )
                                {
                                    next if $type eq 'n/a';
                                    if ( !@fields ) {
                                        warn "fields is empty for $type\n";
                                        next;
                                    }
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => shift @fields
                                        }
                                    );
                                }
                            }
                            else {
                                my ( $frameo, $matid, $matrix, @fields ) =
                                  split;
                                if ( !defined $frame ) {

                                    # First row also supplies the overall
                                    # matrix and lambda/kappa/entropy.
                                    $self->element(
                                        {
                                            'Name' => 'Parameters_matrix',
                                            'Data' => $matrix
                                        }
                                    );
                                    $self->element(
                                        {
                                            'Name' => 'Statistics_lambda',
                                            'Data' => $fields[0]
                                        }
                                    );
                                    $self->element(
                                        {
                                            'Name' => 'Statistics_kappa',
                                            'Data' => $fields[1]
                                        }
                                    );
                                    $self->element(
                                        {
                                            'Name' => 'Statistics_entropy',
                                            'Data' => $fields[2]
                                        }
                                    );
                                }
                                $frame = $frameo;
                                my $ii = 0;
                                for my $type (
                                    qw(lambda_used
                                    kappa_used
                                    entropy_used
                                    lambda_computed
                                    kappa_computed
                                    entropy_computed)
                                  )
                                {
                                    my $f = $fields[$ii];
                                    next unless defined $f;

                                    # 'same' repeats the value three
                                    # columns to the left.
                                    if ( $f eq 'same' ) {
                                        $f = $fields[ $ii - 3 ];
                                    }
                                    $ii++;
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => $f
                                        }
                                    );
                                }
                            }
                            $_ = $self->_readline;
                        }
                        $last = $_;
                    }

                    # Table following a "Frame MatID Length" heading.
                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Length/i ) {
                        my $frame = undef;
                        while ( defined($_) && !/^\s+/ ) {
                            s/^\s+//;
                            s/\s+$//;
                            my @fields = split;
                            if ( @fields <= 3 ) {
                                for my $type (qw(X_gapped E2_gapped S2)) {
                                    last unless @fields;
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => shift @fields
                                        }
                                    );
                                }
                            }
                            else {
                                for my $type (
                                    qw(length
                                    efflength
                                    E S W T X E2 S2)
                                  )
                                {
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => shift @fields
                                        }
                                    );
                                }
                            }
                            $_ = $self->_readline;
                        }
                        $last = $_;
                    }
                    elsif (/(\S+\s+\S+)\s+DFA:\s+(\S+)\s+\((.+)\)/) {
                        if ( $1 eq 'states in' ) {
                            $self->element(
                                {
                                    'Name' => 'Statistics_DFA_states',
                                    'Data' => "$2 $3"
                                }
                            );
                        }
                        elsif ( $1 eq 'size of' ) {
                            $self->element(
                                {
                                    'Name' => 'Statistics_DFA_size',
                                    'Data' => "$2 $3"
                                }
                            );
                        }
                    }

                    # Words are joined with \s+ because /x ignores literal
                    # blanks in the pattern.
                    elsif (
                        m/^\s+Time\s+to\s+generate\s+neighborhood:\s+
                          (\S+\s+\S+\s+\S+)/x
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_neighbortime',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/processors\s+used:\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_noprocessors',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/^\s+(\S+)\s+cpu\s+time:\s+# cputype
                          (\S+\s+\S+\s+\S+)         # cputime
                          \s+Elapsed:\s+(\S+)/x
                      )
                    {
                        my $cputype = lc($1);
                        $self->element(
                            {
                                'Name' => "Statistics_$cputype\_cputime",
                                'Data' => $2
                            }
                        );
                        $self->element(
                            {
                                'Name' => "Statistics_$cputype\_actualtime",
                                'Data' => $3
                            }
                        );
                    }
                    elsif (/^\s+Start:/) {
                        my ( $junk, $start, $stime, $end, $etime ) =
                          split( /\s+(Start|End)\:\s+/, $_ );
                        chomp($stime);
                        $self->element(
                            {
                                'Name' => 'Statistics_starttime',
                                'Data' => $stime
                            }
                        );
                        chomp($etime);
                        $self->element(
                            {
                                'Name' => 'Statistics_endtime',
                                'Data' => $etime
                            }
                        );
                    }
                    elsif (/^\s+Database:\s+(.+)$/) {
                        $self->element(
                            {
                                'Name' => 'Parameters_full_dbpath',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/^\s+Posted:\s+(.+)/) {
                        my $d = $1;
                        chomp($d);
                        $self->element(
                            {
                                'Name' => 'Statistics_posted_date',
                                'Data' => $d
                            }
                        );
                    }
                }

                # ........ NCBI-BLAST parameters and statistics .........
                elsif ( $blast eq 'ncbi' ) {
                    if (m/^Matrix:\s+(.+)\s*$/oxi) {
                        $self->element(
                            { 'Name' => 'Parameters_matrix', 'Data' => $1 }
                        );
                    }
                    elsif (/^Gapped/) {

                        # The next Lambda table holds the gapped values.
                        $gapped_stats = 1;
                    }
                    elsif (/^Lambda/) {

                        # Values are on the line after the heading.
                        $_ = $self->_readline;
                        s/^\s+//;
                        my ( $lambda, $kappa, $entropy ) = split;
                        my $prefix =
                          $gapped_stats
                          ? 'Statistics_gapped_'
                          : 'Statistics_';
                        $self->element(
                            {
                                'Name' => $prefix . 'lambda',
                                'Data' => $lambda
                            }
                        );
                        $self->element(
                            {
                                'Name' => $prefix . 'kappa',
                                'Data' => $kappa
                            }
                        );
                        $self->element(
                            {
                                'Name' => $prefix . 'entropy',
                                'Data' => $entropy
                            }
                        );
                    }
                    elsif (m/effective\s+search\s+space\s+used:\s+(\d+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-spaceused',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (m/effective\s+search\s+space:\s+(\d+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-space',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/Gap\s+Penalties:\s+Existence:\s+(\d+)\,
                          \s+Extension:\s+(\d+)/ox
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Parameters_gap-open',
                                'Data' => $1
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Parameters_gap-extend',
                                'Data' => $2
                            }
                        );
                    }
                    elsif (/effective\s+HSP\s+length:\s+(\d+)/) {
                        $self->element(
                            { 'Name' => 'Statistics_hsp-len', 'Data' => $1 }
                        );
                    }
                    elsif (/effective\s+length\s+of\s+query:\s+([\d\,]+)/) {
                        my $c = $1;
                        $c =~ s/\,//g;
                        $self->element(
                            {
                                'Name' => 'Statistics_query-len',
                                'Data' => $c
                            }
                        );
                    }
                    elsif (/effective\s+length\s+of\s+database:\s+([\d\,]+)/)
                    {
                        my $c = $1;
                        $c =~ s/\,//g;
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-dblen',
                                'Data' => $c
                            }
                        );
                    }
                    elsif (
/^(T|A|X1|X2|X3|S1|S2):\s+(\d+(\.\d+)?)\s+(?:\(\s*(\d+\.\d+) bits\))?/
                      )
                    {
                        my $v = $2;
                        chomp($v);
                        $self->element(
                            { 'Name' => "Statistics_$1", 'Data' => $v } );
                        if ( defined $4 ) {
                            $self->element(
                                {
                                    'Name' => "Statistics_$1_bits",
                                    'Data' => $4
                                }
                            );
                        }
                    }
                    elsif (
                        m/frameshift\s+window\,
                          \s+decay\s+const:\s+(\d+)\,\s+([\.\d]+)/x
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_framewindow',
                                'Data' => $1
                            }
                        );
                        $self->element(
                            { 'Name' => 'Statistics_decay', 'Data' => $2 } );
                    }
                    elsif (m/^Number\s+of\s+Hits\s+to\s+DB:\s+(\S+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_hit_to_db',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (m/^Number\s+of\s+extensions:\s+(\S+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_num_extensions',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/^Number\s+of\s+successful\s+extensions:\s+
                          (\S+)/ox
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_num_suc_extensions',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/^Number\s+of\s+sequences\s+better\s+than\s+
                          (\S+):\s+(\d+)/ox
                      )
                    {
                        $self->element(
                            { 'Name' => 'Parameters_expect', 'Data' => $1 }
                        );
                        $self->element(
                            {
                                'Name' =>
                                  'Statistics_seqs_better_than_cutoff',
                                'Data' => $2
                            }
                        );
                    }
                    elsif (/^\s+Posted\s+date:\s+(.+)/) {
                        my $d = $1;
                        chomp($d);
                        $self->element(
                            {
                                'Name' => 'Statistics_posted_date',
                                'Data' => $d
                            }
                        );
                    }
                }

                # Remembered so table headings can be recognised on the
                # following line.
                $last = $_;
            }
        }

        # ---- Alignment rows of the current HSP --------------------------
        elsif ( $self->in_element('hsp') ) {
            $self->debug("blast.pm: Processing HSP\n");
            $self->{'_reporttype'} ||= $DEFAULTREPORTTYPE;

            # NOTE(review): the subject row is stored under 'Sbjct' (the
            # label matched below), not under the pre-seeded 'Hit' key.
            my %data = (
                'Query' => '',
                'Mid'   => '',
                'Hit'   => ''
            );
            my $len;

            # One alignment block is up to three lines:
            # Query row, midline, Sbjct row.
            for ( my $i = 0 ; defined($_) && $i < 3 ; $i++ ) {
                if (   ( $i == 0 && /^\s+$/ )
                    || /^\s*(?:Lambda|Minus|Plus|Score)/i )
                {
                    $self->_pushback($_) if defined $_;
                    $self->end_element( { 'Name' => 'Hsp' } );
                    last;
                }
                chomp;
                if (/^((Query|Sbjct):?\s+(\-?\d+)\s*)(\S+)\s+(\-?\d+)/) {
                    my ( $full, $type, $start, $str, $end ) =
                      ( $1, $2, $3, $4, $5 );
                    if ( $str eq '-' ) {
                        $i = 3 if $type eq 'Sbjct';
                    }
                    else {
                        $data{$type} = $str;
                    }

                    # Width of the row label; the midline is cut at the
                    # same column.
                    $len = length($full);
                    my $slot = "_$type";
                    $self->{$slot}->{'begin'} = $start
                      unless $self->{$slot}->{'begin'};
                    $self->{$slot}->{'end'} = $end;
                }
                else {
                    $self->throw("no data for midline $_")
                      unless ( defined $_ && defined $len );
                    $data{'Mid'} = substr( $_, $len );
                }
                $_ = $self->_readline();
            }
            $self->characters(
                { 'Name' => 'Hsp_qseq', 'Data' => $data{'Query'} } );
            $self->characters(
                { 'Name' => 'Hsp_hseq', 'Data' => $data{'Sbjct'} } );
            $self->characters(
                { 'Name' => 'Hsp_midline', 'Data' => $data{'Mid'} } );
        }
    }

    # ---- End of input: close whatever is still open ---------------------
    $self->debug("blast.pm: End of BlastOutput\n");
    if ( $self->{'_seentop'} ) {
        $self->within_element('hsp')
          && $self->end_element( { 'Name' => 'Hsp' } );
        $self->within_element('hit')
          && $self->end_element( { 'Name' => 'Hit' } );
        $self->within_element('iteration')
          && $self->end_element( { 'Name' => 'Iteration' } );
        if ($bl2seq_fix) {
            $self->element(
                { 'Name' => 'BlastOutput_program', 'Data' => $reporttype } );
        }
        $self->end_element( { 'Name' => 'BlastOutput' } );
    }
    return $self->end_document();
}
The rest of the documentation details each of the object methods.
Internal methods are usually prefixed with an underscore (_).