| Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
use Bio::SearchIO;

# Default mode: the XML for each report is accumulated in memory
# before being handed to the parser.
my $searchin = Bio::SearchIO->new(-format => 'blastxml',
                                  -file   => 't/data/plague_yeast.bls.xml');
while( my $result = $searchin->next_result ) {
    # work with each result object here
}

# one can also request that the parser NOT keep the XML data in memory
# by using the tempfile initialization flag.
# ($searchin is reused rather than re-declared, so the second example
# does not mask the first declaration.)
$searchin = Bio::SearchIO->new(-tempfile => 1,
                               -format   => 'blastxml',
                               -file     => 't/data/plague_yeast.bls.xml');
while( my $result = $searchin->next_result ) {
    # work with each result object here
}
| BEGIN | Code | |
| _initialize | Description | Code |
| next_result | Description | Code |
| start_document | Description | Code |
| end_document | Description | Code |
| start_element | Description | Code |
| end_element | Description | Code |
| characters | Description | Code |
| use_tempfile | Description | Code |
| result_count | No description | Code |
| _initialize | code | next | Top |
Title : _initialize Usage : private Function: Initializes the object - this is chained through new in SearchIO |
| next_result | code | prev | next | Top |
Title : next_result Usage : my $result = $searchio->next_result; Function: Returns the next Result from a search Returns : Bio::Search::Result::ResultI object Args : none |
| start_document | code | prev | next | Top |
Title : start_document Usage : $parser->start_document; Function: SAX method to indicate starting to parse a new document Returns : none Args : none |
| end_document | code | prev | next | Top |
Title : end_document Usage : $parser->end_document; Function: SAX method to indicate finishing parsing a document Returns : Bio::Search::Result::ResultI object Args : none |
| start_element | code | prev | next | Top |
Title : start_element Usage : $parser->start_element($data) Function: SAX method to indicate starting a new element Returns : none Args : hash ref for data |
| end_element | code | prev | next | Top |
Title : end_element Usage : $parser->end_element($data) Function: Signals finishing an element Returns : Bio::Search object depending on what type of element Args : hash ref for data |
| characters | code | prev | next | Top |
Title : characters Usage : $parser->characters($data) Function: Signals new characters to be processed Returns : characters read Args : hash ref with the key 'Data' |
| use_tempfile | code | prev | next | Top |
Title : use_tempfile Usage : $obj->use_tempfile($newval) Function: Get/Set boolean flag on whether or not to use a tempfile Example : Returns : value of use_tempfile Args : newvalue (optional) |
| BEGIN | Top |
# Compile-time setup of the lookup tables consulted by the SAX callbacks.
#
# %MODEMAP : XML element name => event type ('result', 'hit', 'hsp'); the
#            event type is used to build the start_<type>/end_<type>
#            method names called on the event handler.
# %MAPPING : XML element name => where the element's text is stored in
#            $self->{'_values'}.  A plain string is used directly as the
#            key; a one-pair hash ref { OUTER => INNER } stores the text
#            at $self->{'_values'}->{OUTER}->{INNER}.
BEGIN {
    # mapping of NCBI Blast terms to Bioperl hash keys
    %MODEMAP = (
        'BlastOutput' => 'result',
        'Hit'         => 'hit',
        'Hsp'         => 'hsp',
    );

    %MAPPING = (
        # HSP specific fields
        'Hsp_bit-score'   => 'HSP-bits',
        'Hsp_score'       => 'HSP-score',
        'Hsp_evalue'      => 'HSP-evalue',
        'Hsp_query-from'  => 'HSP-query_start',
        'Hsp_query-to'    => 'HSP-query_end',
        'Hsp_hit-from'    => 'HSP-hit_start',
        'Hsp_hit-to'      => 'HSP-hit_end',
        'Hsp_positive'    => 'HSP-conserved',
        'Hsp_identity'    => 'HSP-identical',
        'Hsp_gaps'        => 'HSP-gaps',
        'Hsp_hitgaps'     => 'HSP-hit_gaps',
        'Hsp_querygaps'   => 'HSP-query_gaps',
        'Hsp_qseq'        => 'HSP-query_seq',
        'Hsp_hseq'        => 'HSP-hit_seq',
        'Hsp_midline'     => 'HSP-homology_seq',
        'Hsp_align-len'   => 'HSP-hsp_length',
        'Hsp_query-frame' => 'HSP-query_frame',
        'Hsp_hit-frame'   => 'HSP-hit_frame',

        # these are ignored for now
        # NOTE(review): pattern-from maps to 'patternend' and pattern-to to
        # 'patternstart', which looks swapped.  Left exactly as found;
        # confirm before relying on either key.
        'Hsp_num'          => 'HSP-order',
        'Hsp_pattern-from' => 'patternend',
        'Hsp_pattern-to'   => 'patternstart',
        'Hsp_density'      => 'hspdensity',

        # Hit specific fields
        'Hit_id'        => 'HIT-name',
        'Hit_len'       => 'HIT-length',
        'Hit_accession' => 'HIT-accession',
        'Hit_def'       => 'HIT-description',
        'Hit_num'       => 'HIT-order',

        'Iteration_iter-num' => 'HIT-iteration',
        'Iteration_stat'     => 'HIT-iteration_statistic',

        'BlastOutput_program'   => 'RESULT-algorithm_name',
        'BlastOutput_version'   => 'RESULT-algorithm_version',
        'BlastOutput_query-def' => 'RESULT-query_description',
        'BlastOutput_query-len' => 'RESULT-query_length',
        'BlastOutput_db'        => 'RESULT-database_name',
        'BlastOutput_reference' => 'RESULT-program_reference',
        'BlastOutput_query-ID'  => 'runid',

        'Parameters_matrix'      => { 'RESULT-parameters' => 'matrix' },
        'Parameters_expect'      => { 'RESULT-parameters' => 'expect' },
        'Parameters_include'     => { 'RESULT-parameters' => 'include' },
        'Parameters_sc-match'    => { 'RESULT-parameters' => 'match' },
        'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' },
        'Parameters_gap-open'    => { 'RESULT-parameters' => 'gapopen' },
        'Parameters_gap-extend'  => { 'RESULT-parameters' => 'gapext' },
        'Parameters_filter'      => { 'RESULT-parameters' => 'filter' },

        'Statistics_db-num'    => 'RESULT-database_entries',
        'Statistics_db-len'    => 'RESULT-database_letters',
        'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength' },
        'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace' },
        'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
        'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
        'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy' },
    );

    # Time::HiRes is only used to time parsing when $DEBUG is on; if it
    # cannot be loaded, force debugging off so those calls are never made.
    eval { require Time::HiRes };
    if( $@ ) { $DEBUG = 0; }
}
| _initialize | description | prev | next | Top |
# Title   : _initialize
# Usage   : private
# Function: Initializes the object - this is chained through new in SearchIO.
#           Runs the parent class initializer, records the -tempfile flag
#           when one was passed, and creates the PerlSAX parser that
#           next_result() drives.
# Args    : the argument list given to new(); only -tempfile is read here
sub _initialize {
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], @args);
    $self->use_tempfile($usetempfile) if defined $usetempfile;

    # direct method call instead of the ambiguous indirect-object form
    $self->{'_xmlparser'} = XML::Parser::PerlSAX->new();

    # only switch debugging on when nothing has set $DEBUG already
    # (an explicit 0 is left alone)
    $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0 );
}
| next_result | description | prev | next | Top |
# Title   : next_result
# Usage   : my $result = $searchio->next_result;
# Function: Collects the lines of the next XML report from the input and
#           runs the SAX parser over them with this object as the handler.
#           The report is buffered in a string, or in an anonymous temp
#           file when use_tempfile() is true.
# Returns : whatever the parser returns for the report; undef when no
#           report lines were read or when parsing raised an error
# Args    : none
sub next_result {
    my ($self) = @_;

    my $data      = '';
    my $firstline = 1;
    my ($tfh);
    if( $self->use_tempfile ) {
        $tfh = IO::File->new_tmpfile or
            $self->throw("Unable to open temp file: $!");
        $tfh->autoflush(1);
    }

    my ($sawxmlheader,$okaytoprocess,$sawdoctype);

    # A lexical is used for the current line so that the caller's $_ is
    # never overwritten while reading.
    while( defined( my $line = $self->_readline ) ) {
        if( $line =~ /^RPS-BLAST/i ) {
            $self->{'_type'} = 'RPSBLAST';
            next;
        }
        if( $line =~ /^<\?xml version/ ) {
            # a second XML declaration starts the next report: hand the
            # line back and stop collecting
            if( ! $firstline ) {
                $self->_pushback($line);
                last;
            }
            $sawxmlheader = 1;
        }
        # for the non xml version prefixed in each section
        if( $line =~ /DOCTYPE/ ) { #|| /<BlastOutput>/
            if( $sawdoctype ) {
                if( ! $sawxmlheader ) {
                    $self->_pushback("<?xml version=\"1.0\"?>\n");
                }
                $self->_pushback($line);
                last;
            }
            $sawdoctype = 1;
            unless( $sawxmlheader ) {
                $self->debug( "matched here\n");
                $self->_pushback("<?xml version=\"1.0\"?>\n");
                $self->_pushback($line);
                next;
            }
        }
        $okaytoprocess = 1;
        if( defined $tfh ) {
            print {$tfh} $line;
        } else {
            $data .= $line;
        }
        $firstline = 0;
    }

    return undef unless( $okaytoprocess);

    my %parser_args;
    if( defined $tfh ) {
        # rewind so the parser reads the temp file from the beginning
        seek($tfh,0,0);
        %parser_args = ('Source'  => { 'ByteStream' => $tfh },
                        'Handler' => $self);
    } else {
        %parser_args = ('Source'  => { 'String' => $data },
                        'Handler' => $self);
    }

    my $result;
    my $starttime;
    if( $DEBUG ) {
        $starttime = [ Time::HiRes::gettimeofday() ];
    }

    # parsing magic here - but we call event handlers rather than
    # instantiating things
    eval {
        $result = $self->{'_xmlparser'}->parse(%parser_args);
        $self->{'_result_count'}++;
    };
    if( $@ ) {
        $self->warn("error in parsing a report:\n $@");
        $result = undef;
    }
    if( $DEBUG ) {
        $self->debug( sprintf("parsing took %f seconds\n",
                              Time::HiRes::tv_interval($starttime)));
    }
    return $result;
}
| start_document | description | prev | next | Top |
# Title   : start_document
# Usage   : $parser->start_document;
# Function: SAX method to indicate starting to parse a new document.
#           Clears the per-document state: last element type, collected
#           values and any previously built result.
# Returns : none
# Args    : none
sub start_document {
    my ($self) = @_;
    $self->{'_lasttype'} = '';
    $self->{'_values'}   = {};
    $self->{'_result'}   = undef;
}
| end_document | description | prev | next | Top |
# Title   : end_document
# Usage   : $parser->end_document;
# Function: SAX method to indicate finishing parsing a document.
# Returns : the result stored in $self->{'_result'} (set by end_element
#           when the BlastOutput element closes); undef if none was stored
# Args    : none (any extra arguments are ignored)
sub end_document {
    my ($self) = @_;
    return $self->{'_result'};
}
| start_element | description | prev | next | Top |
# Title   : start_element
# Usage   : $parser->start_element($data)
# Function: SAX method to indicate starting a new element.  For elements
#           listed in %MODEMAP the matching start_<type> method is called
#           on the event handler, provided the handler says it will handle
#           that type.  Opening a BlastOutput element also clears the
#           collected values and the stored result.
# Returns : none
# Args    : hash ref for data; the 'Name' and 'Attributes' keys are read
sub start_element {
    my ($self,$data) = @_;
    # we currently don't care about attributes
    my $nm = $data->{'Name'};

    if( my $type = $MODEMAP{$nm} ) {
        if( $self->_eventHandler->will_handle($type) ) {
            my $func = sprintf("start_%s",lc $type);
            $self->_eventHandler->$func($data->{'Attributes'});
        }
    }

    # the handler is notified first, then the per-report state is reset
    if($nm eq 'BlastOutput') {
        $self->{'_values'} = {};
        $self->{'_result'}= undef;
    }
}
| end_element | description | prev | next | Top |
# Title   : end_element
# Usage   : $parser->end_element($data)
# Function: Signals finishing an element.
#           - BlastOutput_program: the program name in the buffered text
#             is upper-cased and stored in $self->{'_type'}.
#           - elements in %MODEMAP: the matching end_<type> method is
#             called on the event handler with the type and the values
#             collected so far.
#           - elements in %MAPPING: the buffered text is filed under the
#             mapped key in $self->{'_values'}.
#           - known container elements are skipped silently; anything
#             else is reported through debug().
#           The text buffer is emptied afterwards in every case.
# Returns : whatever the end_<type> handler returned, otherwise undef
# Args    : hash ref for data; only the 'Name' key is read
sub end_element {
    my ($self,$data) = @_;
    my $nm = $data->{'Name'};
    my $rc;

    # the defined() guard avoids matching against undef when the element
    # carried no text
    if( $nm eq 'BlastOutput_program' &&
        defined $self->{'_last_data'} &&
        $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
        $self->{'_type'} = uc $1;
    }

    if( my $type = $MODEMAP{$nm} ) {
        if( $self->_eventHandler->will_handle($type) ) {
            my $func = sprintf("end_%s",lc $type);
            $rc = $self->_eventHandler->$func($self->{'_type'},
                                              $self->{'_values'});
        }
    } elsif( my $target = $MAPPING{$nm} ) {
        # exact type test: only a plain hash ref means "nested key"
        if ( ref($target) eq 'HASH' ) {
            my ($key) = keys %{$target};
            $self->{'_values'}->{$key}->{$target->{$key}} =
                $self->{'_last_data'};
        } else {
            $self->{'_values'}->{$target} = $self->{'_last_data'};
        }
    } elsif( $nm eq 'Iteration'          ||
             $nm eq 'Hit_hsps'           ||
             $nm eq 'Parameters'         ||
             $nm eq 'BlastOutput_param'  ||
             $nm eq 'Iteration_hits'     ||
             $nm eq 'Statistics'         ||
             $nm eq 'BlastOutput_iterations' ) {
        # pure containers: nothing to record
    } else {
        $self->debug("ignoring unrecognized element type $nm\n");
    }

    $self->{'_last_data'} = ''; # remove read data if we are at
                                # end of an element
    $self->{'_result'} = $rc if( $nm eq 'BlastOutput' );
    return $rc;
}
| characters | description | prev | next | Top |
# Title   : characters
# Usage   : $parser->characters($data)
# Function: Signals new characters to be processed.  Whitespace-only and
#           undefined chunks are ignored.  Each remaining chunk is
#           entity-decoded and APPENDED to the text buffer: a SAX driver
#           may deliver one element's text in several calls, and
#           overwriting the buffer would keep only the last piece.
#           end_element() empties the buffer once the element closes.
# Returns : the text buffered so far for the current element, or nothing
#           when the chunk was ignored
# Args    : hash ref with the key 'Data'
sub characters {
    my ($self,$data) = @_;
    return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );

    $self->{'_last_data'} .= decode_entities($data->{'Data'});
    return $self->{'_last_data'};
}
| use_tempfile | description | prev | next | Top |
# Title   : use_tempfile
# Usage   : $obj->use_tempfile($newval)
# Function: Get/Set boolean flag on whether or not to use a tempfile
# Returns : value of use_tempfile
# Args    : newvalue (optional); an undefined argument leaves the flag
#           unchanged
sub use_tempfile {
    my ($self,$value) = @_;
    if( defined $value ) {
        $self->{'_use_tempfile'} = $value;
    }
    return $self->{'_use_tempfile'};
}
| result_count | description | prev | next | Top |
# Title   : result_count
# Usage   : my $count = $searchio->result_count;
# Function: Reports how many reports next_result() has run through the
#           parser so far (the counter is incremented after each parse
#           that completes without raising an error).
# Returns : integer; 0 before any report has been parsed
# Args    : none
sub result_count {
    my $self = shift;
    # the counter does not exist until the first successful parse;
    # report that as 0 rather than undef
    return $self->{'_result_count'} || 0;
}
| FEEDBACK | Top |
| Mailing Lists | Top |
bioperl-l@bioperl.org - General discussion http://bioperl.org/MailList.shtml - About the mailing lists
| Reporting Bugs | Top |
bioperl-bugs@bioperl.org http://bugzilla.bioperl.org/
| AUTHOR - Jason Stajich | Top |
| CONTRIBUTORS | Top |
| APPENDIX | Top |
| new | Top |
Title : new
Usage : my $searchio = new Bio::SearchIO(-format => 'blastxml',
-file => 'filename',
-tempfile => 1);
Function: Initializes the object - this is chained through new in SearchIO
Returns : Bio::SearchIO::blastxml object
Args : One additional argument from the format and file/fh parameters.
-tempfile => boolean. Defaults to false. Write out XML data
to a temporary filehandle to send to
PerlSAX parser.
| SAX methods | Top |