Bio::Index Blast
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
Bio::Index::Blast - Indexes Blast reports and supports retrieval based on query accession(s)
Package variables
No package variables defined.
Included modules
Bio::Index::Abstract
Bio::Root::Root
Bio::Tools::BPlite
IO::String
Inherit
Bio::Index::Abstract Bio::Root::Root
Synopsis
    use strict;
    use Bio::Index::Blast;
    my $index = new Bio::Index::Blast(-filename => $indexfile,
				      -write_flag => 1);
    $index->make_index($file1, $file2);

    my $data = $index->get_stream($id);

    my $bplite_report = $index->fetch_report($id);
    print "query is ", $bplite_report->query, "\n";
    while( my $sbjct = $bplite_report->nextSbjct ) {
	print $sbjct->name, "\n";
	while( my $hsp = $sbjct->nextHSP ) {
	    print "\t e-value ", $hsp->P;
	}
	print "\n";
    }
Description
This object allows one to build an index on a blast file (or files)
and provide quick access to the blast report for that accession.
Note: for best results 'use strict'.
Methods
BEGIN Code
_versionDescriptionCode
newDescriptionCode
fetch_reportDescriptionCode
id_parserDescriptionCode
default_id_parserDescriptionCode
_index_fileDescriptionCode
_process_report
No description
Code
Methods description
_versioncode    nextTop
  Title   : _version
  Usage   : $type = $index->_version()
  Function: Returns a string which identifies the version of an
            index module.  Used to permanently identify an index
            file as having been created by a particular version
            of the index module.  Must be provided by the sub class
  Example : 
  Returns : 
  Args    : none
newcodeprevnextTop
  Usage   : $index = Bio::Index::Abstract->new(
                -filename    => $dbm_file,
                -write_flag  => 0,  
                -dbm_package => 'DB_File',
                -verbose     => 0);
  Function: Returns a new index object.  If filename is
            specified, then open_dbm() is immediately called. 
            Bio::Index::Abstract->new() will usually be called
            directly only when opening an existing index.
  Returns : A new index object
  Args    : -filename    The name of the dbm index file.
            -write_flag  TRUE if write access to the dbm file is
                         needed.
            -dbm_package The Perl dbm module to use for the
                         index.
            -verbose     Print debugging output to STDERR if
                         TRUE.
fetch_reportcodeprevnextTop
 Title   : fetch_report
 Usage   : my $blastreport = $idx->fetch_report($id);
 Function: Returns a Bio::Tools::BPlite report object 
           for a specific blast report
 Returns : Bio::Tools::BPlite
 Args    : valid id
id_parsercodeprevnextTop
  Title   : id_parser
  Usage   : $index->id_parser( CODE )
  Function: Stores or returns the code used by _process_report to
            parse the ID(s) for a record from a string.  Useful
            for (for instance) specifying a different
            parser for different flavours of blast dbs. 
            Returns \&default_id_parser (see below) if not
            set. If you supply your own id_parser
            subroutine, then it should expect the query
            string of a blast report (the value returned by the
            report's query method).  An entry will be added to
            the index for each string in the list returned.
  Example : $index->id_parser( \&my_id_parser )
  Returns : ref to CODE if called without arguments
  Args    : CODE
default_id_parsercodeprevnextTop
  Title   : default_id_parser
  Usage   : $id = default_id_parser( $header )
  Function: The default Blast Query ID parser for Bio::Index::Blast.pm
            Returns $1 from applying the regexp /^\s*(\S+)/
            to $header.
  Returns : ID string
  Args    : a header line string
_index_filecodeprevnextTop
  Title   : _index_file
  Usage   : $index->_index_file( $file_name, $i )
  Function: Specialist function to index BLAST report file(s).
            Is provided with a filename and an integer
            by make_index in its SUPER class.
  Example : 
  Returns : 
  Args    :
Methods code
BEGINTop
# Assign the module version inside BEGIN so that it is set at compile time.
# _version() returns this value.
BEGIN {
     $VERSION = 0.1;
}
_versiondescriptionprevnextTop
# Title   : _version
# Usage   : $type = $index->_version()
# Function: Reports the version of this index module, as held in the
#           package variable $VERSION.
# Returns : The current value of $VERSION
# Args    : none
sub _version {
    my $module_version = $VERSION;
    return $module_version;
}
newdescriptionprevnextTop
# Title   : new
# Usage   : $index = Bio::Index::Blast->new(-filename   => $indexfile,
#                                           -write_flag => 1);
# Function: Builds a new index object by passing every argument straight
#           through to the superclass constructor.
# Returns : The object produced by SUPER::new
# Args    : Whatever the superclass constructor accepts; they are forwarded
#           unchanged.
sub new {
    my ($class, @args) = @_;

    my $index = $class->SUPER::new(@args);
    return $index;
}
fetch_reportdescriptionprevnextTop
# Title   : fetch_report
# Usage   : my $blastreport = $idx->fetch_report($id);
# Function: Asks get_stream() for the stream belonging to $id and wraps it
#           in a Bio::Tools::BPlite parser.
# Returns : Bio::Tools::BPlite object
# Args    : $id - id of the report to fetch
sub fetch_report {
    my ($self, $id) = @_;

    # Keep the get_stream() call in scalar context, exactly one handle.
    my $stream = $self->get_stream($id);

    # -noclose is handed to BPlite together with the stream.
    return Bio::Tools::BPlite->new(
        -fh      => $stream,
        -noclose => 1,
    );
}
id_parserdescriptionprevnextTop
# Title   : id_parser
# Usage   : $index->id_parser( CODE )
# Function: Gets or sets the code reference used to extract ids.
#           A true argument is stored on the object; the stored value is
#           then returned, falling back to \&default_id_parser when
#           nothing has been stored.
# Returns : CODE reference
# Args    : optional CODE reference
sub id_parser {
    my ($self, $code) = @_;

    $self->{'_id_parser'} = $code if $code;

    return $self->{'_id_parser'} || \&default_id_parser;
}
default_id_parserdescriptionprevnextTop
# Title   : default_id_parser
# Usage   : $id = default_id_parser( $header )
# Function: Extracts the first run of non-whitespace characters from
#           $header, skipping any leading whitespace.
# Returns : The id string, or nothing (empty list / undef) when $header
#           contains no non-whitespace characters
# Args    : a header line string
sub default_id_parser {
    my ($header) = @_;

    return $1 if $header =~ /^\s*(\S+)/;
    return;
}
_index_filedescriptionprevnextTop
# Title   : _index_file
# Usage   : $index->_index_file( $file_name, $i )
# Function: Scans a file holding one or more BLAST reports and hands each
#           report to _process_report() together with the byte offset at
#           which the report starts and the index number of the file.
# Returns : 1 once the whole file has been scanned
# Args    : $file - name of the BLAST report file to read
#           $i    - index number of the file being indexed
# Throws  : dies if the file cannot be opened
sub _index_file {
    my ($self, $file, $i) = @_;

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters or whitespace are interpreted) and the
    # handle cannot collide with any other handle named BLAST.
    open(my $blast_fh, '<', $file)
        or die("cannot open file $file: $!\n");

    my @data;              # lines of the report currently being collected
    my $indexpoint = 0;    # offset at which the current report starts
    my $lastline   = 0;    # offset just past the most recently read line

    while (my $line = <$blast_fh>) {
        # A line mentioning (T)BLASTP/N/X is taken as the first line of a
        # new report.  NOTE: the pattern is unanchored, so it matches the
        # program name anywhere on the line.
        if ($line =~ /T?BLAST[PNX]/) {
            # Fencepost (beginning): nothing has been collected before the
            # very first header, so there is nothing to store yet.
            if (@data) {
                $self->_process_report($indexpoint, $i, join("", @data));
            }

            # The new report starts where the previous line ended.
            $indexpoint = $lastline;
            @data       = ();
        }
        push @data, $line;
        $lastline = tell($blast_fh);
    }

    # Fencepost (end): the last report has no following header to flush it.
    if (@data) {
        $self->_process_report($indexpoint, $i, join("", @data));
    }

    close($blast_fh);
    return 1;
}
_process_reportdescriptionprevnextTop
# Title   : _process_report
# Usage   : $index->_process_report( $begin, $i, $data )
# Function: Parses the text of a single BLAST report, runs the configured
#           id parser over the report's query, and adds one index record
#           ($i, $begin) for every id the parser returns.
# Returns : nothing
# Args    : $begin - offset of the start of the report within its file
#           $i     - index number of the file holding the report
#           $data  - full text of the report
sub _process_report {
    my ($self, $begin, $i, $data) = @_;

    if (!$data) {
        $self->warn("calling _process_report without a valid data string");
        return;
    }
    my $id_parser = $self->id_parser;

    # Wrap the report text in an in-memory handle for BPlite to read from.
    my $datal  = IO::String->new($data);
    my $report = Bio::Tools::BPlite->new(-fh      => $datal,
                                         -noclose => 1);

    my $query = $report->query;
    foreach my $id ($id_parser->($query)) {
        # Debugging output goes to STDERR, as documented for -verbose.
        print STDERR "id is $id, begin is $begin\n" if ($self->verbose > 0);
        $self->add_record($id, $i, $begin);
    }
    return;
}
General documentation
FEEDBACKTop
Mailing ListsTop
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
  bioperl-l@bioperl.org            - General discussion
http://bioperl.org/MailList.shtml  - About the mailing lists
Reporting BugsTop
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:
  bioperl-bugs@bioperl.org
  http://bugzilla.bioperl.org/
AUTHOR - Jason StajichTop
Email jason@cgt.mc.duke.edu
Describe contact details here
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
Bio::Index::Blast implemented methodsTop
Require methods from Bio::Index::AbstractTop
Bio::Index::Abstract methodsTop
filenameTop
 Title   : filename
 Usage   : $value = $self->filename();
           $self->filename($value);
 Function: Gets or sets the name of the dbm index file.
 Returns : The current value of filename
 Args    : Value of filename if setting, or none if
           getting the value.
write_flagTop
 Title   : write_flag
 Usage   : $value = $self->write_flag();
           $self->write_flag($value);
 Function: Gets or sets the value of write_flag, which
           is whether the dbm file should be opened with
           write access.
 Returns : The current value of write_flag (default 0)
 Args    : Value of write_flag if setting, or none if
           getting the value.
dbm_packageTop
 Usage   : $value = $self->dbm_package();
           $self->dbm_package($value);

 Function: Gets or sets the name of the Perl dbm module used. 
           If the value is unset, then it returns the value of
           the package variable $USE_DBM_TYPE or if that is
           unset, then it chooses the best available dbm type,
           choosing 'DB_File' in preference to 'SDBM_File'. 
           Bio::Index::Abstract may work with other dbm file
           types.

 Returns : The current value of dbm_package
 Args    : Value of dbm_package if setting, or none if
           getting the value.
get_streamTop
 Title   : get_stream
 Usage   : $stream = $index->get_stream( $id );
 Function: Returns a file handle with the file pointer
           at the appropriate place

           This provides for a way to get the actual
           file contents and not an object 

           WARNING: you must parse the record delimiter
           *yourself*. Abstract won't do this for you.
           So this code

           $fh = $index->get_stream($myid);
           while( <$fh> ) {
              # do something
           }
           will parse the entire file if you don't put in
           a last statement in, like

           while( <$fh> ) {
              /^\/\// && last; # end of record
              # do something
           }

 Returns : A filehandle object
 Args    : string represents the accession number
 Notes   : This method should not be used without forethought
open_dbmTop
  Usage   : $index->open_dbm()
  Function: Opens the dbm file associated with the index
            object.  Write access is only given if explicitly
            asked for by calling new(-write_flag => 1) or having set
            the write_flag(1) on the index object.  The type of
            dbm file opened is that returned by dbm_package(). 
            The name of the file to be opened is obtained by
            calling the filename() method.

  Example : $index->open_dbm()
  Returns : 1 on success
_filenameTop
  Title   : _filename
  Usage   : $index->_filename( FILE INT )
  Function: Indexes the file
  Example : 
  Returns : 
  Args    :
_file_handleTop
  Title   : _file_handle
  Usage   : $fh = $index->_file_handle( INT )
  Function: Returns an open filehandle for the file
            index INT.  On opening a new filehandle it
            caches it in the @{$index->_filehandle} array.
            If the requested filehandle is already open,
            it simply returns it from the array.
  Example : $first_file_indexed = $index->_file_handle( 0 );
  Returns : ref to a filehandle
  Args    : INT
_file_countTop
  Title   : _file_count
  Usage   : $index->_file_count( INT )
  Function: Used by the index building sub in a sub class to
            track the number of files indexed.  Sets or gets
            the number of files indexed when called with or
            without an argument.
  Example : 
  Returns : INT
  Args    : INT
add_recordTop
  Title   : add_record
  Usage   : $index->add_record( $id, @stuff );
  Function: Calls pack_record on @stuff, and adds the result
            of pack_record to the index database under key $id.
            If $id is a reference to an array, then a new entry
            is added under a key corresponding to each element
            of the array.
  Example : $index->add_record( $id, $fileNumber, $begin, $end )
  Returns : TRUE on success or FALSE on failure
  Args    : ID LIST
pack_recordTop
  Title   : pack_record
  Usage   : $packed_string = $index->pack_record( LIST )
  Function: Packs an array of scalars into a single string
            joined by ASCII 034 (which is unlikely to be used
            in any of the strings), and returns it. 
  Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
  Returns : STRING or undef
  Args    : LIST
unpack_recordTop
  Title   : unpack_record
  Usage   : $index->unpack_record( STRING )
  Function: Splits the string provided into an array,
            splitting on ASCII 034.
  Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
  Returns : A 3 element ARRAY
  Args    : STRING containing ASCII 034
DESTROYTop
 Title   : DESTROY
 Usage   : Called automatically when index goes out of scope
 Function: Closes connection to database and handles to
           sequence files
 Returns : NEVER
 Args    : NONE