Bio::Search::Hit
GenericHit
Summary
Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
use Bio::Search::Hit::GenericHit;
my $hit = new Bio::Search::Hit::GenericHit(-algorithm => 'blastp');
# typically one gets HitI objects from a SearchIO stream via a ResultI
use Bio::SearchIO;
my $parser = new Bio::SearchIO(-format => 'blast', -file => 'result.bls');
my $result = $parser->next_result;
my $hit = $result->next_hit;
# TODO: Describe how to configure a SearchIO stream so that it generates
# GenericHit objects.
Description
This object handles the hit data from a Database Sequence Search such
as FASTA or BLAST.
Unless you're writing a parser, you won't ever need to create a
GenericHit or any other HitI-implementing object. If you use
the SearchIO system, HitI objects are created automatically from
a SearchIO stream which returns Bio::Search::Hit::HitI objects.
For documentation on what you can do with GenericHit (and other HitI
objects), please see the API documentation in
Bio::Search::Hit::HitI.
Methods
Methods description
Title : new
Usage : my $obj = new Bio::Search::Hit::GenericHit();
Function: Builds a new Bio::Search::Hit::GenericHit object
Returns : Bio::Search::Hit::GenericHit
Args : -name => Name of Hit (required)
-description => Description (optional)
-accession => Accession number (optional)
-length => Length of the Hit (optional)
-score => Raw Score for the Hit (optional)
-bits => Bit Score for the Hit (optional)
-significance => Significance value for the Hit (optional)
-algorithm => Algorithm used (BLASTP, FASTX, etc...)
-hsps => Array ref of HSPs for this Hit.
-found_again => boolean, true if hit appears in a
"previously found" section of a PSI-Blast report. |
Title : add_hsp
Usage : $hit->add_hsp($hsp)
Function: Add a HSP to the collection of HSPs for a Hit
Returns : number of HSPs in the Hit
Args : Bio::Search::HSP::HSPI object |
Title : name
Usage : $hit_name = $hit->name();
Function: returns the name of the Hit sequence
Returns : a scalar string
Args : [optional] scalar string to set the name |
Title : accession
Usage : $acc = $hit->accession();
Function: Retrieve the accession (if available) for the hit
Returns : a scalar string (empty string if not set)
Args : none |
Title : description
Usage : $desc = $hit->description();
Function: Retrieve the description for the hit
Returns : a scalar string
Args : [optional] scalar string to set the descrition |
Title : length
Usage : my $len = $hit->length
Function: Returns the length of the hit
Returns : integer
Args : [optional] integer to set the length |
Title : algorithm
Usage : $alg = $hit->algorithm();
Function: Gets the algorithm specification that was used to obtain the hit
For BLAST, the algorithm denotes what type of sequence was aligned
against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
dna-prt, TBLASTN prt-translated dna, TBLASTX translated
dna-translated dna).
Returns : a scalar string
Args : [optional] scalar string to set the algorithm |
Title : raw_score
Usage : $score = $hit->raw_score();
Function: Gets the "raw score" generated by the algorithm. What
this score is exactly will vary from algorithm to algorithm,
returning undef if unavailable.
Returns : a scalar value
Args : [optional] scalar value to set the raw score |
Title : significance
Usage : $significance = $hit->significance();
Function: Used to obtain the E or P value of a hit, i.e. the probability that
this particular hit was obtained purely by random chance. If
information is not available (nor calculatable from other
information sources), return undef.
Returns : a scalar value or undef if unavailable
Args : [optional] scalar value to set the significance |
Usage : $hit_object->bits();
Purpose : Gets the bit score of the best HSP for the current hit.
Example : $bits = $hit_object->bits();
Returns : Integer or undef if bit score is not set
Argument : n/a
Comments : For BLAST1, the non-bit score is listed in the summary line.
See Also : score() |
Title : next_hsp
Usage : while( $hsp = $obj->next_hsp()) { ... }
Function : Returns the next available High Scoring Pair
Example :
Returns : Bio::Search::HSP::HSPI object or null if finished
Args : none |
Usage : $hit_object->hsps();
Purpose : Get a list containing all HSP objects.
: Get the numbers of HSPs for the current hit.
Example : @hsps = $hit_object->hsps();
: $num = $hit_object->hsps(); # alternatively, use num_hsps()
Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
: Scalar context: integer (number of HSPs).
: (Equivalent to num_hsps()).
Argument : n/a. Relies on wantarray
Throws : Exception if the HSPs have not been collected.
See Also : hsp(), num_hsps() |
Usage : $hit_object->num_hsps();
Purpose : Get the number of HSPs for the present Blast hit.
Example : $nhsps = $hit_object->num_hsps();
Returns : Integer
Argument : n/a
Throws : Exception if the HSPs have not been collected.
See Also : hsps() |
Title : rewind
Usage : $hit->rewind;
Function: Allow one to reset the HSP iterator to the beginning
Since this is an in-memory implementation
Returns : none
Args : none |
Usage : $ambig_code = $hit_object->ambiguous_aln();
Purpose : Sets/Gets ambiguity code data member.
Example : (see usage)
Returns : String = 'q', 's', 'qs', '-'
: 'q' = query sequence contains overlapping sub-sequences
: while sbjct does not.
: 's' = sbjct sequence contains overlapping sub-sequences
: while query does not.
: 'qs' = query and sbjct sequence contains overlapping sub-sequences
: relative to each other.
: '-' = query and sbjct sequence do not contains multiple domains
: relative to each other OR both contain the same distribution
: of similar domains.
Argument : n/a
Throws : n/a
Comment : Note: "sbjct" is synonymous with "hit" |
Usage : $hit_object->n();
Purpose : Gets the N number for the current hit.
: This is the number of HSPs in the set which was ascribed
: the lowest P-value (listed on the description line).
: This number is not the same as the total number of HSPs.
: To get the total number of HSPs, use num_hsps().
Example : $n = $hit_object->n();
Returns : Integer
Argument : n/a
Throws : Exception if HSPs have not been set (BLAST2 reports).
Comments : Note that the N parameter is not reported in gapped BLAST2.
: Calling n() on such reports will result in a call to num_hsps().
: The num_hsps() method will count the actual number of
: HSPs in the alignment listing, which may exceed N in
: some cases.
See Also : num_hsps() |
Usage : $hit_object->p( [format] );
Purpose : Get the P-value for the best HSP of the given BLAST hit.
: (Note that P-values are not provided with NCBI Blast2 reports).
Example : $p = $sbjct->p;
: $p = $sbjct->p('exp'); # get exponent only.
: ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
Returns : Float or scientific notation number (the raw P-value, DEFAULT).
: Integer if format == 'exp' (the magnitude of the base 10 exponent).
: 2-element list (float, int) if format == 'parts' and P-value
: is in scientific notation (See Comments).
Argument : format: string of 'raw' | 'exp' | 'parts'
: 'raw' returns value given in report. Default. (1.2e-34)
: 'exp' returns exponent value only (34)
: 'parts' returns the decimal and exponent as a
: 2-element list (1.2, -34) (See Comments).
Throws : Warns if no P-value is defined. Uses expect instead.
Comments : Using the 'parts' argument is not recommended since it will not
: work as expected if the P-value is not in scientific notation.
: That is, floats are not converted into sci notation before
: splitting into parts.
See Also : expect(), signif(), Bio::Search::SearchUtils::get_exponent() |
Usage : $hit_object->hsp( [string] );
Purpose : Get a single HSPI object for the present HitI object.
Example : $hspObj = $hit_object->hsp; # same as 'best'
: $hspObj = $hit_object->hsp('best');
: $hspObj = $hit_object->hsp('worst');
Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
Argument : String (or no argument).
: No argument (default) = highest scoring HSP (same as 'best').
: 'best' or 'first' = highest scoring HSP.
: 'worst' or 'last' = lowest scoring HSP.
Throws : Exception if the HSPs have not been collected.
: Exception if an unrecognized argument is used.
See Also : hsps(), num_hsps() |
Usage : $hit_object->logical_length( [seq_type] );
: (mostly intended for internal use).
Purpose : Get the logical length of the hit sequence.
: If the Blast is a TBLASTN or TBLASTX, the returned length
: is the length of the would-be amino acid sequence (length/3).
: For all other BLAST flavors, this function is the same as length().
Example : $len = $hit_object->logical_length();
Returns : Integer
Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
('sbjct' is synonymous with 'hit')
Throws : n/a
Comments : This is important for functions like frac_aligned_query()
: which need to operate in amino acid coordinate space when dealing
: with [T]BLAST[NX] type reports.
See Also : length(), frac_aligned_query(), frac_aligned_hit() |
Usage : $hit_object->length_aln( [seq_type] );
Purpose : Get the total length of the aligned region for query or sbjct seq.
: This number will include all HSPs
Example : $len = $hit_object->length_aln(); # default = query
: $lenAln = $hit_object->length_aln('query');
Returns : Integer
Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
('sbjct' is synonymous with 'hit')
Throws : Exception if the argument is not recognized.
Comments : This method will report the logical length of the alignment,
: meaning that for TBLAST[NX] reports, the length is reported
: using amino acid coordinate space (i.e., nucleotides / 3).
:
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically..
: If you don't want the tiled data, iterate through each HSP
: calling length() on each (use hsps() to get all HSPs).
See Also : length(), frac_aligned_query(), frac_aligned_hit(), gaps(), Bio::Search::SearchUtils::tile_hsps(), Bio::Search::HSP::BlastHSP::length() |
Usage : $hit_object->gaps( [seq_type] );
Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
: Data is summed across all HSPs.
Example : $qgaps = $hit_object->gaps('query');
: $hgaps = $hit_object->gaps('hit');
: $tgaps = $hit_object->gaps(); # default = total (query + hit)
Returns : scalar context: integer
: array context without args: two-element list of integers
: (queryGaps, hitGaps)
: Array context can be forced by providing an argument of 'list' or 'array'.
:
: CAUTION: Calling this method within printf or sprintf is arrray context.
: So this function may not give you what you expect. For example:
: printf "Total gaps: %d", $hit->gaps();
: Actually returns a two-element array, so what gets printed
: is the number of gaps in the query, not the total
:
Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
('sbjct' is synonymous with 'hit')
Throws : n/a
Comments : If you need data for each HSP, use hsps() and then interate
: through each HSP object.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically..
: Not relying on wantarray since that will fail in situations
: such as printf "%d", $hit->gaps() in which you might expect to
: be printing the total gaps, but evaluates to array context.
See Also : length_aln() |
Usage : $sbjct->start( [seq_type] );
Purpose : Gets the start coordinate for the query, sbjct, or both sequences
: in the BlastHit object. If there is more than one HSP, the lowest start
: value of all HSPs is returned.
Example : $qbeg = $sbjct->start('query');
: $sbeg = $sbjct->start('hit');
: ($qbeg, $sbeg) = $sbjct->start();
Returns : scalar context: integer
: array context without args: list of two integers (queryStart, sbjctStart)
: Array context can be "induced" by providing an argument of 'list' or 'array'.
Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
('sbjct' is synonymous with 'hit')
Throws : n/a
Comments : This method requires that all HSPs be tiled. If there is more than one
: HSP and they have not already been tiled, they will be tiled first automatically..
: Remember that the start and end coordinates of all HSPs are
: normalized so that start < end. Strand information can be
: obtained by calling $hit->strand().
See Also : end(), range(), strand(), Bio::Search::HSP::BlastHSP::start |
Usage : $sbjct->end( [seq_type] );
Purpose : Gets the end coordinate for the query, sbjct, or both sequences
: in the BlastHit object. If there is more than one HSP,
the largest end
: value of all HSPs is returned.
Example : $qend = $sbjct->end('query');
: $send = $sbjct->end('hit');
: ($qend, $send) = $sbjct->end();
Returns : scalar context: integer
: array context without args: list of two integers
: (queryEnd, sbjctEnd)
: Array context can be "induced" by providing an argument
: of 'list' or 'array'.
Argument : In scalar context: seq_type = 'query' or 'sbjct'
: (case insensitive). If not supplied, 'query' is used.
Throws : n/a
Comments : This method requires that all HSPs be tiled. If there is
: more than one HSP and they have not already been tiled,
: they will be tiled first automatically..
: Remember that the start and end coordinates of all HSPs are
: normalized so that start < end. Strand information can be
: obtained by calling $hit->strand().
See Also : start(), range(), strand() |
Usage : $sbjct->range( [seq_type] );
Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
: in the HSP alignment.
Example : ($qbeg, $qend) = $sbjct->range('query');
: ($sbeg, $send) = $sbjct->range('hit');
Returns : Two-element array of integers
Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
('sbjct' is synonymous with 'hit')
Throws : n/a
See Also : start(), end() |
Usage : $hit_object->frac_identical( [seq_type] );
Purpose : Get the overall fraction of identical positions across all HSPs.
: The number refers to only the aligned regions and does not
: account for unaligned regions in between the HSPs, if any.
Example : $frac_iden = $hit_object->frac_identical('query');
Returns : Float (2-decimal precision, e.g., 0.75).
Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
: default = 'query' (but see comments below).
: ('sbjct' is synonymous with 'hit')
Throws : n/a
Comments : Different versions of Blast report different values for the total
: length of the alignment. This is the number reported in the
: denominators in the stats section:
: "Identical = 34/120 Positives = 67/120".
: NCBI BLAST uses the total length of the alignment (with gaps)
: WU-BLAST uses the length of the query sequence (without gaps).
:
: Therefore, when called with an argument of 'total',
: this method will report different values depending on the
: version of BLAST used. Total does NOT take into account HSP
: tiling, so it should not be used.
:
: To get the fraction identical among only the aligned residues,
: ignoring the gaps, call this method without an argument or
: with an argument of 'query' or 'hit'.
:
: If you need data for each HSP, use hsps() and then iterate
: through the HSP objects.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically.
See Also : frac_conserved(), frac_aligned_query(), matches(), Bio::Search::SearchUtils::tile_hsps() |
Usage : $hit_object->frac_conserved( [seq_type] );
Purpose : Get the overall fraction of conserved positions across all HSPs.
: The number refers to only the aligned regions and does not
: account for unaligned regions in between the HSPs, if any.
Example : $frac_cons = $hit_object->frac_conserved('hit');
Returns : Float (2-decimal precision, e.g., 0.75).
Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
: default = 'query' (but see comments below).
: ('sbjct' is synonymous with 'hit')
Throws : n/a
Comments : Different versions of Blast report different values for the total
: length of the alignment. This is the number reported in the
: denominators in the stats section:
: "Positives = 34/120 Positives = 67/120".
: NCBI BLAST uses the total length of the alignment (with gaps)
: WU-BLAST uses the length of the query sequence (without gaps).
:
: Therefore, when called with an argument of 'total',
: this method will report different values depending on the
: version of BLAST used. Total does NOT take into account HSP
: tiling, so it should not be used.
:
: To get the fraction conserved among only the aligned residues,
: ignoring the gaps, call this method without an argument or
: with an argument of 'query' or 'hit'.
:
: If you need data for each HSP, use hsps() and then interate
: through the HSP objects.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically.
See Also : frac_identical(), matches(), Bio::Search::SearchUtils::tile_hsps() |
Usage : $hit_object->frac_aligned_query();
Purpose : Get the fraction of the query sequence which has been aligned
: across all HSPs (not including intervals between non-overlapping
: HSPs).
Example : $frac_alnq = $hit_object->frac_aligned_query();
Returns : Float (2-decimal precision, e.g., 0.75).
Argument : n/a
Throws : n/a
Comments : If you need data for each HSP, use hsps() and then interate
: through the HSP objects.
: To compute the fraction aligned, the logical length of the query
: sequence is used, meaning that for [T]BLASTX reports, the
: full length of the query sequence is converted into amino acids
: by dividing by 3. This is necessary because of the way
: the lengths of aligned sequences are computed.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically.
See Also : frac_aligned_hit(), logical_length(), length_aln(), Bio::Search::SearchUtils::tile_hsps() |
Usage : $hit_object->frac_aligned_hit();
Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
: across all HSPs (not including intervals between non-overlapping
: HSPs).
Example : $frac_alnq = $hit_object->frac_aligned_hit();
Returns : Float (2-decimal precision, e.g., 0.75).
Argument : n/a
Throws : n/a
Comments : If you need data for each HSP, use hsps() and then interate
: through the HSP objects.
: To compute the fraction aligned, the logical length of the sbjct
: sequence is used, meaning that for TBLAST[NX] reports, the
: full length of the sbjct sequence is converted into amino acids
: by dividing by 3. This is necessary because of the way
: the lengths of aligned sequences are computed.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically.
See Also : frac_aligned_query(), matches(), , logical_length(), length_aln(), Bio::Search::SearchUtils::tile_hsps() |
Usage : $hit_object->num_unaligned_hit();
Purpose : Get the number of the unaligned residues in the hit sequence.
: Sums across all all HSPs.
Example : $num_unaln = $hit_object->num_unaligned_hit();
Returns : Integer
Argument : n/a
Throws : n/a
Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
: They apply here as well.
: If you need data for each HSP, use hsps() and then interate
: through the HSP objects.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically..
See Also : num_unaligned_query(), Bio::Search::SearchUtils::tile_hsps(), frac_aligned_hit() |
Usage : $hit_object->num_unaligned_query();
Purpose : Get the number of the unaligned residues in the query sequence.
: Sums across all all HSPs.
Example : $num_unaln = $hit_object->num_unaligned_query();
Returns : Integer
Argument : n/a
Throws : n/a
Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
: They apply here as well.
: If you need data for each HSP, use hsps() and then interate
: through the HSP objects.
: This method requires that all HSPs be tiled. If they have not
: already been tiled, they will be tiled first automatically..
See Also : num_unaligned_hit(), frac_aligned_query(), Bio::Search::SearchUtils::tile_hsps() |
Usage : $hit->seq_inds( seq_type, class, collapse );
Purpose : Get a list of residue positions (indices) across all HSPs
: for identical or conserved residues in the query or sbjct sequence.
Example : @s_ind = $hit->seq_inds('query', 'identical');
: @h_ind = $hit->seq_inds('hit', 'conserved');
: @h_ind = $hit->seq_inds('hit', 'conserved', 1);
Returns : Array of integers
: May include ranges if collapse is non-zero.
Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
: ('sbjct' is synonymous with 'hit')
: [1] class = 'identical' or 'conserved' (default = 'identical')
: (can be shortened to 'id' or 'cons')
: (actually, anything not 'id' will evaluate to 'conserved').
: [2] collapse = boolean, if non-zero, consecutive positions are merged
: using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
: collapses to "1-5 7 9-11". This is useful for
: consolidating long lists. Default = no collapse.
Throws : n/a.
See Also : Bio::Search::HSP::BlastHSP::seq_inds() |
Title : rank
Usage : $obj->rank($newval)
Function: Get/Set the rank of this Hit in the Query search list
i.e. this is the Nth hit for a specific query
Returns : value of rank
Args : newvalue (optional) |
Title : locus
Usage : $locus = $hit->locus();
Function: Retrieve the locus (if available) for the hit
Returns : a scalar string (empty string if not set)
Args : none |
Title : each_accession_number
Usage : @each_accession_number = $hit->each_accession_number();
Function: Get each accession number listed in the description of the hit.
If there are no alternatives, then only the primary accession will
be given
Returns : list of all accession numbers in the description
Args : none |
Title : query_length
Usage : $obj->query_length($newval)
Function: Get/Set the query_length
Returns : value of query_length (a scalar)
Args : on set, new value (a scalar or undef, optional) |
Methods code
sub new
{ my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
$score,$algo,$signif,$bits,
$rank) = $self->_rearrange([qw(HSPS
NAME
QUERY_LEN
DESCRIPTION
ACCESSION
LOCUS
LENGTH SCORE ALGORITHM
SIGNIFICANCE BITS
RANK )], @args);
defined $query_len && $self->query_length($query_len);
if( ! defined $name ) {
$self->throw("Must have defined a valid name for Hit");
} else {
$self->name($name);
}
defined $acc && $self->accession($acc);
defined $locus && $self->locus($locus);
defined $desc && $self->description($desc);
defined $length && $self->length($length);
defined $algo && $self->algorithm($algo);
defined $signif && $self->significance($signif);
defined $score && $self->raw_score($score);
defined $bits && $self->bits($bits);
defined $rank && $self->rank($rank);
$self->{'_iterator'} = 0;
$self->{'_hsps'} = [];
if( defined $hsps ) {
if( ref($hsps) !~ /array/i ) {
$self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
} else {
while( @$hsps ) {
$self->add_hsp(shift @$hsps );
}
}
}
return $self;} |
sub add_hsp
{ my ($self,$hsp) = @_;
if( !defined $hsp || ! $hsp->isa('Bio::Search::HSP::HSPI') ) {
$self->throw("Must provide a valid Bio::Search::HSP::HSPI object to object: $self method: add_hsp value: $hsp");
return undef;
}
push @{$self->{'_hsps'}}, $hsp;
return scalar @{$self->{'_hsps'}};} |
sub name
{ my ($self,$value) = @_;
my $previous = $self->{'_name'};
if( defined $value || ! defined $previous ) {
$value = $previous = '' unless defined $value;
$self->{'_name'} = $value;
}
return $previous;} |
sub accession
{ my ($self,$value) = @_;
my $previous = $self->{'_accession'};
if( defined $value || ! defined $previous ) {
$value = $previous = '' unless defined $value;
$self->{'_accession'} = $value;
}
return $previous;} |
sub description
{ my ($self,$value) = @_;
my $previous = $self->{'_description'};
if( defined $value || ! defined $previous ) {
$value = $previous = '' unless defined $value;
$self->{'_description'} = $value;
}
return $previous;} |
sub length
{ my ($self,$value) = @_;
my $previous = $self->{'_length'};
if( defined $value || ! defined $previous ) {
$value = $previous = 0 unless defined $value;
$self->{'_length'} = $value;
}
return $previous;} |
sub algorithm
{ my ($self,$value) = @_;
my $previous = $self->{'_algorithm'};
if( defined $value || ! defined $previous ) {
$value = $previous = '' unless defined $value;
$self->{'_algorithm'} = $value;
}
return $previous;} |
sub raw_score
{ my ($self,$value) = @_;
my $previous = $self->{'_score'};
if( defined $value || ! defined $previous ) {
$value = $previous = '' unless defined $value;
$self->{'_score'} = $value;
}
return $previous;} |
sub score
{ shift->raw_score(@_); } |
sub significance
{ my ($self,$value) = @_;
my $previous = $self->{'_significance'};
if( defined $value ) {
$self->{'_significance'} = $value;
} elsif ( ! defined $previous ) {
unless( defined $self->{'_hsps'}->[0] ) {
$self->warn("No HSPs for this Hit (".$self->name.")");
return undef;
}
$previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
}
return $previous;} |
sub bits
{ my ($self,$value) = @_;
my $previous = $self->{'_bits'};
if( defined $value ) {
$self->{'_bits'} = $value;
} elsif ( ! defined $previous ) {
unless( defined $self->{'_hsps'}->[0] ) {
$self->warn("No HSPs for this Hit (".$self->name.")");
return undef;
}
$previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
}
return $previous; } |
sub next_hsp
{ my ($self) = @_;
$self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
return undef if $self->{'_iterator'} > scalar @{$self->{'_hsps'}};
return $self->{'_hsps'}->[$self->{'_iterator'}++];} |
sub hsps
{ my $self = shift;
if (not ref $self->{'_hsps'}) {
$self->throw("Can't get HSPs: data not collected.");
}
return wantarray
? @{$self->{'_hsps'}}
: scalar(@{$self->{'_hsps'}}); } |
sub num_hsps
{ my $self = shift;
if (not defined $self->{'_hsps'}) {
$self->throw("Can't get HSPs: data not collected.");
}
return scalar(@{$self->{'_hsps'}});} |
sub rewind
{ my ($self) = @_;
$self->{'_iterator'} = 0;} |
sub ambiguous_aln
{ my $self = shift;
if(@_) { $self->{'_ambiguous_aln'} = shift; }
$self->{'_ambiguous_aln'} || '-'; } |
sub overlap
{ my $self = shift;
if(@_) { $self->{'_overlap'} = shift; }
defined $self->{'_overlap'} ? $self->{'_overlap'} : 0; } |
sub n
{ my $self = shift;
my ($n);
if(not defined($self->{'_n'})) {
if( $self->hsp ) {
$n = $self->hsp->n;
}
} else {
$n = $self->{'_n'};
}
$n ||= $self->num_hsps;
return $n; } |
sub p
{
my ($self, $fmt) = @_;
my $val = $self->{'_p'};
if(not defined $val) {
$self->warn( "P-value not defined. Using expect() instead.");
$val = $self->{'_expect'};
}
return $val if not $fmt or $fmt =~ /^raw/i;
return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
return (split (/eE/, $val)) if $fmt =~ /^parts/i;
return $val; } |
sub hsp
{ my( $self, $option ) = @_;
$option ||= 'best';
if (not ref $self->{'_hsps'}) {
$self->throw("Can't get HSPs: data not collected.");
}
my @hsps = @{$self->{'_hsps'}};
return $hsps[0] if $option =~ /best|first|1/i;
return $hsps[$#hsps] if $option =~ /worst|last/i;
$self->throw("Can't get HSP for: $option\n" .
"Valid arguments: 'best', 'worst'"); } |
sub logical_length
{ my $self = shift;
my $seqType = shift || 'query';
$seqType = 'sbjct' if $seqType eq 'hit';
my $length;
if( $seqType eq 'sbjct' ) {
$length = $self->length;
if($self->algorithm =~ /TBLAST[NX]/ ) {
$length /= 3; }
} else {
$length = $self->query_length();
$self->throw("Must have defined query_len") unless ( $length );
if($self->algorithm =~ /T?BLASTX/ ) {
$length /= 3; }
}
return int($length); } |
sub length_aln
{ my( $self, $seqType, $num ) = @_;
$seqType ||= 'query';
$seqType = 'sbjct' if $seqType eq 'hit';
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
if( defined $num) {
return $self->{'_length_aln_'.$seqType} = $num;
}
my $data = $self->{'_length_aln_'.$seqType};
if(!$data) {
$self->throw("Can't get length aln for sequence type\" $seqType\". " .
"Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
}
return $data; } |
sub gaps
{ my( $self, $seqType, $num ) = @_;
$seqType ||= (wantarray ? 'list' : 'total');
$seqType = 'sbjct' if $seqType eq 'hit';
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
$seqType = lc($seqType);
if( defined $num ) {
$self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
return $self->{'_gaps_'.$seqType} = $num;
}
elsif($seqType =~ /list|array/i) {
return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
}
elsif($seqType eq 'total') {
return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
} else {
return $self->{'_gaps_'.$seqType} || 0;
} } |
sub matches
{ my( $self, $arg1, $arg2) = @_;
my(@data,$data);
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
unless( $arg1 ) {
@data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
return @data if @data;
} else {
if( defined $arg2 ) {
$self->{'_totalIdentical'} = $arg1;
$self->{'_totalConserved'} = $arg2;
return ( $arg1, $arg2 );
}
elsif($arg1 =~ /^id/i) {
$data = $self->{'_totalIdentical'};
} else {
$data = $self->{'_totalConserved'};
}
return $data if $data;
}
$self->throw("Can't get identical or conserved data: no data."); } |
sub start
{ my ($self, $seqType, $num) = @_;
$seqType ||= (wantarray ? 'list' : 'query');
$seqType = 'sbjct' if $seqType eq 'hit';
if( defined $num ) {
$seqType = "_\L$seqType\E";
return $self->{$seqType.'Start'} = $num;
}
if($self->num_hsps == 1) {
return $self->hsp->start($seqType);
} else {
&Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
if($seqType =~ /list|array/i) {
return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
} else {
$seqType = "_\L$seqType\E";
return $self->{$seqType.'Start'};
}
} } |
sub end
{ my ($self, $seqType, $num) = @_;
$seqType ||= (wantarray ? 'list' : 'query');
$seqType = 'sbjct' if $seqType eq 'hit';
if( defined $num ) {
$seqType = "_\L$seqType\E";
return $self->{$seqType.'Stop'} = $num;
}
if($self->num_hsps == 1) {
return $self->hsp->end($seqType);
} else {
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
if($seqType =~ /list|array/i) {
return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
} else {
$seqType = "_\L$seqType\E";
return $self->{$seqType.'Stop'};
}
} } |
sub range
{ my ($self, $seqType) = @_;
$seqType ||= 'query';
$seqType = 'sbjct' if $seqType eq 'hit';
return ($self->start($seqType), $self->end($seqType)); } |
sub frac_identical
{ my ($self, $seqType) = @_;
$seqType ||= 'query';
$seqType = 'sbjct' if $seqType eq 'hit';
$seqType = lc($seqType);
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
my $ident = $self->matches('id');
my $total = $self->length_aln($seqType);
my $ratio = $ident / $total; my $ratio_rounded = sprintf( "%.3f", $ratio);
$ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
return $ratio_rounded; } |
sub frac_conserved
{ my ($self, $seqType) = @_;
$seqType ||= 'query';
$seqType = 'sbjct' if $seqType eq 'hit';
$seqType = lc($seqType);
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
my $consv = $self->matches('cons');
my $total = $self->length_aln($seqType);
my $ratio = $consv / $total; my $ratio_rounded = sprintf( "%.3f", $ratio);
$ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
return $ratio_rounded; } |
sub frac_aligned_query
{ my $self = shift;
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
sprintf( "%.2f", $self->length_aln('query') / $self->logical_length('query'));
} |
sub frac_aligned_hit
{ my $self = shift;
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
sprintf( "%.2f", $self->length_aln('sbjct') / $self->logical_length('sbjct'));
} |
sub num_unaligned_hit
{ my $self = shift;
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');
($num < 0 ? 0 : $num ); } |
sub num_unaligned_query
{ my $self = shift;
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
my $num = $self->logical_length('query') - $self->length_aln('query');
($num < 0 ? 0 : $num ); } |
sub seq_inds
{ my ($self, $seqType, $class, $collapse) = @_;
$seqType ||= 'query';
$class ||= 'identical';
$collapse ||= 0;
$seqType = 'sbjct' if $seqType eq 'hit';
my (@inds, $hsp);
foreach $hsp ($self->hsps) {
push @inds, $hsp->seq_inds($seqType, $class);
}
if(@inds) {
my %tmp = map { $_, 1 } @inds;
@inds = sort {$a <=> $b} keys %tmp;
}
$collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds; } |
sub strand
{ my ($self, $seqType, $strnd) = @_;
Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
$seqType ||= (wantarray ? 'list' : 'query');
$seqType = 'sbjct' if $seqType eq 'hit';
$seqType = lc($seqType);
if( defined $strnd ) {
$self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
return $self->{'_strand_'.$seqType} = $strnd;
}
my ($qstr, $hstr);
if($self->num_hsps == 1) {
return
|