Bio::Search::Tiling MapTileUtils
Other packages in the module: Bio::Search::Tiling::MapTileUtils Bio::Search::HSP::HSPI
SummaryPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvs
Summary
Bio::Search::Tiling::MapTileUtils - utilities for manipulating closed intervals for an HSP tiling algorithm
Package variables
No package variables defined.
Included modules
Exporter
Synopsis
Not used directly.
Description
Not used directly.
Methods
BEGIN Code
interval_tilingDescriptionCode
decompose_intervalDescriptionCode
are_disjointDescriptionCode
min_covering_intervalDescriptionCode
get_intervals_from_hspsDescriptionCode
_allowable_filtersDescriptionCode
_set_attributesDescriptionCode
_mapping_coeff
No description
Code
_ints_as_text
No description
Code
containing_hspsDescriptionCode
covering_groupsDescriptionCode
Methods description
interval_tiling code    nextTop
 Title   : interval_tiling()
Usage : @tiling = interval_tiling( \@array_of_intervals )
Function: Find minimal set of intervals covering the input set
Returns : array of arrayrefs of the form
( [$interval => [ @indices_of_collapsed_input_intervals ]], ...)
Args : arrayref of intervals
decompose_intervalcodeprevnextTop
 Title   : decompose_interval
Usage : @decomposition = decompose_interval( \@overlappers )
Function: Calculate the disjoint decomposition of a set of
overlapping intervals, each annotated with a list of
covering intervals
Returns : array of arrayrefs of the form
( [[@interval] => [@indices_of_coverers]], ... )
Args : arrayref of intervals (arrayrefs like [$a0, $a1], with
Note : Each returned interval is associated with a list of indices of the
original intervals that cover that decomposition component
(scalar size of this list could be called the 'coverage coefficient')
Note : Coverage: each component of the decomp is completely contained
in the input intervals that overlap it, by construction.
Caveat : This routine expects the members of @overlappers to overlap,
but doesn't check this.
are_disjointcodeprevnextTop
 Title   : are_disjoint
Usage : are_disjoint( [$a0, $a1], [$b0, $b1] )
Function: Determine if two intervals are disjoint
Returns : True if the intervals are disjoint, false if they overlap
Args : array of two intervals
min_covering_intervalcodeprevnextTop
 Title   : min_covering_interval 
Usage : $interval = min_covering_interval( [$a0,$a1],[$b0,$b1] )
Function: Determine the minimal covering interval for two intervals
Returns : an interval
Args : two intervals
get_intervals_from_hspscodeprevnextTop
 Title   : get_intervals_from_hsps
Usage : @intervals = get_intervals_from_hsps($type, @hsp_objects)
Function: Return array of intervals of the form [ $start, $end ],
from an array of hsp objects
Returns : an array of intervals
Args : scalar $type, array of HSPI objects; where $type is one of 'hit',
'subject', 'query'
_allowable_filterscodeprevnextTop
    
Title : _allowable_filters
Usage : _allowable_filters($Bio_Search_Hit_HitI, $type)
Function: Return the HSP filters (strand, frame) allowed,
based on the reported algorithm
Returns : String encoding allowable filters:
s = strand, f = frame
Empty string if no filters allowed
undef if algorithm unrecognized
Args : A Bio::Search::Hit::HitI object,
scalar $type, one of 'hit', 'subject', 'query';
default is 'query'
_set_attributescodeprevnextTop
 Title   : _set_attributes
Usage : $tiling->_set_attributes()
Function: Sets attributes for invocant
that depend on algorithm name
Returns : True on success
Args : none
Note : setting based on the configuration table
%alg_lookup
containing_hsps()codeprevnextTop
 Title   : containing_hsps
Usage : @hsps = containing_hsps($interval, @hsps_to_search)
Function: Return a list of hsps whose coordinates completely contain the
given $interval
Returns : Array of HSP objects
Args : $interval : [$int1, $int2],
array of HSP objects
covering_groups()codeprevnextTop
 Title   : covering_groups
Usage :
Function: divide a list of **ordered,disjoint** intervals (as from a
coverage map) into a set of disjoint covering groups
Returns : array of arrayrefs, each arrayref a covering set of
intervals
Args : array of intervals
Methods code
BEGINTop
BEGIN {
    our @ISA = qw( Exporter );
    our @EXPORT = qw( get_intervals_from_hsps 
                      interval_tiling 
                      decompose_interval
                      containing_hsps
                      covering_groups
                      _allowable_filters 
                      _set_attributes
                      _mapping_coeff);
}
interval_tilingdescriptionprevnextTop
sub interval_tiling {
    return unless $_[0]; # no input
my $n = scalar @{$_[0]}; my %input; @input{(0..$n-1)} = @{$_[0]}; my @active = (0..$n-1); my @hold; my @tiled_ints; my @ret; while (@active) { my $tgt = $input{my $tgt_i = shift @active}; push @tiled_ints, $tgt_i; my $tgt_not_disjoint = 1; while ($tgt_not_disjoint) { $tgt_not_disjoint = 0; while (my $try_i = shift @active) { my $try = $input{$try_i}; if ( !are_disjoint($tgt, $try) ) { $tgt = min_covering_interval($tgt,$try); push @tiled_ints, $try_i; $tgt_not_disjoint = 1; } else { push @hold, $try_i; } } if (!$tgt_not_disjoint) { push @ret, [ $tgt => [@tiled_ints] ]; @tiled_ints = (); } @active = @hold; @hold = (); } } return @ret;
}
decompose_intervaldescriptionprevnextTop
sub decompose_interval {
    return unless $_[0]; # no input
my @ints = @{$_[0]}; my (%flat,@flat); ### this is ok, but need to handle the case where a lh and rh endpoint
### coincide...
# decomposition --
# flatten:
# every lh endpoint generates (lh-1, lh)
# every rh endpoint generates (rh, rh+)
foreach (@ints) { $flat{$$_[0]-1}++; $flat{$$_[0]}++; $flat{$$_[1]}++; $flat{$$_[1]+1}++; } # sort, create singletons if nec.
my @a; @a = sort {$a<=>$b} keys %flat; # throw out first and last (meeting a boundary condition)
shift @a; pop @a; # look for singletons
@flat = (shift @a, shift @a); if ( $flat[1]-$flat[0] == 1 ) { @flat = ($flat[0],$flat[0], $flat[1]); } while (my $a = shift @a) { if ($a-$flat[-2]==2) { push @flat, $flat[-1]; # create singleton interval
} push @flat, $a; } if ($flat[-1]-$flat[-2]==1 and @flat % 2) { push @flat, $flat[-1]; } # component intervals are consecutive pairs
my @decomp; while (my $a = shift @flat) { push @decomp, [$a, shift @flat]; } # for each component, return a list of the indices of the input intervals
# that cover the component.
my @coverage; foreach my $i (0..$#decomp) { foreach my $j (0..$#ints) { unless (are_disjoint($decomp[$i], $ints[$j])) { if (defined $coverage[$i]) { push @{$coverage[$i]}, $j; } else { $coverage[$i] = [$j]; } } } } return map { [$decomp[$_] => $coverage[$_]] } (0..$#decomp);
}
are_disjointdescriptionprevnextTop
sub are_disjoint {
    my ($int1, $int2) = @_;
    return 1 if ( $$int1[1] < $$int2[0] ) || ( $$int2[1] < $$int1[0]);
    return 0;
}
min_covering_intervaldescriptionprevnextTop
sub min_covering_interval {
    my ($int1, $int2) = @_;
    my @a = sort {$a <=> $b} (@$int1, @$int2);
    return [$a[0],$a[-1]];
}
get_intervals_from_hspsdescriptionprevnextTop
sub get_intervals_from_hsps {
    my $type = shift;
    my @hsps;
    if (ref($type) =~ /HSP/) {
	push @hsps, $type;
	$type = 'query';
    }
    elsif ( !grep /^$type$/,qw( hit subject query ) ) {
	die "Unknown HSP type '$type'";
    }
    $type = 'hit' if $type eq 'subject';
    push @hsps, @_;
    my @ret;
    foreach (@hsps) {
#	push @ret, [ $_->start($type), $_->end($type)];
push @ret, [ $_->range($type) ]; } return @ret; } # fast, clear, nasty, brutish and short.
# for _allowable_filters(), _set_mapping()
# covers BLAST, FAST families
# FASTA is ambiguous (nt or aa) based on alg name only
my $alg_lookup = { 'N' => { 'mapping' => [1,1], 'def_context' => ['p_','p_'], 'has_strand' => [1, 1], 'has_frame' => [0, 0]}, 'P' => { 'mapping' => [1,1], 'def_context' => ['all','all'], 'has_strand' => [0, 0], 'has_frame' => [0, 0]}, 'X' => { 'mapping' => [3, 1], 'def_context' => [undef,'all'], 'has_strand' => [1, 0], 'has_frame' => [1, 0]}, 'Y' => { 'mapping' => [3, 1], 'def_context' => [undef,'all'], 'has_strand' => [1, 0], 'has_frame' => [1, 0]}, 'TA' => { 'mapping' => [1, 3], 'def_context' => ['all',undef], 'has_strand' => [0, 1], 'has_frame' => [0, 1]}, 'TN' => { 'mapping' => [1, 3], 'def_context' => ['p_',undef], 'has_strand' => [1,1], 'has_frame' => [0, 1]}, 'TX' => { 'mapping' => [3, 3], 'def_context' => [undef,undef], 'has_strand' => [1, 1], 'has_frame' => [1, 1]}, 'TY' => { 'mapping' => [3, 3], 'def_context' => [undef,undef], 'has_strand' => [1, 1], 'has_frame' => [1, 1]} };
}
_allowable_filtersdescriptionprevnextTop
sub _allowable_filters {
    my $hit = shift;
    my $type = shift;
    $type ||= 'q';
    unless (grep /^$type$/, qw( h q  s ) ) {
	warn("Unknown type $type'; returning ''");
	return '';
    }
    $type = 'h' if $type eq 's';
    my $alg = $hit->algorithm;

    # pretreat (i.e., kludge it)
$alg =~ /^RPS/ and ($alg) = ($alg =~ /\(([^)]+)\)/); for ($hit->algorithm) { /MEGABLAST/i && do { return qr/[s]/;
}; /(.?)BLAST(.?)/i && do { return $$alg_lookup{$1.$2}{$type}; }; /(.?)FAST(.?)/ && do { return $$alg_lookup{$1.$2}{$type}; }; do { # unrecognized
last; }; } return;
}
_set_attributesdescriptionprevnextTop
sub _set_attributes {
    my $self = shift;
    my $alg = $self->hit->algorithm;

    # pretreat (i.e., kludge it)
$alg =~ /^RPS/ and ($alg) = ($alg =~ /\(([^)]+)\)/); for ($alg) { /MEGABLAST/i && do { ($self->{_mapping_query},$self->{_mapping_hit}) = (1,1); ($self->{_def_context_query},$self->{_def_context_hit}) = ('p_','p_'); ($self->{_has_frame_query},$self->{_has_frame_hit}) = (0, 0); ($self->{_has_strand_query},$self->{_has_strand_hit}) = (1, 1); last; }; /(.?)BLAST(.?)/i && do { ($self->{_mapping_query},$self->{_mapping_hit}) = @{$$alg_lookup{$1.$2}{mapping}}; ($self->{_def_context_query},$self->{_def_context_hit}) = @{$$alg_lookup{$1.$2}{def_context}}; ($self->{_has_frame_query},$self->{_has_frame_hit}) = @{$$alg_lookup{$1.$2}{has_frame}}; ($self->{_has_strand_query},$self->{_has_strand_hit}) = @{$$alg_lookup{$1.$2}{has_strand}}; last; }; /(.?)FAST(.?)/ && do { ($self->{_mapping_query},$self->{_mapping_hit}) = @{$$alg_lookup{$1.$2}{mapping}}; ($self->{_def_context_query},$self->{_def_context_hit}) = @{$$alg_lookup{$1.$2}{def_context}}; ($self->{_has_frame_query},$self->{_has_frame_hit}) = @{$$alg_lookup{$1.$2}{has_frame}}; ($self->{_has_strand_query},$self->{_has_strand_hit}) = @{$$alg_lookup{$1.$2}{has_strand}}; last; }; do { # unrecognized
$self->warn("Unrecognized algorithm '$alg'; defaults may not work"); ($self->{_mapping_query},$self->{_mapping_hit}) = (1,1); ($self->{_def_context_query},$self->{_def_context_hit}) = ('all','all'); ($self->{_has_frame_query},$self->{_has_frame_hit}) = (0,0); ($self->{_has_strand_query},$self->{_has_strand_hit}) = (0,0); return 0; last; }; } return 1;
}
_mapping_coeffdescriptionprevnextTop
sub _mapping_coeff {
    my $obj = shift;
    my $type = shift;
    my %type_i = ( 'query' => 0, 'hit' => 1 );
    unless ( ref($obj) && $obj->can('algorithm') ) {
	$obj->warn("Object type unrecognized");
	return undef;
    }
    $type ||= 'query';
    unless ( grep(/^$type$/, qw( query hit subject ) ) ) {
	$obj->warn("Sequence type unrecognized");
	return undef;
    }
    $type = 'hit' if $type eq 'subject';
    my $alg = $obj->algorithm;

    # pretreat (i.e., kludge it)
$alg =~ /^RPS/ and ($alg) = ($alg =~ /\(([^)]+)\)/); for ($alg) { /MEGABLAST/i && do { return 1; }; /(.?)BLAST(.?)/i && do { return $$alg_lookup{$1.$2}{'mapping'}[$type_i{$type}]; }; /(.?)FAST(.?)/ && do { return $$alg_lookup{$1.$2}{'mapping'}[$type_i{$type}]; }; do { # unrecognized
last; }; } return; } # a graphical depiction of a set of intervals
}
_ints_as_textdescriptionprevnextTop
sub _ints_as_text {
    my $ints = shift;
    my @ints = @$ints;
    my %pos;
    for (@ints) {
	$pos{$$_[0]}++;
	$pos{$$_[1]}++;
    }
    
    my @pos = sort {$a<=>$b} keys %pos;
    @pos = map {sprintf("%03d",$_)} @pos;
#header
my $max=0; $max = (length > $max) ? length : $max for (@pos); for my $j (0..$max-1) { my $i = $max-1-$j; my @line = map { substr($_, $j, 1) || '0' } @pos; print join('', @line), "\n"; } print '-' x @pos, "\n"; undef %pos; @pos{map {sprintf("%d",$_)} @pos} = (0..@pos); foreach (@ints) { print ' ' x $pos{$$_[0]}, '[', ' ' x ($pos{$$_[1]}-$pos{$$_[0]}-1), ']', ' ' x (@pos-$pos{$$_[1]}), "\n"; }
}
containing_hspsdescriptionprevnextTop
sub containing_hsps {
    my $intvl = shift;
    my @hsps = @_;
    my @ret;
    my ($beg, $end) = @$intvl;
    foreach my $hsp (@hsps) {
	my ($start, $stop) = ($hsp->start, $hsp->end);
	push @ret, $hsp if ( $start <= $beg and $end <= $stop );
    }
    return @ret;
}
covering_groupsdescriptionprevnextTop
sub covering_groups {
    my @intervals = @_;
    return unless @intervals;
    my (@groups, $grp);
    push @{$groups[0]}, shift @intervals;
    $grp = $groups[0];
    for (my $intvl = shift @intervals; @intervals; $intvl = shift @intervals) {
	if ( $intvl->[0] - $grp->[-1][1] == 1 ) { # intervals are direcly adjacent
push @$grp, $intvl; } else { $grp = [$intvl]; push @groups, $grp; } } return @groups; } 1; # need our own subsequencer for hsps.
package Bio::Search::HSP::HSPI; use strict; use warnings;
}
General documentation
NOTETop
An "interval" in this module is defined as an arrayref [$a0, $a1], where
$a0, $a1 are scalar numbers satisfying $a0 E<lt>= $a1.
AUTHORTop
Mark A. Jensen - maj -at- fortinbras -dot- us
APPENDIXTop