sub tile_hsps
{ my $sbjct = shift;
$sbjct->tiled_hsps(1);
if( $sbjct->num_hsps == 0 || $sbjct->n == 0 ) {
_warn_about_no_hsps($sbjct);
return (undef, undef);
} elsif( $sbjct->n == 1 or $sbjct->num_hsps == 1) {
my $hsp = $sbjct->hsp;
$sbjct->length_aln('query', $hsp->length('query'));
$sbjct->length_aln('hit', $hsp->length('sbjct'));
$sbjct->length_aln('total', $hsp->length('total'));
$sbjct->matches( $hsp->matches() );
$sbjct->gaps('query', $hsp->gaps('query'));
$sbjct->gaps('sbjct', $hsp->gaps('sbjct'));
_adjust_length_aln($sbjct);
return (1, 1);
} else {
$sbjct->length_aln('query', 0);
$sbjct->length_aln('sbjct', 0);
$sbjct->length_aln('total', 0);
$sbjct->matches( 0, 0);
$sbjct->gaps('query', 0);
$sbjct->gaps('hit', 0);
}
my($hsp, $qstart, $qstop, $sstart, $sstop);
my($frame, $strand, $qstrand, $sstrand);
my(@qcontigs, @scontigs);
my $qoverlap = 0;
my $soverlap = 0;
my $max_overlap = $sbjct->overlap;
my $hit_qgaps = 0;
my $hit_sgaps = 0;
my $hit_len_aln = 0;
my %start_stop;
my $v = $sbjct->verbose;
foreach $hsp ( $sbjct->hsps() ) {
($qstart, $qstop) = $hsp->range('query');
($sstart, $sstop) = $hsp->range('sbjct');
$frame = $hsp->frame;
$frame = -1 unless defined $frame;
($qstrand, $sstrand) = ($hsp->query->strand,
$hsp->hit->strand);
my ($qgaps, $sgaps) = ($hsp->gaps('query'), $hsp->gaps('hit'));
$hit_qgaps += $qgaps;
$hit_sgaps += $sgaps;
$hit_len_aln += $hsp->length;
$qoverlap = &_adjust_contigs('query', $hsp, $qstart, $qstop,\@
qcontigs, $max_overlap, $frame,
$qstrand);
$soverlap = &_adjust_contigs('sbjct', $hsp, $sstart, $sstop,\@
scontigs, $max_overlap, $frame,
$sstrand);
unless ( defined $start_stop{'qstart'} ) {
$start_stop{'qstart'} = $qstart;
$start_stop{'qstop'} = $qstop;
$start_stop{'sstart'} = $sstart;
$start_stop{'sstop'} = $sstop;
} else {
$start_stop{'qstart'} = ($qstart < $start_stop{'qstart'} ?
$qstart : $start_stop{'qstart'} );
$start_stop{'qstop'} = ($qstop > $start_stop{'qstop'} ?
$qstop : $start_stop{'qstop'} );
$start_stop{'sstart'} = ($sstart < $start_stop{'sstart'} ?
$sstart : $start_stop{'sstart'} );
$start_stop{'sstop'} = ($sstop > $start_stop{'sstop'} ?
$sstop : $start_stop{'sstop'} );
}
}
$sbjct->gaps('query', $hit_qgaps);
$sbjct->gaps('hit', $hit_sgaps);
$sbjct->length_aln('total', $hit_len_aln);
$sbjct->start('query',$start_stop{'qstart'});
$sbjct->end('query', $start_stop{'qstop'});
$sbjct->start('hit', $start_stop{'sstart'});
$sbjct->end('hit', $start_stop{'sstop'});
my (%qctg_dat);
foreach (@qcontigs) {
($frame, $strand) = ($_->{'frame'}, $_->{'strand'});
if( $v > 0 ) {
}
$qctg_dat{ "$frame$strand" }->{'length_aln_query'} += $_->{'stop'} - $_->{'start'} + 1;
$qctg_dat{ "$frame$strand" }->{'totalIdentical'} += $_->{'iden'};
$qctg_dat{ "$frame$strand" }->{'totalConserved'} += $_->{'cons'};
$qctg_dat{ "$frame$strand" }->{'qstrand'} = $strand;
}
my @sortedkeys = sort { $qctg_dat{$b}->{'length_aln_query'}
<=> $qctg_dat{$a}->{'length_aln_query'} }
keys %qctg_dat;
my $longest = $sortedkeys[0];
$sbjct->length_aln('query', $qctg_dat{ $longest }->{'length_aln_query'});
$sbjct->matches($qctg_dat{ $longest }->{'totalIdentical'},
$qctg_dat{ $longest }->{'totalConserved'});
$sbjct->strand('query', $qctg_dat{ $longest }->{'qstrand'});
my (%sctg_dat);
foreach(@scontigs) {
($frame, $strand) = ($_->{'frame'}, $_->{'strand'});
$sctg_dat{ "$frame$strand" }->{'length_aln_sbjct'} += $_->{'stop'} - $_->{'start'} + 1;
$sctg_dat{ "$frame$strand" }->{'frame'} = $frame;
$sctg_dat{ "$frame$strand" }->{'sstrand'} = $strand;
}
@sortedkeys = sort { $sctg_dat{ $b }->{'length_aln_sbjct'}
<=> $sctg_dat{ $a }->{'length_aln_sbjct'}
} keys %sctg_dat;
$longest = $sortedkeys[0];
$sbjct->length_aln('sbjct', $sctg_dat{ $longest }->{'length_aln_sbjct'});
$sbjct->frame( $sctg_dat{ $longest }->{'frame'} );
$sbjct->strand('hit', $sctg_dat{ $longest }->{'sstrand'});
if($qoverlap) {
if($soverlap) { $sbjct->ambiguous_aln('qs');
}
else { $sbjct->ambiguous_aln('q');
}
} elsif($soverlap) {
$sbjct->ambiguous_aln('s');
}
_adjust_length_aln($sbjct);
return ( [@qcontigs], [@scontigs] ); } |
sub _adjust_length_aln
{ my $sbjct = shift;
my $algo = $sbjct->algorithm;
my $hlen = $sbjct->length_aln('sbjct');
my $qlen = $sbjct->length_aln('query');
$sbjct->length_aln('sbjct', logical_length($algo, 'sbjct', $hlen));
$sbjct->length_aln('query', logical_length($algo, 'query', $qlen));} |
sub _adjust_contigs
{ my ($seqType, $hsp, $start, $stop, $contigs_ref,
$max_overlap, $frame, $strand) = @_;
my $overlap = 0;
my ($numID, $numCons);
foreach (@$contigs_ref) {
next unless ($_->{'frame'} == $frame && $_->{'strand'} == $strand);
if ($start >= $_->{'start'} && $stop <= $_->{'stop'}) {
$overlap = 1;
next;
}
if ($start < $_->{'start'} && $stop >= ($_->{'start'} + $max_overlap - 1)) {
eval {
($numID, $numCons) = $hsp->matches(-SEQ =>$seqType,
-START => $start,
-STOP => $_->{'start'} - 1);
};
if($@) { warn "\a\n$@\n"; }
else {
$_->{'start'} = $start; $_->{'iden'} += $numID; $_->{'cons'} += $numCons;
push(@{$_->{hsps}}, $hsp);
$overlap = 1;
}
}
if ($stop > $_->{'stop'} and $start <= ($_->{'stop'} - $max_overlap + 1)) {
eval {
($numID,$numCons) = $hsp->matches(-SEQ =>$seqType,
-START => $_->{'stop'} + 1,
-STOP => $stop);
};
if($@) { warn "\a\n$@\n"; }
else {
$_->{'stop'} = $stop; $_->{'iden'} += $numID; $_->{'cons'} += $numCons;
push(@{$_->{hsps}}, $hsp);
$overlap = 1;
}
}
last if $overlap;
}
if ($overlap && @$contigs_ref > 1) {
my $max = $#{$contigs_ref};
for my $i (0..$max) {
${$contigs_ref}[$i] || next;
my ($i_start, $i_stop) = (${$contigs_ref}[$i]->{start}, ${$contigs_ref}[$i]->{stop});
for my $u ($i+1..$max) {
${$contigs_ref}[$u] || next;
my ($u_start, $u_stop) = (${$contigs_ref}[$u]->{start}, ${$contigs_ref}[$u]->{stop});
if ($u_start < $i_start && $u_stop >= ($i_start + $max_overlap - 1)) {
my ($ids, $cons) = (0, 0);
my $use_start = $i_start;
foreach my $hsp (sort { $b->end <=> $a->end } @{${$contigs_ref}[$u]->{hsps}}) {
my $hsp_start = $hsp->start;
$hsp_start < $use_start || next;
my ($these_ids, $these_cons);
eval {
($these_ids, $these_cons) = $hsp->matches(-SEQ => $seqType, -START => $hsp_start, -STOP => $use_start - 1);
};
if($@) { warn "\a\n$@\n"; }
else {
$ids += $these_ids;
$cons += $these_cons;
}
last if $hsp_start == $u_start;
$use_start = $hsp_start;
}
${$contigs_ref}[$i]->{start} = $u_start;
${$contigs_ref}[$i]->{'iden'} += $ids;
${$contigs_ref}[$i]->{'cons'} += $cons;
push(@{${$contigs_ref}[$i]->{hsps}}, @{${$contigs_ref}[$u]->{hsps}});
${$contigs_ref}[$u] = undef;
}
elsif ($u_stop > $i_stop && $u_start <= ($i_stop - $max_overlap + 1)) {
my ($ids, $cons) = (0, 0);
my $use_stop = $i_stop;
foreach my $hsp (sort { $a->start <=> $b->start } @{${$contigs_ref}[$u]->{hsps}}) {
my $hsp_end = $hsp->end;
$hsp_end > $use_stop || next;
my ($these_ids, $these_cons);
eval {
($these_ids, $these_cons) = $hsp->matches(-SEQ => $seqType, -START => $use_stop + 1, -STOP => $hsp_end);
};
if($@) { warn "\a\n$@\n"; }
else {
$ids += $these_ids;
$cons += $these_cons;
}
last if $hsp_end == $u_stop;
$use_stop = $hsp_end;
}
${$contigs_ref}[$i]->{'stop'} = $u_stop;
${$contigs_ref}[$i]->{'iden'} += $ids;
${$contigs_ref}[$i]->{'cons'} += $cons;
push(@{${$contigs_ref}[$i]->{hsps}}, @{${$contigs_ref}[$u]->{hsps}});
${$contigs_ref}[$u] = undef;
}
}
}
my @merged;
foreach (@$contigs_ref) {
push(@merged, $_ || next);
}
@{$contigs_ref} = @merged;
}
elsif (! $overlap) {
($numID,$numCons) = $hsp->matches(-SEQ=>$seqType);
push @$contigs_ref, {'start' =>$start, 'stop' =>$stop,
'iden' =>$numID, 'cons' =>$numCons,
'strand'=>$strand,'frame'=>$frame,'hsps'=>[$hsp]};
}
return $overlap;} |
sub collapse_nums
{ my @a = @_;
my ($from, $to, $i, @ca, $consec);
$consec = 0;
for($i=0; $i < @a; $i++) {
not $from and do{ $from = $a[$i]; next; };
if($a[$i] == $a[$i-1]+1) {
$to = $a[$i];
$consec++;
} else {
if($consec == 1) { $from .= ",$to"; }
else { $from .= $consec>1 ? "\-$to" : ""; }
push @ca, split(',', $from);
$from = $a[$i];
$consec = 0;
$to = undef;
}
}
if(defined $to) {
if($consec == 1) { $from .= ",$to"; }
else { $from .= $consec>1 ? "\-$to" : ""; }
}
push @ca, split(',', $from) if $from;
@ca; } |
sub _warn_about_no_hsps
{ my $hit = shift;
my $prev_func=(caller(1))[3];
$hit->warn("There is no HSP data for hit '".$hit->name."'.\n".
"You have called a method ($prev_func)\n".
"that requires HSP data and there was no HSP data for this hit,\n".
"most likely because it was absent from the BLAST report.\n".
"Note that by default, BLAST lists alignments for the first 250 hits,\n".
"but it lists descriptions for 500 hits. If this is the case,\n".
"and you care about these hits, you should re-run BLAST using the\n".
"-b option (or equivalent if not using blastall) to increase the number\n".
"of alignments.\n"
);} |