This writer produces Gbrowse flavour GFF from a Search::Result object.
sub new
{ my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
($self->{'_evalue'},
$self->{'_cigar'},
$self->{'_prefix'},
$self->{'signif'} ) = $self->_rearrange([qw(E_VALUE OUTPUT_CIGAR PREFIX
OUTPUT_SIGNIF)], @args);
$self->{'_evalue'} && warn( "Use of the -e_value argument is deprecated.\nIn future, use\$ writer->filter(\"type\",\& code) instead.\n\tparsing will proceed correctly with this e_value\n");
$self->{Gbrowse_HSPID} = 0;
$self->{Gbrowse_HITID} = 0;
$self->{'_prefix'} ||= $Defaults{'Prefix'};
return $self;} |
sub to_string
{ my ($self, $result, @args) = @_;
my ($format, $reference,
$match_tag,$hsp_tag,
$prefix) = $self->_rearrange([qw
(VERSION
REFERENCE
MATCH_TAG HSP_TAG
PREFIX)], @args);
$self->warn($reference) if $reference;
$reference ||='hit'; $match_tag ||= $Defaults{'MatchTag'}; $hsp_tag ||= $Defaults{'HSPTag'}; $prefix ||= $self->{'_prefix'};
$self->throw("$reference must be one of 'query', or 'hit'\n") unless $reference;
$format ||='3';
my $gffio = Bio::Tools::GFF->new(-gff_version => $format);
return "" unless defined $gffio;
my ($GFF,$cigar,$score);
my ($resultfilter,$hitfilter,$hspfilter) = (
$self->filter('RESULT'),
$self->filter('HIT'),
$self->filter('HSP'));
$result->can('rewind') && $result->rewind(); next if (defined $resultfilter && ! (&{$resultfilter}($result)) );
while( my $hit = $result->next_hit ) {
if (defined $self->{_evalue}){
next unless ($hit->significance < $self->{_evalue});
}
next if( defined $hitfilter && ! &{$hitfilter}($hit) );
my $refseq = $reference eq 'hit' ? $hit->name : $result->query_name;
my $seqname = $reference eq 'hit' ? $result->query_name : $hit->name; if ($self->{_signif}) {
$score = $hit->significance;
} else {
$score = $hit->raw_score;
}
$self->throw("No reference sequence name found in hit; required for GFF (this may not be your fault if your report type does not include reference sequence names)\n") unless $refseq;
my $source = $hit->algorithm;
$self->throw("No algorithm name found in hit; required for GFF (this may not be your fault if your report type does not include algorithm names)\n") unless $refseq;
$self->throw("This module only works on BLASTN reports at this time. Sorry.\n") unless $source eq "BLASTN";
my @plus_hsps;
my @minus_hsps;
my ($qpmin, $qpmax, $qmmin, $qmmax, $spmin, $spmax, $smmin, $smmax); while( my $hsp = $hit->next_hsp ) {
if ( defined $self->{_evalue} ) {
next unless ($hsp->significance < $self->{_evalue});
}
next if( defined $hspfilter && ! &{$hspfilter}($hsp) ); if ($hsp->hit->strand >= 0 ){
push @plus_hsps, $hsp;
if (defined $qpmin){ $qpmin = $hsp->query->start if $hsp->query->start < $qpmin;
$qpmax = $hsp->query->end if $hsp->query->end > $qpmax;
$spmin = $hsp->hit->start if $hsp->hit->start < $spmin;
$spmax = $hsp->hit->end if $hsp->hit->end > $spmax;
} else {
$qpmin = $hsp->query->start;
$qpmax = $hsp->query->end;
$spmin = $hsp->hit->start;
$spmax = $hsp->hit->end;
}
}
if ($hsp->hit->strand < 0 ){
push @minus_hsps, $hsp;
if (defined $qmmin){ $qmmin = $hsp->query->start if $hsp->query->start < $qmmin;
$qmmax = $hsp->query->end if $hsp->query->end > $qmmax;
$smmin = $hsp->hit->start if $hsp->hit->start < $smmin;
$smmax = $hsp->hit->end if $hsp->hit->end > $smmax;
} else {
$qmmin = $hsp->query->start;
$qmmax = $hsp->query->end;
$smmin = $hsp->hit->start;
$smmax = $hsp->hit->end;
}
}
}
next unless (scalar(@plus_hsps) + scalar(@minus_hsps)); my $ID = $self->_incrementHIT();
if (scalar(@plus_hsps)){
my %tags = ( 'ID' => "match_sequence$ID");
if ($format==2.5) {
$tags{'Target'} = "$prefix:$seqname";
$tags{'tstart'} = $qmmin;
$tags{'tend'} = $qmmax;
} else {
$tags{'Target'} = "$prefix:$seqname $qpmin $qpmax";
}
if ( $self->{'_cigar'} ) {
$tags{'Gap'} = $cigar;
}
my $feat = Bio::SeqFeature::Generic->new(
-seq_id => $refseq,
-source_tag => $source,
-primary_tag => $match_tag,
-start => $spmin,
-end => $spmax,
-score => $score,
-strand => '+',
-frame => '.',
-tag =>\% tags
);
my $formatter = Bio::Tools::GFF->new(-gff_version => $format);
$GFF .= $feat->gff_string($formatter)."\n";
}
if (scalar(@minus_hsps)){
my %tags = ( 'ID' => "match_sequence$ID");
if ($format==2.5) {
$tags{'Target'} = "$prefix:$seqname";
$tags{'tstart'} = $qpmax;
$tags{'tend'} = $qpmin;
}
else {
$tags{'Target'} = "$prefix:$seqname $qpmax $qpmin";
}
my $feat = Bio::SeqFeature::Generic->new(
-seq_id => $refseq,
-source_tag => $source,
-primary_tag => $match_tag,
-start => $smmin,
-end => $smmax,
-score => $score,
-strand => '-',
-frame => '.',
-tag =>\% tags
);
my $formatter = Bio::Tools::GFF->new(-gff_version => $format);
$GFF .= $feat->gff_string($formatter)."\n";
}
foreach my $hsp (@plus_hsps){
my $hspID = $self->_incrementHSP();
my $qstart = $hsp->query->start;
my $qend = $hsp->query->end;
my $sstart = $hsp->hit->start;
my $send = $hsp->hit->end;
my $score = $hsp->score;
my %tags = ( 'ID' => "match_hsp$hspID",
'Parent' => "match_sequence$ID" );
if ($format==2.5) {
$tags{'Target'} = "$prefix:$seqname";
$tags{'tstart'} = $qstart;
$tags{'tend'} = $qend;
}
else {
$tags{'Target'} = "$prefix:$seqname $qstart $qend";
}
if ( $self->{'_cigar'} ) {
$tags{'Gap'} = $hsp->cigar_string;
}
my $feat = Bio::SeqFeature::Generic->new(
-seq_id => $refseq,
-source_tag => $source,
-primary_tag => $hsp_tag,
-start => $sstart,
-end => $send,
-score => $score,
-strand => '+',
-frame => '.',
-tag =>\% tags
);
my $formatter = Bio::Tools::GFF->new(-gff_version => $format);
$GFF .= $feat->gff_string($formatter)."\n";
}
foreach my $hsp (@minus_hsps) {
my $hspID = $self->_incrementHSP();
my $qstart = $hsp->query->start;
my $qend = $hsp->query->end;
my $sstart = $hsp->hit->start;
my $send = $hsp->hit->end;
my $score = $hsp->score;
my %tags = ( 'ID' => "match_hsp$hspID",
'Parent' => "match_sequence$ID" );
if ($format==2.5) {
$tags{'Target'} = "$prefix:$seqname";
$tags{'tstart'} = $qend;
$tags{'tend'} = $qstart;
}
else {
$tags{'Target'} = "$prefix:$seqname $qend $qstart";
}
if ( $self->{'_cigar'} ) {
$tags{'Gap'} = $hsp->cigar_string;
}
my $feat = Bio::SeqFeature::Generic->new(
-seq_id => $refseq,
-source_tag => $source,
-primary_tag => $hsp_tag,
-start => $sstart,
-end => $send,
-score => $score,
-strand => '-',
-frame => '.',
-tag =>\% tags
);
my $formatter = Bio::Tools::GFF->new(-gff_version => $format);
$GFF .= $feat->gff_string($formatter) ."\n";
}
}
return $GFF;} |
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _