Bio::Tools
Fgenesh
Summary
Bio::Tools::Fgenesh - parse results of one Fgenesh run
Package variables
Privates (from "my" definitions)
%ExonTags = ('CDSf' => 'Initial', 'CDSi' => 'Internal', 'CDSl' => 'Terminal', 'CDSo' => 'Singleton')
Included modules
Inherit
Synopsis
use Bio::Tools::Fgenesh;
$fgenesh = Bio::Tools::Fgenesh->new(-file => 'result.fgenesh');
# filehandle:
$fgenesh = Bio::Tools::Fgenesh->new( -fh => \*INPUT );
# parse the results
# note: this class is-a Bio::Tools::AnalysisResult which implements
# Bio::SeqAnalysisParserI, i.e., $fgenesh->next_feature() is the same
while($gene = $fgenesh->next_prediction()) {
# $gene is an instance of Bio::Tools::Prediction::Gene, which inherits
# off Bio::SeqFeature::Gene::Transcript.
#
# $gene->exons() returns an array of
# Bio::Tools::Prediction::Exon objects
# all exons:
@exon_arr = $gene->exons();
# initial exons only
@init_exons = $gene->exons('Initial');
# internal exons only
@intrl_exons = $gene->exons('Internal');
# terminal exons only
@term_exons = $gene->exons('Terminal');
# singleton exons:
($single_exon) = $gene->exons();
}
# essential if you gave a filename at initialization (otherwise the file
# will stay open)
$fgenesh->close();
Description
The Fgenesh module provides a parser for Fgenesh (version 2) gene structure
prediction output. It parses one gene prediction into a
Bio::SeqFeature::Gene::Transcript-derived object.
This module also implements the
Bio::SeqAnalysisParserI interface, and thus
can be used wherever such an object fits.
Methods
Methods description
Usage : $fgenesh->analysis_method(); Purpose : Inherited method. Overridden to ensure that the name matches /fgenesh/i. Returns : String Argument : n/a |
Title : next_feature Usage : while($gene = $fgenesh->next_feature()) { # do something } Function: Returns the next gene structure prediction of the Fgenesh result file. Call this method repeatedly until FALSE is returned.
The returned object is actually a SeqFeatureI implementing object.
This method is required for classes implementing the
SeqAnalysisParserI interface, and is merely an alias for
next_prediction() at present.
Example :
Returns : A Bio::Tools::Prediction::Gene object.
Args : |
Title : next_prediction Usage : while($gene = $fgenesh->next_prediction()) { ... } Function: Returns the next gene structure prediction of the Fgenesh result file. Call this method repeatedly until FALSE is returned. Example : Returns : A Bio::Tools::Prediction::Gene object. Args : |
Title : _parse_predictions() Usage : $obj->_parse_predictions() Function: Parses the prediction section. Automatically called by next_prediction() if not yet done. Example : Returns : |
Title : _prediction() Usage : $gene = $obj->_prediction() Function: internal Example : Returns : |
Title : _add_prediction() Usage : $obj->_add_prediction($gene) Function: internal Example : Returns : |
Title : _predictions_parsed Usage : $obj->_predictions_parsed Function: internal Example : Returns : TRUE or FALSE |
Title : _has_cds() Usage : $obj->_has_cds() Function: Whether or not the result contains the predicted CDSs, too. Example : Returns : TRUE or FALSE |
Title : _read_fasta_seq() Usage : ($id,$seqstr) = $obj->_read_fasta_seq(); Function: Simple but specialised FASTA format sequence reader. Uses $self->_readline() to retrieve input, and is able to strip off the trailing description lines. Example : Returns : An array of two elements: fasta_id & sequence |
Methods code
| _initialize_state | description | prev | next | Top |
# Title   : _initialize_state
# Usage   : $obj->_initialize_state(@args)
# Function: Resets the parser's bookkeeping. The parent class is given the
#           chance to initialise itself first (all arguments are handed on
#           unchanged); afterwards both status flags are cleared and both
#           queues (parsed predictions, pending sequences) are emptied.
# Args    : whatever the parent's _initialize_state() accepts
sub _initialize_state {
    my ($self, @passthru) = @_;

    $self->SUPER::_initialize_state(@passthru);

    # nothing parsed yet, no CDS seen yet
    @{$self}{ '_preds_parsed', '_has_cds' } = (0, 0);

    # fresh, empty containers
    $self->{'_preds'}    = [];
    $self->{'_seqstack'} = [];
}
# Title   : analysis_method
# Usage   : $fgenesh->analysis_method();
# Function: Accessor inherited from the parent class, overridden so that a
#           new value is only accepted when it matches /fgenesh/i.
# Returns : whatever the parent's analysis_method() returns
# Args    : optional method name; a false value is passed through unchecked
# Throws  : via $self->throw() if a true value does not match /fgenesh/i
sub analysis_method {
    my ($self, $method) = @_;

    my $acceptable = !$method || $method =~ /fgenesh/i;
    $self->throw("method $method not supported in " . ref($self))
        unless $acceptable;

    return $self->SUPER::analysis_method($method);
}
# Title   : next_feature
# Usage   : while($gene = $fgenesh->next_feature()) { ... }
# Function: Alias for next_prediction(); every argument is forwarded and the
#           result is returned untouched.
# Returns : whatever next_prediction() returns
sub next_feature {
    my $self      = shift;
    my @forwarded = @_;

    return $self->next_prediction(@forwarded);
}
# Title   : next_prediction
# Usage   : while($gene = $fgenesh->next_prediction()) { ... }
# Function: Returns the next gene structure prediction, parsing the
#           prediction table first if that has not happened yet. If a
#           predicted sequence belonging to the gene is available (either
#           left over on the internal sequence stack or next in the input),
#           it is attached as predicted protein and/or predicted CDS.
# Returns : the gene object handed out by _prediction(), or undef when no
#           prediction is left
# Args    : none
sub next_prediction {
    my ($self) = @_;

    $self->_parse_predictions() unless $self->_predictions_parsed();

    my $gene = $self->_prediction();
    return $gene unless $gene;

    # Prefer a sequence that an earlier call could not assign; only read
    # from the input when nothing is pending.
    my $seqobj = pop(@{$self->{'_seqstack'}});
    unless ($seqobj) {
        my ($id, $seq) = $self->_read_fasta_seq();
        # There may be no sequence section at all, or none left: do not
        # fabricate an empty sequence object in that case.
        if (defined($id) && defined($seq)) {
            my $alphabet = ($id =~ /mrna/ || $id =~ /cds/) ? 'dna' : 'protein';
            $seqobj = Bio::PrimarySeq->new('-seq'        => $seq,
                                           '-display_id' => $id,
                                           '-alphabet'   => $alphabet);
        }
    }
    return $gene unless $seqobj;

    # The gene number is the trailing number of the primary tag
    # ("GenePrediction<N>" as assigned by _parse_predictions()).
    my ($prednr) = $gene->primary_tag() =~ /[^0-9]([0-9]+)$/;

    # Take the id from the object itself so that sequences coming off the
    # stack are compared as well (the id was given as -display_id above).
    my $seqid = $seqobj->display_id();

    # Anchor at the end so that gene 1 does not claim sequence 12.
    my $seqtype;
    if (defined($prednr) && defined($seqid)
        && $seqid =~ /_predicted_(\w+)_$prednr\z/) {
        $seqtype = $1;
    }

    if (!defined($seqtype)) {
        # Sequence belongs to another prediction (this gene may consist of
        # signals only); keep it for a later call.
        push(@{$self->{'_seqstack'}}, $seqobj);
    }
    elsif ($seqtype eq 'protein') {
        $gene->predicted_protein($seqobj);
    }
    elsif ($seqtype eq 'mrna' || $seqtype eq 'cds') {
        $self->_has_cds(1);
        $gene->predicted_cds($seqobj);

        # The protein follows the nucleotide record; an mRNA record may be
        # followed by a CDS record first, which is skipped.
        my ($pid, $pseq) = $self->_read_fasta_seq();
        if (defined($pid) && $pid =~ /_cds_/) {
            ($pid, $pseq) = $self->_read_fasta_seq();
        }
        if (defined($pid) && defined($pseq)) {
            $gene->predicted_protein(
                Bio::PrimarySeq->new('-seq'        => $pseq,
                                     '-display_id' => $pid,
                                     '-alphabet'   => "protein"));
        }
    }

    return $gene;
}
# Title   : _parse_predictions()
# Usage   : $obj->_parse_predictions()
# Function: Parses the prediction section line by line. Feature lines
#           (gene number followed by a strand sign) are turned into exon,
#           promoter or poly-A objects and collected in a gene object; a
#           blank line, the "Predicted protein" marker or the end of the
#           input closes the current gene and queues it via
#           _add_prediction(). The "FGENESH <version>" and "Seq name:"
#           header lines are recorded on the way. Parsing stops at the
#           "Predicted protein" marker, which is pushed back for
#           _read_fasta_seq().
# Returns : result of $self->_predictions_parsed(1)
# Throws  : via $self->throw() on a feature line that is neither an exon,
#           a 'PolA' nor a 'TSS' line
sub _parse_predictions {
    my ($self) = @_;
    my $gene;       # gene currently being assembled, if any
    my $seqname;    # value of the most recent "Seq name:" line

    # Hand the gene under construction over to the prediction queue.
    my $finish_gene = sub {
        $gene->seq_id($seqname);
        $self->_add_prediction($gene);
        $gene = undef;
    };

    # A lexical line variable is used so the caller's $_ is left alone.
    while (defined(my $line = $self->_readline())) {
        if ($line =~ /^\s*(\d+)\s+([+\-])/) {
            my $prednr = $1;
            my $strand = ($2 eq '+') ? 1 : -1;
            if (!defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'Fgenesh');
            }
            chomp($line);

            # Field layout used below (index 0 is a placeholder so that the
            # indices are the same whether or not the line is indented):
            #   exon lines : 1 gene, 2 strand, 3 exon number, 4 feature tag,
            #                5 start, 6 '-', 7 end, 8 score,
            #                9 ORF start, 10 '-', 11 ORF end
            #   other lines: 1 gene, 2 strand, 3 feature tag, 4 position,
            #                5 score
            my @flds = ('', split(' ', $line));

            my $is_exon = defined($flds[4]) && exists($ExonTags{$flds[4]});
            my ($predobj, $start, $end);
            if ($is_exon) {
                $predobj = Bio::Tools::Prediction::Exon->new();
                $predobj->score($flds[8]);
                $start = $flds[5];
                $end   = $flds[7];
            } else {
                # single-position signal (poly-A site or TSS)
                $predobj = Bio::SeqFeature::Generic->new();
                $predobj->score($flds[5]);
                $start = $flds[4];
                $end   = $flds[4];
            }
            $predobj->source_tag('Fgenesh');
            $predobj->strand($strand);
            $predobj->start($start);
            $predobj->end($end);

            my $feature = defined($flds[3]) ? $flds[3] : '';
            if ($is_exon) {
                # The tag lives in field 4 on exon lines (field 3 is the
                # exon number there).
                $predobj->primary_tag($ExonTags{$flds[4]} . 'Exon');
                $predobj->is_coding(1);
                my $cod_offset;
                if ($predobj->strand() == 1) {
                    # offset of the first complete codon from the exon start,
                    # mapped from 0..2 to 3,1,2
                    $cod_offset = ($flds[9] - $predobj->start()) % 3;
                    $cod_offset += 3 if ($cod_offset < 1);
                } else {
                    # same, measured from the exon end using the ORF end
                    $cod_offset = ($flds[11] - $predobj->end()) % 3;
                    $cod_offset -= 3 if ($cod_offset >= 0);
                    $cod_offset = -$cod_offset;
                }
                $predobj->frame(3 - $cod_offset);
                # NOTE(review): $flds[1] is the gene number, so this lookup
                # always yields undef and the exon is added without an
                # explicit type. Left unchanged because how add_exon()
                # treats a defined type is not visible here -- confirm
                # whether $ExonTags{$flds[4]} was intended.
                $gene->add_exon($predobj, $ExonTags{$flds[1]});
            } elsif ($feature eq 'PolA') {
                $predobj->primary_tag("PolyAsite");
                $gene->poly_A_site($predobj);
            } elsif ($feature eq 'TSS') {
                $predobj->primary_tag("Promoter");
                $gene->add_promoter($predobj);
            } else {
                $self->throw("unrecognized prediction line: " . $line);
            }
            next;
        }
        if ($line =~ /^\s*$/ && defined($gene)) {
            # a blank line terminates the current gene
            $finish_gene->();
            next;
        }
        if ($line =~ /^(FGENESH)\s+([\d\.]+)/) {
            my ($program, $version) = ($1, $2);
            $self->analysis_method($program);
            $self->analysis_method_version($version);
            if ($line =~ /\s(\S+)\sgenomic DNA/) {
                $self->analysis_subject($1);
            }
            next;
        }
        if ($line =~ /^\s*Seq name:\s+(\S+)/) {
            $seqname = $1;
            next;
        }
        if ($line =~ /^Predicted protein/) {
            # leave the marker for _read_fasta_seq()
            $self->_pushback($line);
            last;
        }
    }

    # Do not lose a gene that was not followed by a blank line.
    $finish_gene->() if defined($gene);

    $self->_predictions_parsed(1);
}
# Title   : _prediction()
# Usage   : $gene = $obj->_prediction()
# Function: internal; removes and returns the oldest queued prediction
# Returns : the prediction at the front of the '_preds' queue, or nothing
#           (bare return) when the queue is missing or empty
sub _prediction {
    my $self = shift;

    if (exists $self->{'_preds'}) {
        my $queue = $self->{'_preds'};
        return shift(@$queue) if @$queue;
    }
    return;
}
# Title   : _add_prediction()
# Usage   : $obj->_add_prediction($gene)
# Function: internal; appends $gene to the '_preds' queue, creating the
#           queue first if the key does not exist yet
# Returns : the value of push(), i.e. the new queue length
sub _add_prediction {
    my $self = shift;
    my $gene = shift;

    $self->{'_preds'} = [] unless exists $self->{'_preds'};

    return push(@{ $self->{'_preds'} }, $gene);
}
# Title   : _predictions_parsed
# Usage   : $obj->_predictions_parsed
# Function: internal; flag accessor. A true argument is stored; a false or
#           missing argument leaves the stored value alone, except that a
#           missing key is initialised to 0.
# Returns : the stored flag (TRUE or FALSE)
sub _predictions_parsed {
    my ($self, $val) = @_;

    if ($val) {
        $self->{'_preds_parsed'} = $val;
    }
    elsif (!exists $self->{'_preds_parsed'}) {
        $self->{'_preds_parsed'} = 0;
    }

    return $self->{'_preds_parsed'};
}
# Title   : _has_cds()
# Usage   : $obj->_has_cds()
# Function: Whether or not the result contains the predicted CDSs, too.
#           A true argument is stored; a false or missing argument leaves
#           the stored value alone, except that a missing key is
#           initialised to 0.
# Returns : the stored flag (TRUE or FALSE)
sub _has_cds {
    my ($self, $val) = @_;

    if ($val) {
        $self->{'_has_cds'} = $val;
    }
    elsif (!exists $self->{'_has_cds'}) {
        $self->{'_has_cds'} = 0;
    }

    return $self->{'_has_cds'};
}
# Title   : _read_fasta_seq()
# Usage   : ($id,$seqstr) = $obj->_read_fasta_seq();
# Function: Simple but specialised FASTA reader built on $self->_readline().
#           A leading "Predicted protein" line is skipped. The header is
#           translated into an internal id:
#             >FGENESH:   <n> ...             -> _predicted_protein_<n>
#             >FGENESH:[mRNA] <n> ...         -> _predicted_mrna_<n>
#             >FGENESH:[exon] Gene: <n> ...   -> _predicted_cds_<n>
#           For a CDS record the following ">FGENESH:[exon]" headers are
#           treated as continuations: the header lines are dropped and the
#           exon sequences concatenated. Any other ">" line ends the record
#           and is pushed back for the next call.
# Returns : ($id, $sequence) with all whitespace removed from the sequence;
#           $id is undef if the header was not recognised. Empty list if
#           no input is left.
sub _read_fasta_seq {
    my ($self) = @_;
    my ($id, $seq);

    my $entry = $self->_readline();
    return unless ($entry);
    if ($entry =~ /^Predicted protein/) {
        $entry = $self->_readline();
        # the section marker may be the very last line
        return unless defined($entry);
    }

    if ($entry =~ /^>FGENESH:/) {
        if ($entry =~ /^>FGENESH:\s+(\d+)/) {
            $id = "_predicted_protein_" . $1;
        } elsif ($entry =~ /^>FGENESH:\[mRNA\]\s+(\d+)/) {
            $id = "_predicted_mrna_" . $1;
        } elsif ($entry =~ /^>FGENESH:\[exon\]\s+Gene:\s+(\d+)/) {
            $id = "_predicted_cds_" . $1;
        }
        $seq = "";
    } else {
        # Not a header we know: the line is consumed as sequence data so
        # that the reader always makes progress.
        $seq = $entry;
    }

    # Only CDS records span several ">FGENESH:[exon]" headers.
    my $is_cds = (defined($id) && $id =~ /^_predicted_cds_/) ? 1 : 0;

    $entry = $self->_readline();
    while (defined($entry)) {
        if ($entry =~ /^>/) {
            unless ($is_cds && $entry =~ /^>FGENESH:\[exon\]/) {
                # start of the next record
                $self->_pushback($entry);
                last;
            }
            # continuation exon of the same CDS: drop the header line only,
            # the sequence lines after it are collected as usual
        } else {
            $seq .= $entry;
        }
        $entry = $self->_readline();
    }

    $seq =~ s/\s//g;
    return ($id, $seq);
}
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
http://bugzilla.open-bio.org/
Email chris-at-dwan.org
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _