This is the parser for the output of geneid by Enrique Blanco and
Roderic Guigó (IMIM-UPF). See
http://www1.imim.es/software/geneid. It
relies on native geneid output format internally and will work with
geneid versions 1.0 and 1.1. Currently this module supports only the
default mode of operation, which is to predict exons and assemble them
into an optimal gene prediction.
It takes either a file handle or a file name and returns a
Bio::SeqFeature::Gene::GeneStructure object.
# next_prediction
#
# Reads lines from the underlying stream until all exon lines belonging to
# one gene have been consumed, and assembles them into a single gene.
#
# Returns: a Bio::SeqFeature::Gene::GeneStructure holding one
#          Bio::SeqFeature::Gene::Transcript, or undef when no further exon
#          line is found before the input is exhausted.
#
# Line handling:
#   * ">ID|GeneId..." header lines supply the sequence ID, which is stored
#     through _target_id and later copied onto the gene, its transcript and
#     every exon.
#   * Lines that start with Single, First, Internal or Terminal are exon
#     records. The last whitespace-separated field is the gene ID; the
#     first eleven fields are handed to _add_exon.
#   * Every other line is ignored.
#
# The first exon line of the *next* gene is pushed back onto the stream so
# that the following call starts with it.
sub next_prediction
{
    my ($self) = @_;

    my ($gene, $transcript, $current_gene_id);
    my $transcript_score = 0;

    # A lexical buffer is used instead of the global $_ so that a caller's
    # $_ (and anything it is aliased to in a for/map) is never modified.
    while (defined(my $line = $self->_readline))
    {
        $self->debug($line) if ($self->verbose > 0);

        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        if ($line =~ /^>(\S+)\|GeneId/)
        {
            # Copy the capture before calling out; $1 is not stable across
            # further pattern matches.
            my $target_id = $1;

            # Always record the header just read: keeping only the first
            # one would label predictions for later sequences with the
            # first sequence's ID.
            $self->_target_id($target_id);
            next;
        }

        # The exon type is parsed as the first field below, so only accept
        # lines that actually begin with one. An unanchored match would also
        # accept e.g. a comment line whose sequence name contains "First".
        next unless $line =~ /^(?:Single|First|Internal|Terminal)\b/;

        my @fields  = split(/\s+/, $line);
        my $gene_id = pop @fields;

        # Field order, as consumed by _add_exon: type, start, end, score,
        # strand, start phase, end phase, start signal score, end signal
        # score, coding potential score, homology score.
        my @exon_fields = @fields[0..10];
        my $exon_score  = $exon_fields[3];

        if (! defined $current_gene_id)
        {
            # First exon of a new gene: create the containers.
            $current_gene_id  = $gene_id;
            $transcript_score = $exon_score;

            $gene = Bio::SeqFeature::Gene::GeneStructure->new(-source =>
                                                              $SOURCE_TAG);
            $transcript = Bio::SeqFeature::Gene::Transcript->new(-source =>
                                                                 $SOURCE_TAG);
        }
        elsif ($gene_id eq $current_gene_id)
        {
            $transcript_score += $exon_score;
        }
        else
        {
            # This exon belongs to the next gene; leave it for the next call.
            $self->_pushback($line);
            last;
        }

        $self->_add_exon($gene, $transcript, @exon_fields);
    }

    if (defined $gene)
    {
        my $target_id = $self->_target_id;

        $transcript->seq_id($target_id);
        $transcript->score($transcript_score);
        $gene->add_transcript($transcript);
        $gene->seq_id($target_id);

        foreach my $exon ($gene->exons)
        {
            $exon->seq_id($target_id);
        }

        $self->_set_strand($gene);
    }

    return $gene;
}
# _add_exon
#
# Builds a coding Bio::SeqFeature::Gene::Exon from one parsed exon record
# and attaches it to the given transcript.
#
# Arguments (positional): gene, transcript, exon type, start, end, score,
#   strand symbol, start phase, end phase, start signal score, end signal
#   score, coding potential score, homology score.
#
# The exon type "First" is renamed to "Initial" before use. A strand symbol
# of '+' maps to 1, anything else to -1. The transcript's strand is set from
# the exon only while it still compares equal to 0.
#
# Returns whatever $transcript->add_exon returns.
sub _add_exon
{
    my $self       = shift;
    my $gene       = shift;
    my $transcript = shift;
    my ($type, $start, $end, $score, $strand_symbol, $phase, $phase_end,
        $sig_start, $sig_end, $coding_potential, $homology) = @_;

    $type =~ s/First/Initial/;

    my $strand = -1;
    $strand = 1 if $strand_symbol eq '+';

    my $exon = Bio::SeqFeature::Gene::Exon->new(
        -source => $SOURCE_TAG,
        -start  => $start,
        -end    => $end,
        -strand => $strand,
        -score  => $score,
    );
    $exon->is_coding(1);

    # Tag/value pairs, kept in an array so they are added in a fixed order.
    my @annotations = (
        [ "Type",                   $type             ],
        [ 'phase',                  $phase            ],
        [ 'end_phase',              $phase_end        ],
        [ 'start_signal_score',     $sig_start        ],
        [ 'end_signal_score',       $sig_end          ],
        [ 'coding_potential_score', $coding_potential ],
        [ 'homology_score',         $homology         ],
    );
    foreach my $pair (@annotations)
    {
        my ($tag, $value) = @{$pair};
        $exon->add_tag_value($tag, $value);
    }

    if ($transcript->strand == 0)
    {
        $transcript->strand($strand);
    }

    return $transcript->add_exon($exon, $type);
}
The rest of the documentation details each of the object methods.
Internal methods are usually prefixed with an underscore (_).