This class transforms KEGG gene records into Bio::Seq objects.
This section is supposed to document which sections and properties of
a KEGG databank record end up where in the Bioperl object model. It
is far from complete and presently focuses only on those mappings
which may be non-obvious. $seq in the text refers to the
Bio::Seq::RichSeqI implementing object returned by the parser for each
record.
'ENTRY'
$seq->accession_number
'NAME'
$seq->display_id
'DEFINITION'
$seq->annotation->get_Annotations('description');
'ORTHOLOG'
grep {$_->database eq 'KO'} $seq->annotation->get_Annotations('dblink')
'CLASS'
grep {$_->database eq 'PATH'}
$seq->annotation->get_Annotations('dblink')
'POSITION'
FIXME, NOT IMPLEMENTED
'PATHWAY'
for my $pathway ( $seq->annotation->get_Annotations('pathway') ) {
#
}
'DBLINKS'
$seq->annotation->get_Annotations('dblink')
'CODON_USAGE'
FIXME, NOT IMPLEMENTED
'AASEQ'
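$seq->translate->seq
(the AASEQ text of the record itself is stored as a comment: $seq->annotation->get_Annotations('aa_seq'))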
'NTSEQ'
$seq->seq
# Finish construction of the parser object.
#
# Args    : the constructor arguments, passed on unchanged to the parent
#           class's _initialize.
#
# Resets the '_func_ftunit_hash' slot to an empty hash and, when no sequence
# factory has been set yet, installs a Bio::Seq::SeqFactory that is
# configured with -type 'Bio::Seq::RichSeq' and this parser's verbosity.
sub _initialize {
    my ( $self, @args ) = @_;
    $self->SUPER::_initialize(@args);

    $self->{'_func_ftunit_hash'} = {};

    if ( !defined $self->sequence_factory ) {
        # Explicit Class->new call: the indirect-object form
        # ("new Class (...)") is ambiguous to the parser and is disabled
        # altogether under newer feature bundles.
        $self->sequence_factory(
            Bio::Seq::SeqFactory->new(
                -verbose => $self->verbose(),
                -type    => 'Bio::Seq::RichSeq',
            )
        );
    }
}
# Read the next KEGG record from the stream and turn it into a sequence
# object.
#
# Usage   : my $seq = $stream->next_seq();
# Args    : none (extra arguments are ignored)
# Returns : the object made by the sequence builder, or nothing when
#           _readline yields no more data
# Throws  : when the record does not begin with an ENTRY line
#
# The record is split into top-level fields (a field starts on a line whose
# first character is not whitespace). The fields are then mapped as follows:
#   ENTRY      -> -accession_number (2nd token), -species common name (4th)
#   NAME       -> -display_id
#   DEFINITION -> 'description' comment annotation (whitespace collapsed)
#   AASEQ      -> 'aa_seq' comment annotation (whitespace removed)
#   NTSEQ      -> -seq (whitespace removed), alphabet is always 'dna'
#   ORTHOLOG, MOTIF, DBLINKS -> 'dblink' annotations
#   PATHWAY    -> 'pathway' comment annotations
#   CLASS      -> a 'pathway' comment plus a 'dblink' per "[DB:ID]" entry
sub next_seq {
    my ($self) = @_;
    my $builder = $self->sequence_builder();

    my %params = ( -verbose => $self->verbose );

    # Records are terminated by a '///' line, so read one record at a time.
    local $/ = "///\n";
    my $buffer = $self->_readline();
    return if !defined $buffer;

    $buffer =~ /^ENTRY/
        or $self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got '$buffer'");

    # Split into top-level fields; continuation lines start with whitespace
    # and therefore stay attached to the field they belong to.
    my %FIELDS;
    foreach my $chunk ( split /\n(?=\S)/, $buffer ) {
        my ($key) = $chunk =~ /^(\S+)/;
        $FIELDS{$key} = $chunk;
    }

    # ENTRY line tokens: keyword, id, sequence type (unused), species.
    my ( undef, $entry_id, undef, $entry_species ) = split( ' ', $FIELDS{ENTRY} );

    my $name;
    if ( $FIELDS{NAME} ) {
        ($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
    }

    # Each value below stays undef when its field is missing or malformed;
    # the clean-up substitution only runs on a successful capture so that no
    # "uninitialized value" warning is raised.
    my ( $definition, $aa_seq, $nt_seq );
    if ( exists $FIELDS{DEFINITION}
        and $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s )
    {
        $definition = $1;
        $definition =~ s/\s+/ /gs;
    }
    if ( exists $FIELDS{AASEQ}
        and $FIELDS{AASEQ} =~ /^AASEQ\s+\d+\n(.+)$/s )
    {
        $aa_seq = $1;
        $aa_seq =~ s/\s+//g;
    }
    if ( exists $FIELDS{NTSEQ}
        and $FIELDS{NTSEQ} =~ /^NTSEQ\s+\d+\n(.+)$/s )
    {
        $nt_seq = $1;
        $nt_seq =~ s/\s+//g;
    }

    my $annotation = Bio::Annotation::Collection->new();
    $annotation->add_Annotation( 'description',
        Bio::Annotation::Comment->new( -text => $definition ) );
    $annotation->add_Annotation( 'aa_seq',
        Bio::Annotation::Comment->new( -text => $aa_seq ) );

    if ( $FIELDS{ORTHOLOG} ) {
        my ( $ortholog_db, $ortholog_id, $ortholog_desc ) =
            $FIELDS{ORTHOLOG} =~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;

        # Only record the link when the line could actually be parsed;
        # otherwise an empty dblink (undef database and id) would be added.
        if ( defined $ortholog_db ) {
            $annotation->add_Annotation(
                'dblink',
                Bio::Annotation::DBLink->new(
                    -database   => $ortholog_db,
                    -primary_id => $ortholog_id,
                    -comment    => $ortholog_desc,
                )
            );
        }
    }

    if ( $FIELDS{MOTIF} ) {
        $FIELDS{MOTIF} =~ s/^MOTIF\s+//;

        # One "DB: id id ..." entry per line; each id becomes its own dblink.
        while ( $FIELDS{MOTIF} =~ /\s*?(\S+):\s+(.+?)$/mg ) {
            my ( $db, $ids ) = ( $1, $2 );
            foreach my $id ( split( /\s+/, $ids ) ) {
                $annotation->add_Annotation(
                    'dblink',
                    Bio::Annotation::DBLink->new(
                        -database   => $db,
                        -primary_id => $id,
                        -comment    => "",
                    )
                );
            }
        }
    }

    if ( $FIELDS{PATHWAY} ) {
        $FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
        while ( $FIELDS{PATHWAY} =~ /\s*PATH:\s+(.+)$/mg ) {
            my $pathway_text = $1;
            $annotation->add_Annotation( 'pathway',
                Bio::Annotation::Comment->new( -text => $pathway_text ) );
        }
    }

    if ( $FIELDS{CLASS} ) {
        $FIELDS{CLASS} =~ s/^CLASS\s+//;
        $FIELDS{CLASS} =~ s/\n//g;

        # Entries look like "free text [DB:ID]"; the text becomes a pathway
        # comment and the bracketed part a dblink.
        while ( $FIELDS{CLASS} =~ /(.*?)\[(\S+):(\S+)\]/g ) {
            my ( $pathway, $db, $id ) = ( $1, $2, $3 );
            $pathway =~ s/\s+/ /g;
            $pathway =~ s/\s+$//;
            $pathway =~ s/^\s+//;
            $annotation->add_Annotation( 'pathway',
                Bio::Annotation::Comment->new( -text => $pathway ) );
            $annotation->add_Annotation(
                'dblink',
                Bio::Annotation::DBLink->new(
                    -database   => $db,
                    -primary_id => $id,
                )
            );
        }
    }

    if ( $FIELDS{DBLINKS} ) {
        # Replace the keyword with a blank so the first entry is preceded by
        # whitespace like all the others.
        $FIELDS{DBLINKS} =~ s/^DBLINKS/ /;
        while ( $FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n?/gs ) {
            # Copy the captures before handing them to other code.
            my ( $db, $id ) = ( $1, $2 );
            next if !$db;
            $annotation->add_Annotation(
                'dblink',
                Bio::Annotation::DBLink->new(
                    -database   => $db,
                    -primary_id => $id,
                )
            );
        }
    }

    $params{'-alphabet'}         = 'dna';
    $params{'-seq'}              = $nt_seq;
    $params{'-display_id'}       = $name;
    $params{'-accession_number'} = $entry_id;
    $params{'-species'}          = Bio::Species->new( -common_name => $entry_species );
    $params{'-annotation'}       = $annotation;

    $builder->add_slot_value(%params);
    return $builder->make_object();
}
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists