Bio::SeqIO
gbxml
Toolbar
Summary
Bio::SeqIO::gbxml - GenBank sequence input/output stream using SAX
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
It is probably best not to use this object directly, but rather go
through the SeqIO handler system. To read a GenBank XML file:
$stream = Bio::SeqIO->new( -file => $filename, -format => 'gbxml');
while ( my $bioSeqObj = $stream->next_seq() ) {
# do something with $bioSeqObj
}
To write a Seq object to the current file handle in GenBank XML format:
$stream->write_seq( -seq => $seqObj);
If instead you would like an XML::DOM object containing the GBXML, use:
my $newXmlObject = $stream->to_bsml( -seq => $seqObj);
Description
This object can transform Bio::Seq objects to and from GenBank XML
flatfiles.
Methods
| _initialize | No description | Code |
| next_seq | Description | Code |
| start_document | No description | Code |
| end_document | No description | Code |
| start_element | No description | Code |
| end_element | No description | Code |
| characters | No description | Code |
Methods description
Title : next_seq Usage : my $bioSeqObj = $stream->next_seq Function: Retrieves the next sequence from a SeqIO::gbxml stream. Returns : A reference to a Bio::Seq::RichSeq object Args : |
Methods code
# Title   : _initialize
# Usage   : called by the constructor chain; not intended for direct use
# Function: Runs the parent initialisation with the remaining arguments,
#           creates a SAX parser that uses this object as its handler, and
#           installs a default sequence factory when none is set.
# Returns : nothing
# Args    : forwarded unchanged to SUPER::_initialize
sub _initialize {
    my $self = shift;
    $self->SUPER::_initialize(@_);

    # This object is registered as the handler of the parser it owns.
    $self->{'_parser'} =
        XML::SAX::ParserFactory->parser( 'Handler' => $self );

    # Only supply a factory if the caller (or parent) has not set one.
    unless ( defined $self->sequence_factory ) {
        my $factory = Bio::Seq::SeqFactory->new(
            -verbose => $self->verbose(),
            -type    => 'Bio::Seq::RichSeq'
        );
        $self->sequence_factory($factory);
    }

    return;
}
# Title   : next_seq
# Usage   : my $bioSeqObj = $stream->next_seq
# Function: Hands out the next queued sequence. The parser is run on the
#           stream's filehandle only when nothing is queued and the
#           filehandle is not at end-of-file.
# Returns : the next object from the internal sequence queue, or undef
#           when the queue is empty
# Args    : none
sub next_seq {
    my $self = shift;

    # Number of sequences already queued by earlier SAX callbacks.
    my $queued = @{ $self->{'_seendata'}->{'_seqs'} || [] };

    # eof() is consulted only when the queue is empty, as before.
    unless ( $queued || eof( $self->_fh ) ) {
        $self->{'_parser'}->parse_file( $self->_fh );
    }

    return shift @{ $self->{'_seendata'}->{'_seqs'} };
}
} |
# Title   : start_document
# Function: SAX callback. Replaces the per-document state with a fresh
#           hash holding an empty sequence queue, then delegates to the
#           parent class.
# Returns : whatever SUPER::start_document returns
# Args    : the document argument supplied by the SAX driver
sub start_document {
    my ( $self, $doc ) = @_;

    my %fresh_state = ( '_seqs' => [] );
    $self->{'_seendata'} = \%fresh_state;

    return $self->SUPER::start_document($doc);
}
# Title   : end_document
# Function: SAX callback. Performs no work of its own; it only forwards
#           the call to the parent class.
# Returns : whatever SUPER::end_document returns
# Args    : the document argument supplied by the SAX driver
sub end_document {
    my $self = shift;
    my ($doc) = @_;

    return $self->SUPER::end_document($doc);
}
# Title   : start_element
# Function: SAX callback fired when an element opens.
#           - GBSEQ:     a new sequence object is created by the sequence
#                        factory and appended to the sequence queue.
#           - GBFEATURE: a new Bio::SeqIO::FTHelper is appended to the
#                        feature stack.
#           For every element the character buffer is cleared and the
#           upper-cased local name is pushed onto the element-state stack.
# Returns : whatever SUPER::start_element returns
# Args    : the element hash supplied by the SAX driver (LocalName is read)
sub start_element {
    my ( $self, $ele ) = @_;
    my $name = uc( $ele->{'LocalName'} );

    if ( $name eq 'GBSEQ' ) {
        push @{ $self->{'_seendata'}->{'_seqs'} },
            $self->sequence_factory->create();
    } elsif ( $name eq 'GBFEATURE' ) {
        # Direct method call instead of indirect-object "new Class()",
        # whose parse depends on what happens to be in scope.
        my $fthelper = Bio::SeqIO::FTHelper->new();
        $fthelper->verbose( $self->verbose() );
        push @{ $self->{'_seendata'}->{'_feats'} }, $fthelper;
    }

    # Text is accumulated per element, so start from an empty buffer.
    $self->{'_characters'} = '';
    push @{ $self->{'_state'} }, $name;

    return $self->SUPER::start_element($ele);
}
# Title   : end_element
# Function: SAX callback fired when an element closes. The text collected
#           for the element is applied to the sequence at the end of the
#           sequence queue, or to the feature helper at the end of the
#           feature stack, depending on the upper-cased element name.
#           When a GBFEATURE closes, its helper is converted into a
#           sequence feature and attached to the current sequence.
# Returns : whatever SUPER::end_element returns
# Args    : the element hash supplied by the SAX driver (LocalName is read)
sub end_element {
    my ( $self, $ele ) = @_;

    pop @{ $self->{'_state'} };
    my $name    = uc $ele->{'LocalName'};
    my $text    = $self->{'_characters'};
    my $curseq  = $self->{'_seendata'}->{'_seqs'}->[-1];
    my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];

    if ( $name eq 'GBSEQ_LOCUS' ) {
        $curseq->display_id($text);
    } elsif ( $name eq 'GBSEQ_LENGTH' ) {
        $curseq->length($text);
    } elsif ( $name eq 'GBSEQ_MOLTYPE' ) {
        # Matched case-insensitively so that an upper-case "DNA" is not
        # classified as protein.
        # NOTE(review): a bare "RNA" molecule type still falls through to
        # 'protein', as it always has - confirm the intended mapping.
        if ( $text =~ /mRNA|dna/i ) {
            $curseq->alphabet('dna');
        } else {
            $curseq->alphabet('protein');
        }
        $curseq->molecule($text);
    } elsif ( $name eq 'GBSEQ_TOPOLOGY' ) {
        $curseq->is_circular( ( $text =~ /^linear$/i ) ? 0 : 1 );
    } elsif ( $name eq 'GBSEQ_DIVISION' ) {
        $curseq->division($text);
    } elsif ( $name =~ m/GBSEQ_UPDATE-DATE|GBSEQ_CREATE-DATE/ ) {
        my $date = $text;
        if ( $date =~ s/\s*((\d{1,2})-(\w{3})-(\d{2,4})).*/$1/ ) {
            # Copy the captures before any other match can replace them.
            my ( $d, $m, $y ) = ( $2, $3, $4 );

            # Anything shorter than DD-MON-YYYY is padded / expanded.
            if ( length($date) < 11 ) {
                $d = "0$d" if length($d) == 1;
                if ( length($y) == 2 ) {
                    $y = ( $y > 60 ) ? "19$y" : "20$y";
                    $self->warn("Date was malformed, guessing the century for $date to be $y\n");
                }
                # Kept as a plain string so that normalised and
                # already well-formed dates reach add_date in one form.
                $date = join( '-', $d, $m, $y );
            }
            $curseq->add_date($date);
        }
    } elsif ( $name eq 'GBSEQ_DEFINITION' ) {
        $curseq->description($text);
    } elsif ( $name eq 'GBSEQ_PRIMARY-ACCESSION' ) {
        $curseq->accession_number($text);
    } elsif ( $name eq 'GBSEQ_ACCESSION-VERSION' ) {
        # Take the capture from the match result: $1 is left untouched by
        # a failed match and could otherwise carry a stale value.
        my ($version) = $text =~ m/^\w+\.(\d+)/;
        if ($version) {
            $curseq->version($version);
            $curseq->seq_version($version);
        }
    } elsif ( $name eq 'GBSEQID' ) {
        if ( $text =~ m/gi\|(\d+)/ ) {
            $curseq->primary_id($1);
        }
    } elsif ( $name eq 'GBSEQ_SOURCE' ) {
        $self->{'_taxa'}->{'_common'} = $text;
    } elsif ( $name eq 'GBSEQ_ORGANISM' ) {
        my @organelle_prefixes = ( "chloroplast", "mitochondr" );
        my @spflds = split( ' ', $text );

        # The leading word is treated as an organelle when it starts with
        # one of the known prefixes. The prefix is quoted so it is matched
        # literally, and the organism text is no longer used as a pattern.
        if ( @spflds
            and grep { $spflds[0] =~ /^\Q$_\E/i } @organelle_prefixes )
        {
            $self->{'_taxa'}->{'_organelle'} = shift(@spflds);
        }

        $self->{'_taxa'}->{'_genus'}       = shift(@spflds);
        $self->{'_taxa'}->{'_species'}     = shift(@spflds) if (@spflds);
        $self->{'_taxa'}->{'_sub_species'} = shift(@spflds) if (@spflds);
        $self->{'_taxa'}->{'_ns_name'}     = $text;
    } elsif ( $name eq 'GBSEQ_TAXONOMY' ) {
        # Lineage entries are separated by ';' or '.'; trim each one.
        my @class = map {
            my $entry = $_;
            $entry =~ s/^\s+//;
            $entry =~ s/\s+$//;
            $entry;
        } split /[;\.]+/, $text;

        my $genus = $self->{'_taxa'}->{'_genus'};

        # A species object is built only for a usable genus. This is an
        # ordinary conditional: loop control such as "next" has no loop to
        # act on inside this callback.
        if ( $genus and $genus !~ /^(unknown|None)$/i ) {
            if ( @class and $class[0] eq 'Viruses' ) {
                push( @class, $self->{'_taxa'}->{'_ns_name'} );
            } elsif ( @class and $class[-1] eq $genus ) {
                push( @class, $self->{'_taxa'}->{'_species'} );
            } else {
                push( @class, $genus, $self->{'_taxa'}->{'_species'} );
            }
            @class = reverse @class;

            my $make = Bio::Species->new();
            $make->classification( \@class, "FORCE" );
            $make->common_name( $self->{'_taxa'}->{'_common'} )
                if $self->{'_taxa'}->{'_common'};
            unless ( $class[-1] eq 'Viruses' ) {
                $make->sub_species( $self->{'_taxa'}->{'_sub_species'} )
                    if $self->{'_taxa'}->{'_sub_species'};
            }
            $make->organelle( $self->{'_taxa'}->{'_organelle'} )
                if $self->{'_taxa'}->{'_organelle'};
            $curseq->species($make);
        }

        # Always discard the scratch data so that it cannot carry over
        # into the next sequence record.
        delete $self->{'_taxa'};
    } elsif ( $name eq 'GBSEQ_COMMENT' ) {
        $curseq->annotation->add_Annotation( 'comment',
            Bio::Annotation::Comment->new( -text => $text ) )
            if ($text);
    } elsif ( $name eq 'GBFEATURE_KEY' ) {
        $curfeat->key($text);
    } elsif ( $name eq 'GBFEATURE_LOCATION' ) {
        $curfeat->loc($text);
    } elsif ( $name eq 'GBQUALIFIER_NAME' ) {
        # Remembered until the matching GBQUALIFIER_VALUE closes.
        $self->{'_feature'}->{"_qualifer_name"} = $text;
    } elsif ( $name eq 'GBQUALIFIER_VALUE' ) {
        my $qualifier = delete $self->{'_feature'}->{"_qualifer_name"};
        $curfeat->field->{$qualifier} ||= [];
        push( @{ $curfeat->field->{$qualifier} }, $text );
    } elsif ( $name eq 'GBSEQ_SEQUENCE' ) {
        $curseq->seq($text);
    } elsif ( $name eq 'GBFEATURE' ) {
        # $curfeat was read from the end of the stack, so remove it from
        # the same end.
        pop @{ $self->{'_seendata'}->{'_feats'} };

        if ( !defined($curfeat) ) {
            $self->warn("Unexpected error in feature table for ".$curseq->display_id." Skipping feature, attempting to recover");
        } else {
            my $feat = $curfeat->_generic_seqfeature( $self->location_factory(),
                $curseq->display_id );

            # Fill in a missing NCBI taxon id from the source feature's
            # "taxon:" db_xref values.
            if (    $curseq->species
                 && ( $feat->primary_tag eq 'source' )
                 && $feat->has_tag('db_xref')
                 && ( !$curseq->species->ncbi_taxid() ) )
            {
                foreach my $tagval ( $feat->get_tag_values('db_xref') ) {
                    if ( index( $tagval, "taxon:" ) == 0 ) {
                        $curseq->species->ncbi_taxid( substr( $tagval, 6 ) );
                    }
                }
            }
            $curseq->add_SeqFeature($feat);
        }
    }

    return $self->SUPER::end_element($ele);
}
} |
# Title   : characters
# Function: SAX callback delivering character data. The data is appended
#           to the character buffer when at least one element is open;
#           otherwise a warning is issued and the data is not stored.
#           The call is forwarded to the parent class in both cases.
# Returns : whatever SUPER::characters returns
# Args    : the character-data hash supplied by the SAX driver (Data is read)
sub characters {
    my ( $self, $data ) = @_;

    # "|| []" keeps the dereference safe when no element has ever been
    # opened and the state stack therefore does not exist yet, so the
    # warning below can actually be reached.
    if ( !@{ $self->{'_state'} || [] } ) {
        $self->warn("Calling characters with no previous start_element call. Ignoring data");
    } else {
        $self->{'_characters'} .= $data->{'Data'};
    }

    return $self->SUPER::characters($data);
}
1;} |
General documentation
In addition to parts of the Bio:: hierarchy, this module uses:
XML::SAX
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
https://redmine.open-bio.org/projects/bioperl/
Email golharam-at-umdnj-dot-edu