Bio::SeqIO::game
gameWriter
Summary
Bio::SeqIO::game::gameWriter -- a class for writing game-XML
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
# insert sample code here
Description
# Description goes here
Methods
Methods description
Title : new
Usage : my $writer = Bio::SeqIO::game::gameWriter->new($seq);
Function: constructor method for gameWriter
Returns : a game writer object
Args : a Bio::SeqI implementing object |
Title : write_to_game
Usage : $writer->write_to_game
Function: writes the sequence object to game-XML
Returns : xml as a multiline string
Args : none |
Title : _rearrange
Usage : $self->_rearrange($seq)
Function: internal method to rearrange gene containment hierarchies
so that snRNA or transposon features contain their genes
rather than the other way around
Returns : nothing
Args : a Bio::RichSeq object |
Title : _write_feature
Usage : $seld->_write_feature($feat, 1)
Function: internal method for writing generic features as elements
Returns : nothing
Args : a Bio::SeqFeature::Generic object and an optional flag to write a
bare feature set with no annotation wrapper |
Title : _write_gene
Usage : $self->_write_gene($feature)
Function: internal method for rendering gene containment hierarchies into
an nested element
Returns : nothing
Args : a nested Bio::SeqFeature::Generic gene feature
Note : A nested gene hierarchy (gene->mRNA->CDS->exon) is expected. If other gene
subfeatures occur as level one subfeatures (same level as mRNA subfeats)
an attempt will be made to link them to transcripts via the 'standard_name'
qualifier |
Title : _check_cds
Usage : $self->_check_cds($cds, $name)
Function: internal method to check if the CDS associated with an mRNA is
the correct alternative splice variant
Returns : a Bio::SeqFeature::Generic CDS object
Args : the CDS object plus the transcript\'s 'standard_name'
Note : this method only works if alternatively spliced transcripts are bound
together by a 'standard_name' qualifier. If none is present, we will
hope that the exons were derived from a segmented RNA or a CDS with no
associated mRNA feature. Neither of these two cases would be confused
by alternative splice variants. |
Title : _feature_set_tags
Usage : $self->_feature_set_tags($feature)
Function: an internal method to handle tag/value attributes
for a feature set element
Returns : nothing
Args : a Bio::SeqFeatureI-compliant object |
Title : _property
Usage : $self->_property($tag => $value);
Function: an internal method to write property XML elements
Returns : nothing
Args : a tag/value pair |
Title : _tags
Usage : $self->_tags($feat)
Function: an internal method to intercept GO terms and
db_xrefs and handle generic tag/value pairs for a gene
Returns : nothing
Args : a Bio::SeqFeatureI-compliant object |
Title : _unflatten_attribute
Usage : $self->_unflatten_attribute($name, $value)
Function: an internal method to unflatten and write comment or evidence elements
Returns : nothing
Args : a list of strings |
Title : _xref
Usage : $self->_xref($value)
Function: an internal method to write db_xref elements
Returns : nothing
Args : a list of strings |
Title : _feature_span
Usage : $self->_feature_span($name, $type, $loc)
Function: an internal method to write a feature_span element
(the actual feature with coordinates)
Returns : nothing
Args : a feature name and Bio::SeqFeatureI-compliant object |
Title : _seq_relationship
Usage : $self->_seq_relationship($type, $loc)
Function: an internal method to handle feature_span sequence relationships
Returns : nothing
Args : feature type and a Bio::LocationI-compliant object |
Title : _element
Usage : $self->_element($name, $chars, $atts)
Function: an internal method to generate 'generic' XML elements
Example :
my $name = 'foo';
my $content = 'bar';
my $attributes = { baz => 1 };
# print the element
$self->_element($name, $content, $attributes);
Returns : nothing
Args : the element name and content plus a ref to an attribute hash |
Title : _span
Usage : $self->_span($loc)
Function: an internal method to write the 'span' element
Returns : nothing
Args : a Bio::LocationI-compliant object |
Title : _seq
Usage : $self->_seq($seq, $dna)
Function: an internal method to print the 'sequence' element
Returns : nothing
Args : and Bio::SeqI-compliant object and a reference to an attribute hash |
Title : _find_name
Usage : my $name = $self->_find_name($feature)
Function: an internal method to look for a gene name
Returns : a string
Args : a Bio::SeqFeatureI-compliant object |
Methods code
sub new
{ my ($caller, $seq) = @_;
my $class = ref($caller) || $caller;
my $self = bless ( { seq => $seq }, $class );
return $self;} |
sub write_to_game
{ my $self = shift;
my $seq = $self->{seq};
$self->{feats} = [ $seq->remove_SeqFeatures ];
my @gene_containers = ();
for ( @{$self->{feats}} ) {
if ( $_->primary_tag =~ /snRNA|repeat_region|transpos/ &&
$_->has_tag('gene') ) {
my @genes = $_->get_tag_values('gene');
my ($min, $max) = (10000000000000,-10000000000000);
for my $g ( @genes ) {
my $gene;
for my $item ( @{$self->{feats}} ) {
next unless $item->primary_tag eq 'gene';
my ($n) = $item->get_tag_values('gene');
next unless $n eq $g;
$g = $item;
last;
}
$max = $g->end if $g->end > $max;
$min = $g->start if $g->start < $min;
}
push @gene_containers, $_ if $_->length >= ($max - $min);
}
else {
$seq->add_SeqFeature($_);
}
}
my $uf = Bio::SeqFeature::Tools::Unflattener->new;
$uf->unflatten_seq( -seq => $seq, use_magic => 1 );
$self->_rearrange($seq, @gene_containers);
my $atts = {};
my $xml = '';
my $xml_handle = IO::String->new($xml);
my $writer = XML::Writer->new(OUTPUT => $xml_handle,
DATA_MODE => 1,
DATA_INDENT => 2,
NEWLINE => 1
);
$self->{writer} = $writer;
$writer->xmlDecl("ISO-8859-1");
$writer->doctype("game", 'game', "http://www.fruitfly.org/annot/gamexml.dtd.txt");
$writer->comment("GAME-XML generated by Bio::SeqIO::game::gameWriter");
$writer->comment("Created " . localtime);
$writer->comment('Questions: smckay@bcgsc.bc.ca');
$writer->startTag('game', version => 1.2);
my @sources = grep { $_->primary_tag =~ /source|origin|region/i } $seq->get_SeqFeatures;
for my $source ( @sources ) {
next unless $source->length == $seq->length;
for ( qw{ name description db_xref organism md5checksum } ) {
if ( $source->has_tag($_) ) {
$self->{has_organism} = 1 if /organism/;
($atts->{$_}) = $source->get_tag_values($_);
}
}
}
my $seqname = $seq->accession unless $seq->accession eq 'unknown';
$seqname ||= $seq->display_name;
$atts->{name} ||= $seqname;
$seq->display_name;
$self->_seq($seq, $atts);
my $seqtype;
if ( $atts->{mol_type} || $seq->alphabet ) {
$seqtype = $atts->{mol_type} || $seq->alphabet;
}
else {
$seqtype = 'unknown';
}
$writer->startTag(
'map_position',
seq => $atts->{name},
type => $seqtype
);
my ($arm, $start, undef, $end) = $atts->{name} =~ /(\S+):(-?\d+)(\.\.|-)(-?\d+)/;
$self->_element('arm', $arm) if $arm;
$self->_span($start, $end);
$writer->endTag('map_position');
my @feats = $seq->top_SeqFeatures;
my @addback;
for ( @feats ) {
if ( $_->get_SeqFeatures ) {
$self->_write_gene($_);
}
else {
next if $_->primary_tag =~ /CDS|mRNA|exon|UTR/;
$self->_write_feature($_);
}
}
$writer->endTag('game');
$writer->end;
$xml;} |
sub _rearrange
{ my ($self, $seq, @containers) = @_;
my @feats = $seq->remove_SeqFeatures;
my @genes = grep { $_->primary_tag eq 'gene' } @feats;
my @addback = grep { $_->primary_tag ne 'gene' } @feats;
for ( @containers ) {
my @has_genes = $_->get_tag_values('gene');
for my $has_gene ( @has_genes ) {
for my $gene ( @genes ) {
next unless $gene;
my ($gname) = $gene->get_tag_values('gene');
if ( $gname eq $has_gene ) {
$_->add_SeqFeature($gene);
undef $gene;
}
}
}
}
push @addback, (@containers, grep { defined $_ } @genes );
$seq->add_SeqFeature(@addback);} |
sub _write_feature
{ my ($self, $feat, $bare) = @_;
my $writer = $self->{writer};
my $id = $self->_find_name($feat, 'standard_name') || $feat->primary_tag;
unless ( $bare ) {
$writer->startTag('annotation', id => $id);
$self->_element('name', $id);
$self->_element('type', $feat->primary_tag);
$self->_tags($feat);
}
$writer->startTag('feature_set', id => $id);
$self->_element('name', $id);
$self->_element('type', $feat->primary_tag);
$self->_feature_set_tags($feat);
$self->_feature_span($id, $feat);
$writer->endTag('feature_set');
$writer->endTag('annotation') unless $bare;} |
sub _write_gene
{ my ($self, $feat) = @_;
my $writer = $self->{writer};
my $str = $feat->strand;
my $id = $self->_find_name($feat, 'standard_name');
$id ||= $self->_find_name($feat);
my $gid = $self->_find_name($feat, 'gene') || $id;
$writer->startTag('annotation', id => $id);
$self->_element('name', $gid);
$self->_element('type', $feat->primary_tag);
$self->_tags($feat);
my @genes;
if ( $feat->primary_tag eq 'gene' ) {
@genes = ($feat);
}
else {
@genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
}
for my $g ( @genes ) {
my $id ||= $self->_find_name($g, 'standard_name');
my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);
$writer->startTag('gene', association => 'IS');
$self->_element('name', $gid);
$writer->endTag('gene');
my $proteins;
my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
my @variants = ('A' .. 'Z');
for my $mRNA (@mRNAs) {
my ($sn, @units);
if ( $mRNA->primary_tag eq 'transcript') {
my $exon = Bio::SeqFeature::Generic->new ( -primary => 'exon' );
$exon->location($mRNA->location);
$mRNA->add_SeqFeature($exon);
}
unless ( $mRNA->get_SeqFeatures ) {
$self->_write_feature($mRNA, 1); next;
}
my $name = $self->_find_name($mRNA, 'standard_name');
my %attributes;
my ($cds) = grep { $_->primary_tag eq 'CDS' } $mRNA->get_SeqFeatures;
if ( $cds && @mRNAs > 1 && $name ) {
$cds = $self->_check_cds($cds, $name);
}
elsif ( $cds && @mRNAs > 1 ) {
if ( $cds->has_tag('standard_name') ) {
($name) = $cds->get_tag_values('standard_name');
}
}
else {
$name ||= @mRNAs > 1 ? $id . '-R' . (shift @variants) : $id;
}
my $pname;
if ( $cds ) {
if ( $cds->has_tag('standard_name') ) {
($sn) = $cds->get_tag_values('standard_name');
}
if ( $cds->has_tag('protein_id' ) ) {
if ( !$cds->get_tag_values('protein_id') ) {
$cds->remove_tag('protein_id');
if ( $cds->has_tag('product') ) {
$cds->add_tag_value($cds->get_tag_values('product'));
}
}
}
my ($c_start, $c_end);
if ( $cds->has_tag('codon_start') ){
($c_start) = $cds->get_tag_values('codon_start');
$cds->remove_tag('codon_start');
}
else {
$c_start = 1;
}
my $cs = Bio::SeqFeature::Generic->new;
if ( $c_start == 1 ) {
$c_start = $cds->strand > 0 ? $cds->start : $cds->end;
}
if ( $cds->strand < 1 ) {
$c_end = $c_start;
$c_start = $c_start - 2;
}
else {
$c_end = $c_start + 2;
}
$cs->start($c_start);
$cs->end($c_end);
$cs->strand($cds->strand);
$cs->primary_tag('start_codon');
$cs->add_tag_value( 'standard_name' => $name );
push @units, $cs;
if ( $cds->has_tag('problem') ) {
my ($val) = $cds->get_tag_values('problem');
$cds->remove_tag('problem');
$attributes{problem} = $val;
}
my ($aa) = $cds->get_tag_values('translation')
if $cds->has_tag('translation');
if ( $aa ) {
$proteins++;
my $psn = $sn;
$psn =~ s/-R/-P/;
$cds->remove_tag('translation');
my %add_seq = ();
$add_seq{residues} = $aa;
$add_seq{header} = ['seq',
id => $psn,
length => length $aa,
type => 'aa' ];
if ( $cds->has_tag('product_desc') ) {
($add_seq{desc}) = $cds->get_tag_values('product_desc');
$cds->remove_tag('product_desc');
}
unless ( $add_seq{desc} && $add_seq{desc} =~ /cds_boundaries/ ) {
my $start = $cds->start;
my $end = $cds->end;
my $str = $cds->strand;
$str = $str < 0 ? '[-]' : '';
$add_seq{desc} = "translation from_gene[$id] " .
"cds_boundaries:(" . $self->{seq}->display_id .
":$start..$end$str) transcript_info:[$name]";
}
$self->{add_seqs} ||= [];
push @{$self->{add_seqs}},\% add_seq;
}
}
$writer->startTag('feature_set', id => $name);
$self->_element('name', $name);
$self->_element('type', 'transcript');
$self->_feature_set_tags($mRNA);
$self->_feature_set_tags($cds) if $cds;
for my $thing ( @other_stuff ) {
if ( $thing->has_tag('standard_name') ) {
my ($v) = $thing->get_tag_values('standard_name');
if ( $v eq $sn ) {
push @units, $thing;
}
}
}
push @units, grep { $_->primary_tag eq 'exon' } $mRNA->get_SeqFeatures;
@units = sort { $a->start <=> $b->start } @units;
my $count = 0;
if ( $str < 0 ) {
@units = reverse @units;
}
for my $unit ( @units ) {
if ( $unit->primary_tag eq 'exon' ) {
my $ename = $id;
$ename .= ':' . ++$count;
$self->_feature_span($ename, $unit);
}
elsif ( $unit->primary_tag eq 'start_codon' ) {
$self->_feature_span(($sn || $gid), $unit, 1);
}
else {
my $uname = $unit->primary_tag . ":$id";
$self->_feature_span($uname, $unit);
}
}
$writer->endTag('feature_set');
}
$self->{other_stuff} =\@ other_stuff;
}
$writer->endTag('annotation');
for ( @{$self->{add_seqs}} ) {
my %h = %$_;
$writer->startTag(@{$h{header}});
my @desc = split /\s+/, $h{desc};
my $desc = '';
for my $word (@desc) {
my ($lastline) = $desc =~ /.*^(.+)$/sm;
$lastline ||= '';
$desc .= length $lastline < 50 ? " $word " : "\n $word ";
}
$self->_element('description', "\n $desc\n ");
my $aa = $h{residues};
$aa =~ s/\w{60}/$&\n /g;
$aa =~ s/\n\s+$//m;
$aa = "\n " . $aa . "\n ";
$self->_element('residues', $aa);
$writer->endTag('seq');
$self->{add_seqs} = [];
}
for my $thing ( @{$self->{other_stuff}} ) {
next if $thing->has_tag('standard_name');
$self->_write_feature($thing);
}
$self->{other_stuff} = [];} |
sub _check_cds
{ my ($self, $cds, $name) = @_;
if ( $cds->has_tag('standard_name') ) {
my ($cname) = $cds->get_tag_values('standard_name');
if ( $cname eq $name ) {
return $cds;
}
else {
my @CDS = grep { $_->primary_tag eq 'CDS' } @{$self->{feats}};
for ( @CDS ) {
next unless $_->has_tag('standard_name');
my ($sname) = $_->get_tag_values('standard_name');
return $_ if $sname eq $name;
}
return '';
}
}
else {
return $cds;
}} |
sub traverse
{ my $feat = shift;
warn $feat->primary_tag, "\n";
for ($feat->get_SeqFeatures) {
warn "\t", $_->primary_tag, ' ', sname($_), "\n";
for my $s ($_->get_SeqFeatures) {
warn "\t\t", $s->primary_tag, ' ', sname($s), "\n";
for my $ss($s->get_SeqFeatures) {
warn "\t\t\t", $ss->primary_tag, ' ', sname($ss), "\n";
}
}
}} |
sub sname
{ my $f = shift;
return '' unless $f->has_tag('standard_name');
$f->get_tag_values('standard_name');} |
sub _feature_set_tags
{ my ($self, $feat) = @_;
my $writer = $self->{'writer'};
my @tags = $feat->get_all_tags;
for my $tag ( @tags ) {
next if $tag eq 'timestamp';
for my $val ( $feat->get_tag_values($tag) ) {
if ( $tag eq 'date' ) {
my ($date) = $feat->get_tag_values($tag);
my %timestamp;
if ( $feat->has_tag('timestamp') ) {
($timestamp{'timestamp'}) = $feat->get_tag_values('timestamp');
$feat->remove_tag('timestamp');
}
$self->_element('date', $val,\% timestamp);
}
elsif ( $tag eq 'comment' ) {
unless ( $val =~ /=.+?;.+=/ ) {
$writer->startTag('comment');
$self->_element('text', $val);
$writer->endTag('comment');
}
else{
$self->_unflatten_attribute('comment', $val);
}
}
else {
$self->_property($tag, $val);
}
}
}} |
sub _property
{ my ($self, $tag, $val) = @_;
my $writer = $self->{writer};
if ( length $val > 45 ) {
my @val = split /\s+/, $val;
$val = '';
for my $word (@val) {
my ($lastline) = $val =~ /.*^(.+)$/sm;
$lastline ||= '';
$val .= length $lastline < 45 ? " $word " : "\n $word";
}
$val = "\n $val\n ";
$val =~ s/(\S)\s{2}(\S)/$1 $2/g;
}
$writer->startTag('property');
$self->_element('type', $tag);
$self->_element('value', $val);
$writer->endTag('property');} |
sub _tags
{ my ($self, $feat) = @_;
my $writer = $self->{writer};
my @tags = $feat->get_all_tags;
for my $tag ( @tags ) {
for my $val ( $feat->get_tag_values($tag) ) {
if ( $tag =~ /xref/ && $val =~ /GO/ ) {
$writer->startTag('aspect');
$self->_xref($val);
$writer->endTag('aspect');
}
elsif ( $tag eq 'comment' ) {
unless ( $val =~ /=.+?;.+=/ ) {
$writer->startTag('comment');
$self->_element('text', $val);
$writer->endTag('comment');
}
else{
$self->_unflatten_attribute('comment', $val);
}
}
else {
$self->_property($tag, $val);
}
}
}} |
sub _unflatten_attribute
{ my ($self, $name, $val) = @_;
my $writer = $self->{writer};
my %pair;
my @pairs = split ';', $val;
for my $p ( @pairs ) {
my @pair = split '=', $p;
$pair[0] =~ s/^\s+|\s+$//g;
$pair[1] =~ s/^\s+|\s+$//g;
$pair{$pair[0]} = $pair[1];
}
$writer->startTag($name);
for ( keys %pair ) {
$self->_element($_, $pair{$_});
}
$writer->endTag($name);} |
sub _xref
{ my ($self, @xrefs) = @_;
my $writer = $self->{writer};
for my $xref ( @xrefs ) {
my ($db, $acc) = $xref =~ /(\S+):(\S+)/;
$writer->startTag('dbxref');
$self->_element('xref_db', $db);
$acc = $xref if $db eq 'GO';
$self->_element('db_xref_id', $acc);
$writer->endTag('dbxref');
}} |
sub _feature_span
{ my ($self, $name, $feat, $p) = @_;
my $type = $feat->primary_tag;
my $writer = $self->{writer};
my %atts = ( id => $name );
if ( $p ) {
my $pname = $name;
$pname =~ s/-R/-P/;
$atts{produces_seq} = $pname;
}
$writer->startTag('feature_span', %atts );
$self->_element('name', $name);
$self->_element('type', $type);
$self->_seq_relationship('query', $feat);
$writer->endTag('feature_span');} |
sub _seq_relationship
{ my ($self, $type, $loc) = @_;
my $writer = $self->{'writer'};
$writer->startTag(
'seq_relationship',
type => $type,
seq => ($self->{seq}->accession || $self->{seq}->display_id)
);
$self->_span($loc);
$writer->endTag('seq_relationship');} |
sub _element
{ my ($self, $name, $chars, $atts) = @_;
my $writer = $self->{writer};
my %atts = $atts ? %$atts : ();
$writer->startTag($name, %atts);
$writer->characters($chars);
$writer->endTag($name);} |
sub _span
{ my ($self, @loc) = @_;
my ($loc, $start, $end);
if ( @loc == 1 ) {
$loc = $loc[0];
}
elsif ( @loc == 2 ) {
($start, $end) = @loc;
}
if ( $loc ) {
($start, $end) = ($loc->start, $loc->end);
($start, $end) = ($end, $start) if $loc->strand < 0;
}
elsif ( !$start ) {
($start, $end) = (1, $self->{seq}->length);
}
my $writer = $self->{writer};
$writer->startTag('span');
$self->_element('start', $start);
$self->_element('end', $end);
$writer->endTag('span');} |
sub _seq
{ my ($self, $seq, $atts) = @_;
my $writer = $self->{'writer'};
my $alphabet = $seq->alphabet;
$alphabet ||= $seq->mol_type if $seq->can('mol_type');
$alphabet =~ s/protein/aa/;
$alphabet =~ s/rna/cdna/;
my @seq = ( 'seq',
id => $atts->{name},
length => $seq->length,
type => $alphabet,
focus => "true"
);
if ( $atts->{md5checksum} ) {
push @seq, (md5checksum => $atts->{md5checksum});
delete $atts->{md5checksum};
}
$writer->startTag(@seq);
for my $k ( keys %{$atts} ) {
$self->_element($k, $atts->{$k});
}
my $sp = (' ' x 6);
my $dna = $seq->seq;
$dna =~ s/\w{60}/$&\n$sp/g;
$dna = "\n$sp" . $dna . "\n ";
if ( $seq->species && !$self->{has_organism}) {
my $species = $seq->species->binomial;
$self->_element('organism', $species);
}
$self->_element('residues', $dna);
$writer->endTag('seq');} |
sub _find_name
{ my ($self, $feat, $key) = @_;
my $name;
if ( $key && $feat->has_tag($key) ) {
($name) = $feat->get_tag_values($key);
return $name;
}
for ( qw/ gene standard_name locus_tag symbol / ) {
($name) = $feat->get_tag_values($_) if $feat->has_tag($_);
if ( $name ) {
return $name;
}
}
return $feat->display_name || '';} |
General documentation
User feedback is an integral part of the evolution of this
and other Bioperl modules. Send your comments and suggestions preferably
to one of the Bioperl mailing lists.
Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/MailList.shtml - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.
Bug reports can be submitted via email or the web:
bioperl-bugs@bioperl.org
http://bugzilla.bioperl.org/
| AUTHOR - Sheldon McKay | Top |
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _