Proof of principle. Not for production use.
This adaptor is a proof-of-principle. It is used to fetch BioFetch
sequences into a Bio::DB::GFF database (currently uses a hard-coded
EMBL database) as needed. This allows the Generic Genome Browser to
be used as a Genbank/EMBL browser.
sub new
{ my $class = shift;
my $self = $class->SUPER::new(@_);
my ($preferred,$proxy,$source) = rearrange(['PREFERRED_TAGS','PROXY','SOURCE'],@_);
$self->_preferred_tags($preferred ? $preferred :\% preferred_tags);
$self->_source($source || 'EMBL');
if ($proxy) {
my @args = ref($proxy) ? @$proxy : eval $proxy;
$self->{_proxy} =\@ args if @args;
}
$self; } |
sub _load_embl
{ my $self = shift;
my $acc = shift;
my $seq = shift;
my $refclass = $self->refclass;
my $locus = $seq->id;
my $source = $self->_source;
$self->setup_load();
my @aliases;
foreach ($seq->accession,$seq->get_secondary_accessions) {
next if lc($_) eq lc($acc);
push @aliases,[Alias => $_];
}
$self->load_gff_line(
{
ref => $acc,
class => $refclass,
source => $source,
method => 'region',
start => 1,
stop => $seq->length,
score => undef,
strand => '.',
phase => '.',
gclass => $self->refclass,
gname => $acc,
tstart => undef,
tstop => undef,
attributes => [[Note => $seq->desc],@aliases],
}
);
my ($transcript_version,$mRNA_version) = (0,0);
for my $feat ($seq->all_SeqFeatures) {
my $attributes = $self->get_attributes($feat);
my $name = $self->guess_name($attributes);
my $location = $feat->location;
my @segments = map {[$_->start,$_->end,$_->seq_id]}
$location->can('sub_Location') ? $location->sub_Location : $location;
my $type= $feat->primary_tag;
next if (lc($type) eq 'contig');
if (lc($type) eq 'variation' and $feat->length == 1) {
$type = 'SNP';
} elsif (lc($type) eq 'variation' ) {
$type = 'chromosome_variation';
}
if ($type eq 'source') {
$type = 'region';
}
if ($type =~ /misc.*RNA/i) {
$type = 'RNA';
}
if ($type eq 'misc_feature' and $name->[1] =~ /similar/i) {
$type = 'computed_feature_by_similarity';
} elsif ($type eq 'misc_feature') {
warn "skipping a misc_feature\n";
next;
}
my $parttype = $feat->primary_tag eq 'mRNA' ? 'exon' : $feat->primary_tag;
if ($type eq 'gene') {
$transcript_version = 0;
$mRNA_version = 0;
} elsif ($type eq 'mRNA') {
$name->[1] = sprintf("%s.t%02d",$name->[1],++$transcript_version);
} elsif ($type eq 'CDS') {
$name->[0] = 'mRNA';
$name->[1] = sprintf("%s.t%02d",$name->[1],$transcript_version);
}
my $strand = $feat->strand;
my $str = defined $strand ?
($strand > 0 ? '+' : '-')
: '.';
$self->load_gff_line( {
ref => $acc,
class => $refclass,
source => $source,
method => $type,
start => $location->start,
stop => $location->end,
score => $feat->score || undef,
strand => $str,
phase => $feat->frame || '.',
gclass => $name->[0],
gname => $name->[1],
tstart => undef,
tstop => undef,
attributes => $attributes,
}
) if ($type &&
($type ne 'CDS'||($type eq 'CDS'&&@segments==1) ) );
@$attributes = ();
next if @segments == 1;
for my $segment (@segments) {
my $strand = $feat->strand;
my $str = defined $strand ?
($strand > 0 ? '+' : '-')
: '.';
$self->load_gff_line( {
ref => $segment->[2] eq $locus ? $acc : $segment->[2],
class => $refclass,
source => $source,
method => $parttype,
start => $segment->[0],
stop => $segment->[1],
score => $feat->score || undef,
strand => $str,
phase => $feat->frame || '.',
gclass => $name->[0],
gname => $name->[1],
tstart => undef,
tstop => undef,
attributes => $attributes,
}
);
}
}
$self->finish_load();
$self->load_sequence_string($acc,$seq->seq);
1; } |
Lincoln Stein <lstein@cshl.org>.
Copyright 2002 Cold Spring Harbor Laboratory.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.