sub new
{ my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($dir,$nodesfile,$namesfile,$force) = $self->_rearrange([qw
(DIRECTORY NODESFILE NAMESFILE FORCE)], @args);
$self->index_directory($dir || $DEFAULT_INDEX_DIR);
if ( $nodesfile ) {
$self->_build_index($nodesfile,$namesfile,$force);
}
$self->_db_connect;
return $self;} |
sub get_taxon
{ my ($self) = shift;
my ($taxonid, $name);
if (@_ > 1) {
($taxonid, $name) = $self->_rearrange([qw(TAXONID NAME)],@_);
if ($name) {
($taxonid, my @others) = $self->get_taxonids($name);
$self->warn("There were multiple ids ($taxonid @others) matching '$name', using '$taxonid'") if @others > 0;
}
}
else {
$taxonid = shift;
}
$taxonid =~ /^\d+$/ || return;
my $node = $self->{'_nodes'}->[$taxonid] || return;
length($node) || return;
my ($taxid, undef, $rank, $code, $divid, $gen_code, $mito) = split(SEPARATOR,$node);
last unless defined $taxid;
my ($taxon_names) = $self->{'_id2name'}->[$taxid];
my ($sci_name, @common_names) = split(SEPARATOR, $taxon_names);
my $taxon = new Bio::Taxon(
-name => $sci_name,
-common_names => [@common_names],
-ncbi_taxid => $taxid, -rank => $rank,
-division => $DIVISIONS[$divid]->[1],
-genetic_code => $gen_code,
-mito_genetic_code => $mito );
$taxon->{'db_handle'} = $self;
$self->_handle_internal_id($taxon);
return $taxon;} |
sub _build_index
{ my ($self,$nodesfile,$namesfile,$force) = @_;
my ($dir) = ($self->index_directory);
my $nodeindex = "$dir/$DEFAULT_NODE_INDEX";
my $name2idindex = "$dir/$DEFAULT_NAME2ID_INDEX";
my $id2nameindex = "$dir/$DEFAULT_ID2NAME_INDEX";
my $parent2childindex = "$dir/$DEFAULT_PARENT_INDEX";
$self->{'_nodes'} = [];
$self->{'_id2name'} = [];
$self->{'_name2id'} = {};
$self->{'_parent2children'} = {};
if (! -e $nodeindex || $force) {
my (%parent2children,@nodes);
open(NODES,$nodesfile) ||
$self->throw("Cannot open node file '$nodesfile' for reading");
unlink $nodeindex;
unlink $parent2childindex;
my $nh = tie ( @nodes, 'DB_File', $nodeindex, O_RDWR|O_CREAT, 0644, $DB_RECNO) ||
$self->throw("Cannot open file '$nodeindex': $!");
my $btree = tie( %parent2children, 'DB_File', $parent2childindex, O_RDWR|O_CREAT, 0644, $DB_BTREE) ||
$self->throw("Cannot open file '$parent2childindex': $!");
while (<NODES>) {
chomp;
my ($taxid,$parent,$rank,$code,$divid,undef,$gen_code,undef,$mito) = split(/\t\|\t/,$_);
next if $taxid == 1;
if ($parent == 1) {
$parent = $taxid;
}
$nodes[$taxid] = join(SEPARATOR, ($taxid,$parent,$rank,$code,$divid,$gen_code,$mito));
$btree->put($parent,$taxid);
}
close(NODES);
$nh = $btree = undef;
untie @nodes ;
untie %parent2children;
}
if ((! -e $name2idindex || -z $name2idindex) || (! -e $id2nameindex || -z $id2nameindex) || $force) {
open(NAMES,$namesfile) ||
$self->throw("Cannot open names file '$namesfile' for reading");
unlink $name2idindex;
unlink $id2nameindex;
my (@id2name,%name2id);
my $idh = tie (@id2name, 'DB_File', $id2nameindex, O_RDWR|O_CREAT, 0644, $DB_RECNO) ||
$self->throw("Cannot open file '$id2nameindex': $!");
my $nameh = tie ( %name2id, 'DB_File', $name2idindex, O_RDWR|O_CREAT, 0644, $DB_HASH) ||
$self->throw("Cannot open file '$name2idindex': $!");
while (<NAMES>) {
chomp;
my ($taxid, $name, $unique_name, $class) = split(/\t\|\t/,$_);
next if $taxid == 1;
$class =~ s/\s+\|\s*$//;
my $lc_name = lc($name);
my $orig_name = $name;
if ($lc_name =~ /\(class\)$/) { $name2id{$lc_name} = $taxid;
$name =~ s/\s+\(class\)$//;
$lc_name = lc($name);
}
my $taxids = $name2id{$lc_name} || '';
my %taxids = map { $_ => 1 } split(SEPARATOR, $taxids);
unless (exists $taxids{$taxid}) {
$taxids{$taxid} = 1;
$name2id{$lc_name} = join(SEPARATOR, keys %taxids);
}
if ($unique_name) {
$name2id{lc($unique_name)} = $taxid;
}
my $names = $id2name[$taxid] || '';
my @names = split(SEPARATOR, $names);
if ($class && $class eq 'scientific name') {
unshift(@names, $name);
push(@names, $orig_name) if ($orig_name ne $name);
push(@names, $unique_name) if $unique_name;
}
else {
push(@names, $name);
push(@names, $orig_name) if ($orig_name ne $name);
push(@names, $unique_name) if $unique_name;
}
$id2name[$taxid] = join(SEPARATOR, @names);
}
close(NAMES);
$idh = $nameh = undef;
untie( %name2id);
untie( @id2name);
}} |
sub _db_connect
{ my $self = shift;
return if $self->{'_initialized'};
$self->{'_nodes'} = [];
$self->{'_id2name'} = [];
$self->{'_name2id'} = {};
my ($dir) = ($self->index_directory);
my $nodeindex = "$dir/$DEFAULT_NODE_INDEX";
my $name2idindex = "$dir/$DEFAULT_NAME2ID_INDEX";
my $id2nameindex = "$dir/$DEFAULT_ID2NAME_INDEX";
my $parent2childindex = "$dir/$DEFAULT_PARENT_INDEX";
if( ! -e $nodeindex ||
! -e $name2idindex ||
! -e $id2nameindex ) {
$self->warn("Index files have not been created");
return 0;
}
tie ( @{$self->{'_nodes'}}, 'DB_File', $nodeindex, O_RDWR,undef, $DB_RECNO)
|| $self->throw("$! $nodeindex");
tie (@{$self->{'_id2name'}}, 'DB_File', $id2nameindex,O_RDWR, undef,
$DB_RECNO) || $self->throw("$! $id2nameindex");
tie ( %{$self->{'_name2id'}}, 'DB_File', $name2idindex, O_RDWR,undef,
$DB_HASH) || $self->throw("$! $name2idindex");
$self->{'_parentbtree'} = tie( %{$self->{'_parent2children'}},
'DB_File', $parent2childindex,
O_RDWR, 0644, $DB_BTREE);
$self->{'_initialized'} = 1;} |
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _