Bio::Graph::IO psi_xml
Summary | Package variables | Synopsis | Description | General documentation | Methods
Summary
Bio::Graph::IO::psi_xml - access and manipulate PSI XML graphs
Package variables
Globals (from "use vars" definitions)
$g
$fac
$c
%species
Included modules
Bio::Annotation::Collection
Bio::Annotation::DBLink
Bio::Graph::Edge
Bio::Graph::ProteinGraph
Bio::Seq::SeqFactory
Bio::Species
XML::Twig
base qw ( Bio::Graph::IO )
Synopsis
Do not use this module directly, use Bio::Graph::IO, for example:
  my $graph_io = Bio::Graph::IO->new(-format => 'psi_xml',
-file => 'data.xml');
Description
PSI XML is a format to describe protein-protein interactions and
interaction networks. The following databases support PSI XML:
    BIND
http://www.bind.ca
    DIP
http://dip.doe-mbi.ucla.edu/
    HPRD
http://www.hprd.org
    IntAct
http://www.ebi.ac.uk/intact
    MINT
http://cbm.bio.uniroma2.it/mint/
Notes on PSI XML from various databases can be found in the Bioperl Wiki
at http://bioperl.org/wiki/Module:Bio::Graph::IO::psi_xml
Documentation for PSI XML can be found at http://psidev.sourceforge.net.
Methods
BEGIN Code
new
No description
Code
_initialize
No description
Code
next_networkDescriptionCode
_proteinInteractorDescriptionCode
_addEdge
No description
Code
Methods description
next_networkcode    nextTop
 name       : next_network
purpose : to construct a protein interaction graph from xml data
usage : my $gr = $io->next_network();
arguments : void
returns : A Bio::Graph::ProteinGraph object
_proteinInteractorcodeprevnextTop
 name      : _proteinInteractor
purpose : parses protein information into Bio::Seq::RichSeq objects
returns : void
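usage : called internally as an XML::Twig handler set up by next_network()
arguments : the XML::Twig object and the proteinInteractor element, both supplied by XML::Twig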
Methods code
BEGINTop
BEGIN {
	# Create the package-level sequence factory once, at compile time; the
	# parsing code asks it for Bio::Seq::RichSeq objects.
	my @factory_args = (-type => 'Bio::Seq::RichSeq');
	$fac = Bio::Seq::SeqFactory->new(@factory_args);
}
newdescriptionprevnextTop
# Constructor: blesses an empty hash into the requested class and hands all
# remaining arguments to _initialize().
#
# arguments : class name followed by the constructor arguments
# returns   : the new object
sub new {
	my $class     = shift;
	my @init_args = @_;

	my $self = {};
	bless $self, $class;

	$self->_initialize(@init_args);
	return $self;
}
_initializedescriptionprevnextTop
# Forwards the constructor arguments to the parent class's _initialize_io()
# and returns early (bare return) when that call yields a false value.
sub _initialize {
  my $self    = shift;
  my @io_args = @_;

  my $io_ready = $self->SUPER::_initialize_io(@io_args);
  return unless $io_ready;
}
next_networkdescriptionprevnextTop
# Parses the file behind this IO object with XML::Twig and returns the
# resulting protein interaction graph.
#
# arguments : none
# returns   : the Bio::Graph::ProteinGraph filled in by the twig handlers
sub next_network {
	my ($self) = @_;

	# Start from a fresh graph on every call so nothing carries over from a
	# previous parse; the handlers below write into this package variable.
	$g = Bio::Graph::ProteinGraph->new();

	# One handler per element type of interest.
	my %handler_for = (
		proteinInteractor => \&_proteinInteractor,
		interaction       => \&_addEdge,
	);

	my $parser = XML::Twig->new(TwigHandlers => \%handler_for);
	$parser->parsefile($self->file);

	return $g;
}
_proteinInteractordescriptionprevnextTop
# Twig handler for <proteinInteractor> elements.
#
# Creates a sequence node through the package-level factory $fac, attaches
# the species object and the database cross references, and registers the
# node in the current graph's '_id_map' under every identifier known for it.
#
# arguments : the XML::Twig object and the proteinInteractor element
# returns   : the result of $twig->purge() (last statement)
# throws    : any exception raised by the XML::Twig or Bioperl calls is
#             propagated; only the "fullName" lookup for the description
#             is trapped
sub _proteinInteractor {
	my ($twig, $pi) = @_;

	my $org   = $pi->first_child('organism');
	my $taxid = $org->att('ncbiTaxId');

	## just make new species object if doesn't already exist ##
	if (!exists($species{$taxid})) {
		my $org_names = $org->first_child('names');
		my $common    = $org_names->first_child('shortLabel')->text;
		my $full      = $org_names->first_child('fullName')->text;

		# first word of the full name is used as genus, the rest as species
		my ($gen, $sp) = $full =~ /(\S+)\s+(.+)/;

		my $sp_obj = Bio::Species->new(
			-ncbi_taxid     => $taxid,
			-classification => [$sp, $gen],
			-common_name    => $common,
		);
		$sp_obj->name('scientific', $full);
		$species{$taxid} = $sp_obj;

		# Deliberately no "$@" check here: none of the calls above run
		# inside an eval, so $@ could only hold a stale error left by an
		# earlier, unrelated eval (e.g. a missing fullName on a previous
		# interactor) and would be reported as a bogus species parse error.
		# Genuine failures above die and propagate to the caller.
	}

	## next extract sequence id info ##
	my @xrefs = $pi->first_child('xref')->children();
	my %ids   = map { ( $_->att('db'), $_->att('id') ) } @xrefs;
	$ids{'psixml'} = $pi->att('id');

	my $prim_id = defined($ids{'GI'}) ? $ids{'GI'} : '';

	# first true id in this order of preference becomes the accession
	my $acc = $ids{'RefSeq'}
	       || $ids{'SWP'}
	       || $ids{'Swiss-Prot'}   # db name from HPRD
	       || $ids{'Ref-Seq'}      # db name from HPRD
	       || $ids{'GI'}
	       || $ids{'PIR'}
	       || $ids{'intact'}       # db name from IntAct
	       || $ids{'psi-mi'};      # db name from IntAct

	## get description line - certain files, like PSI XML from HPRD, have
	## "shortLabel" but no "fullName"
	my $desc;
	my $have_full_name = eval {
		$desc = $pi->first_child('names')->first_child('fullName')->text;
		1;
	};
	if (!$have_full_name) {
		warn("No fullName, use shortLabel for description instead");
		$desc = $pi->first_child('names')->first_child('shortLabel')->text;
	}

	# use ids that aren't accession_no or primary_tag to build
	# dbxref Annotations
	my $ac = Bio::Annotation::Collection->new();
	for my $db (keys %ids) {
		next if $ids{$db} eq $acc;
		next if $ids{$db} eq $prim_id;
		my $an = Bio::Annotation::DBLink->new(
			-database   => $db,
			-primary_id => $ids{$db},
		);
		$ac->add_Annotation('dblink', $an);
	}

	## now we can make sequence object ##
	my $node = $fac->create(
		-accession_number => $acc,
		-desc             => $desc,
		-display_id       => $acc,
		-primary_id       => $prim_id,
		-species          => $species{$taxid},
		-annotation       => $ac,
	);

	## now fill hash with keys = ids and vals = node refs to have lookup
	## hash for nodes by any id.
	$g->{'_id_map'}{$ids{'psixml'}} = $node;
	if (defined($node->primary_id)) {
		$g->{'_id_map'}{$node->primary_id} = $node;
	}
	if (defined($node->accession_number)) {
		$g->{'_id_map'}{$node->accession_number} = $node;
	}

	## cycle thru annotations
	$ac = $node->annotation();
	for my $an ($ac->get_Annotations('dblink')) {
		$g->{'_id_map'}{$an->primary_id} = $node;
	}

	$twig->purge();
}
_addEdgedescriptionprevnextTop
# Twig handler for <interaction> elements: looks up the nodes referenced by
# the first two participants in the graph's '_id_map' and adds an edge
# between them to the current graph $g.
#
# arguments : the XML::Twig object and the interaction element
# returns   : the result of $twig->purge() (last statement)
sub _addEdge {
	my ($twig, $interaction) = @_;

	# collect the interactor reference of every participant
	my @participants = $interaction->first_child('participantList')->children;
	my @node_refs;
	for my $participant (@participants) {
		push @node_refs,
			$participant->first_child('proteinInteractorRef')->att('ref');
	}

	my $primary_ref = $interaction->first_child('xref')->first_child('primaryRef');
	my $edge_id     = $primary_ref->att('id');

	# only the first two referenced nodes form the edge
	my $from_node = $g->{'_id_map'}{$node_refs[0]};
	my $to_node   = $g->{'_id_map'}{$node_refs[1]};

	my $edge = Bio::Graph::Edge->new(
		-nodes => [$from_node, $to_node],
		-id    => $edge_id,
	);
	$g->add_edge($edge);

	$twig->purge();
}
General documentation
_addEdgeTop
 name     : _addEdge
purpose : adds a new edge to a graph
usage : do not call, called by next_network
returns : void