Bio::OntologyIO simpleGOparser
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
simpleGOparser - a simple GO parser returning a SimpleGOEngine
Package variables
No package variables defined.
Included modules
Bio::Ontology::GOterm
Bio::Ontology::SimpleGOEngine
Bio::Root::IO
Bio::Root::Root
Inherit
Bio::Root::Root
Synopsis
  use Bio::OntologyIO::simpleGOparser;

  my $parser = Bio::OntologyIO::simpleGOparser->new
	( -go_defs_file_name    => "/home/czmasek/GO/GO.defs",
	  -components_file_name => "/home/czmasek/GO/component.ontology",
	  -functions_file_name  => "/home/czmasek/GO/function.ontology",
	  -processes_file_name  => "/home/czmasek/GO/process.ontology" );

  my $engine = $parser->parse();

  my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
  my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
Description
Needs Graph.pm from CPAN.
Methods
newDescriptionCode
initDescriptionCode
parseDescriptionCode
go_defs_file_nameDescriptionCode
components_file_nameDescriptionCode
functions_file_nameDescriptionCode
processes_file_nameDescriptionCode
_add_term
No description
Code
_part_of_relationship
No description
Code
_is_a_relationship
No description
Code
_add_relationship
No description
Code
_has_term
No description
Code
_parse_relationships_file
No description
Code
_get_first_goid
No description
Code
_get_name
No description
Code
_get_synonyms
No description
Code
_get_db_cross_refs
No description
Code
_get_secondary_goids
No description
Code
_get_isa_goids
No description
Code
_get_partof_goids
No description
Code
_count_spaces
No description
Code
_next_term
No description
Code
_go_engine
No description
Code
_create_GOentry
No description
Code
_not_first_record
No description
Code
_done
No description
Code
_go_defs_file
No description
Code
_term
No description
Code
Methods description
newcode    nextTop
 Title   : new
 Usage   : $parser = Bio::OntologyIO::simpleGOparser->new( -go_defs_file_name    => "/path/to/GO.defs",
                                                         -components_file_name => "/path/to/component.ontology",
                                                         -functions_file_name  => "/path/to/function.ontology",
                                                         -processes_file_name  => "/path/to/process.ontology" );
 Function: Creates a new simpleGOparser.
 Returns : A new simpleGOparser object.
 Args    : -go_defs_file_name    => the GO defs-file name
           -components_file_name => the component.ontology-file name
           -functions_file_name  => the function.ontology-file name
           -processes_file_name  => the process.ontology-file name
initcodeprevnextTop
 Title   : init()
 Usage   : $parser->init();   
 Function: Initializes this object.
 Returns : 
 Args    :
parsecodeprevnextTop
 Title   : parse()
 Usage   : $parser->parse();   
 Function: Parses the files set with "new" or with methods
           go_defs_file_name, components_file_name, functions_file_name,
           processes_file_name.
 Returns : [Bio::Ontology::SimpleGOEngine]
 Args    :
go_defs_file_namecodeprevnextTop
 Title   : go_defs_file_name
 Usage   : $parser->go_defs_file_name( "GO.defs" );
 Function: Set/get for the GO defs-file_name.
 Returns : The GO defs-file_name [string].
 Args    : The GO defs-file_name [string] (optional).
components_file_namecodeprevnextTop
 Title   : components_file_name
 Usage   : $parser->components_file_name( "component.ontology" );
 Function: Set/get for the component ontology file name.
 Returns : The component ontology file name [string].
 Args    : The component ontology file name [string] (optional).
functions_file_namecodeprevnextTop
 Title   : functions_file_name
 Usage   : $parser->functions_file_name( "function.ontology" );
 Function: Set/get for functions file name.
 Returns : The functions file name [string].
 Args    : The functions file name [string] (optional).
processes_file_namecodeprevnextTop
 Title   : processes_file_name
 Usage   : $parser->processes_file_name( "process.ontology" );
 Function: Set/get for the processes file name.
 Returns : The processes file name [string].
 Args    : The processes file name [string] (optional).
Methods code
newdescriptionprevnextTop
# Title   : new
# Function: Builds the object through the parent constructor, resets the
#           parser state with init(), and stores whichever of the four
#           file names were supplied.
# Returns : The newly created parser object.
# Args    : -go_defs_file_name, -components_file_name,
#           -functions_file_name, -processes_file_name (all optional).
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new( @args );

    my @wanted = qw( GO_DEFS_FILE_NAME
                     COMPONENTS_FILE_NAME
                     FUNCTIONS_FILE_NAME
                     PROCESSES_FILE_NAME );
    my ( $go_defs_file_name, $components, $functions, $processes )
        = $self->_rearrange( \@wanted, @args );

    # init() clears the stored file names, so it has to run before the
    # setters below.
    $self->init();

    # A false value (undef, "" or "0") leaves the corresponding name unset.
    if ( $go_defs_file_name ) {
        $self->go_defs_file_name( $go_defs_file_name );
    }
    if ( $components ) {
        $self->components_file_name( $components );
    }
    if ( $functions ) {
        $self->functions_file_name( $functions );
    }
    if ( $processes ) {
        $self->processes_file_name( $processes );
    }

    return $self;
} # new
}
initdescriptionprevnextTop
# Title   : init
# Function: Resets the parser: forgets the four stored file names, clears
#           the "done" and "not first record" flags, empties the pending
#           term name and installs a fresh SimpleGOEngine.
#           The "_go_defs_file" handle slot is not touched here.
# Args    : none
sub init {
    my $self = shift;

    for my $slot ( "_go_defs_file_name",
                   "_components_file_name",
                   "_functions_file_name",
                   "_processes_file_name" ) {
        $self->{ $slot } = undef;
    }

    $self->_done( FALSE );
    $self->_not_first_record( FALSE );
    $self->_term( "" );

    # Every (re-)initialisation starts from an empty engine.
    $self->_go_engine( Bio::Ontology::SimpleGOEngine->new() );

} # init
}
parsedescriptionprevnextTop
# Title   : parse
# Function: Reads every term from the GO defs file into the engine, then
#           parses each ontology file whose name has been set (components,
#           functions, processes - in that order) to add the relationships.
# Returns : The engine held by this parser (see _go_engine).
# Args    : none
sub parse {
    my ( $self ) = @_;

    # Terms first, so that the relationship files can refer to them.
    while ( my $goterm = $self->_next_term() ) {
        $self->_add_term( $goterm );
    }

    # Each entry pairs a file name with the category label attached to the
    # terms found in that file.
    my @ontologies = (
        [ $self->components_file_name(), "components ontology" ],
        [ $self->functions_file_name(),  "functions ontology"  ],
        [ $self->processes_file_name(),  "processes ontology"  ],
    );

    foreach my $ontology ( @ontologies ) {
        my ( $file_name, $category ) = @{ $ontology };
        next unless $file_name;    # this ontology file was not configured
        $self->_parse_relationships_file( $file_name, $category );
    }

    return $self->_go_engine();

} # parse
}
go_defs_file_namedescriptionprevnextTop
# Title   : go_defs_file_name
# Function: Set/get for the GO defs file name. Setting a name that contains
#           at least one non-whitespace character also opens the file and
#           stores the resulting Bio::Root::IO handle via _go_defs_file.
# Returns : The stored GO defs file name, or undef if none was set.
# Args    : The GO defs file name [string] (optional).
sub go_defs_file_name {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        $self->{ "_go_defs_file_name" } = $value;

        # Previously the handle was opened only when the name contained a
        # non-word character (/\W/), so a plain name such as "GOdefs" was
        # stored but never opened. Any non-blank name is opened now.
        if ( $value =~ /\S/ ) {
            $self->_go_defs_file( Bio::Root::IO->new( -file => $value ) );
        }
    }

    return $self->{ "_go_defs_file_name" };
} # go_defs_file_name
}
components_file_namedescriptionprevnextTop
# Title   : components_file_name
# Function: Set/get for the component ontology file name.
# Returns : The stored name, or undef if none was set.
# Args    : The new name [string] (optional; undef leaves the value as is).
sub components_file_name {
    my $self  = shift;
    my $value = shift;

    $self->{ "_components_file_name" } = $value if defined $value;

    return $self->{ "_components_file_name" };
} # components_file_name
}
functions_file_namedescriptionprevnextTop
# Title   : functions_file_name
# Function: Set/get for the function ontology file name.
# Returns : The stored name, or undef if none was set.
# Args    : The new name [string] (optional; undef leaves the value as is).
sub functions_file_name {
    my $self = shift;

    my ( $value ) = @_;
    unless ( !defined $value ) {
        $self->{ "_functions_file_name" } = $value;
    }

    return $self->{ "_functions_file_name" };
} # functions_file_name
}
processes_file_namedescriptionprevnextTop
# Title   : processes_file_name
# Function: Set/get for the process ontology file name.
# Returns : The stored name, or undef if none was set.
# Args    : The new name [string] (optional; undef leaves the value as is).
sub processes_file_name {
    my $self  = $_[0];
    my $value = $_[1];

    if ( defined $value ) {
        $self->{ "_processes_file_name" } = $value;
        return $value;    # same as reading the slot that was just written
    }

    return $self->{ "_processes_file_name" };
} # processes_file_name
}
_add_termdescriptionprevnextTop
# Hands $term to the engine's add_term(); whatever that call returns is
# passed back to the caller.
sub _add_term {
    my $self = shift;
    my $term = shift;

    my $engine = $self->_go_engine();
    return $engine->add_term( $term );

} # _add_term
}
_part_of_relationshipdescriptionprevnextTop
# Returns whatever the engine's part_of_relationship() yields.
# Any extra argument is accepted but ignored.
sub _part_of_relationship {
    my $self = shift;

    my $engine = $self->_go_engine();
    return $engine->part_of_relationship();

} # _part_of_relationship
}
_is_a_relationshipdescriptionprevnextTop
# Returns whatever the engine's is_a_relationship() yields.
# Any extra argument is accepted but ignored.
sub _is_a_relationship {
    my $self = shift;

    my $engine = $self->_go_engine();
    return $engine->is_a_relationship();

} # _is_a_relationship
}
_add_relationshipdescriptionprevnextTop
# Registers a relationship of $type between $parent and $child by
# forwarding the three arguments, in that order, to the engine.
sub _add_relationship {
    my $self = shift;
    my ( $parent, $child, $type ) = @_;

    my $engine = $self->_go_engine();
    return $engine->add_relationship( $parent, $child, $type );

} # _add_relationship
}
_has_termdescriptionprevnextTop
# Asks the engine whether it already holds $term and returns its answer.
sub _has_term {
    my $self = shift;
    my $term = shift;

    my $engine = $self->_go_engine();
    return $engine->has_term( $term );

} # _has_term
}
_parse_relationships_filedescriptionprevnextTop
# Title   : _parse_relationships_file
# Function: Reads one ontology file line by line and feeds the engine with
#           the terms and relationships it describes.
#
#           As handled by the code below:
#             - lines starting with "!" are skipped;
#             - the first GO id on a line identifies the line's own term;
#             - "% ... GO:nnnnnnn" / "< ... GO:nnnnnnn" further along the
#               line name additional is-a / part-of parents;
#             - the number of leading blanks gives the nesting depth, and
#               the term on the closest shallower line is the parent;
#             - the first non-blank character selects the relationship to
#               that parent: "<" is part-of, "%" is is-a, and a line
#               starting with "$" gets neither a parent nor a category.
# Returns : nothing meaningful
# Args    : $file_name - name of the ontology file to read
#           $category  - label used to build the category term that is
#                        attached to every non-"$" term in the file
# Throws  : dies with "format error" when a line has no GO id, when the
#           indentation increases by more than one blank, or when the
#           line starts with an unexpected character.
sub _parse_relationships_file {
    my ( $self, $file_name, $category ) = @_;

    # Direct method call; the former "new Bio::Root::IO->new(...)" mixed
    # indirect-object syntax with an arrow call.
    my $file = Bio::Root::IO->new( -file => $file_name );
    my $cat  = Bio::Ontology::Term->new( -name => $category );

    my @stack       = ();    # ancestors of the current line, outermost first
    my $prev_spaces = -1;    # depth of the previous line (-1: no line yet)
    my $prev_term   = "";    # GO id of the previous line


    while( my $line = $file->_readline() ) {

        if ( $line =~ /^!/ ) {
            next;
        }

        my $current_term   = $self->_get_first_goid( $line );
        my @isa_parents    = $self->_get_isa_goids( $line );
        my @partof_parents = $self->_get_partof_goids( $line );
        my @syns           = $self->_get_synonyms( $line );
        my @sec_go_ids     = $self->_get_secondary_goids( $line );
        my @cross_refs     = $self->_get_db_cross_refs( $line );


        # Terms missing from the engine are created on the fly with just
        # a name and a GO id.
        if ( ! $self->_has_term( $current_term ) ) {
            my $goterm = $self->_create_GOentry( $self->_get_name( $line, $current_term ), $current_term );
            $self->_add_term( $goterm );
        }

        my $current_term_object = $self->_go_engine()->get_term( $current_term );

        $current_term_object->add_dblinks( @cross_refs );
        $current_term_object->add_secondary_GO_ids( @sec_go_ids );
        $current_term_object->add_synonyms( @syns );
        unless ( $line =~ /^\$/ ) {
            $current_term_object->category( $cat );
        }

        # Parents named explicitly on the line itself.
        foreach my $parent ( @isa_parents ) {
            if ( ! $self->_has_term( $parent ) ) {
                my $goterm = $self->_create_GOentry( $self->_get_name( $line, $parent ), $parent );
                $self->_add_term( $goterm );
            }

            $self->_add_relationship( $parent,
                                      $current_term,
                                      $self->_is_a_relationship() );

        }
        foreach my $parent ( @partof_parents ) {
            if ( ! $self->_has_term( $parent ) ) {
                my $goterm = $self->_create_GOentry( $self->_get_name( $line, $parent ), $parent );
                $self->_add_term( $goterm );
            }

            $self->_add_relationship( $parent,
                                      $current_term,
                                      $self->_part_of_relationship() );
        }

        # Keep the ancestor stack in step with the indentation.
        my $current_spaces = $self->_count_spaces( $line );

        if ( $current_spaces != $prev_spaces  ) {

            if ( $current_spaces == $prev_spaces + 1 ) {
                # One level deeper: the previous line becomes the parent.
                push( @stack, $prev_term );
            }
            elsif ( $current_spaces < $prev_spaces ) {
                # Back out by as many levels as the indentation dropped.
                my $n = $prev_spaces -  $current_spaces;
                for ( my $i = 0; $i < $n; ++$i ) {
                    pop( @stack );
                }
            }
            else {
                die( "format error" );
            }
        }

        # Innermost ancestor; undef while the stack is empty.
        my $parent = $stack[ -1 ];


        if ( $line =~ /^\$/ ) {
            # "$" line: no parent relationship is recorded.
        }
        elsif ( $line =~ /^\s*</ ) {
            $self->_add_relationship( $parent,
                                      $current_term,
                                      $self->_part_of_relationship() );
        }
        elsif ( $line =~ /^\s*%/ ) {
            $self->_add_relationship( $parent,
                                      $current_term,
                                      $self->_is_a_relationship() );
        }
        else {
            die( "format error" );
        }


        $prev_spaces = $current_spaces;

        $prev_term = $current_term;

    }
} # _parse_relationships_file
}
_get_first_goiddescriptionprevnextTop
# Returns the first GO id ("GO:" plus seven digits) that follows a ";"
# on $line. Dies with "format error" if there is none.
sub _get_first_goid {
    my ( $self, $line ) = @_;

    my ( $goid ) = $line =~ /;\s*(GO:\d{7})/;

    die( "format error" ) unless defined $goid;

    return $goid;

} # _get_first_goid
}
_get_namedescriptionprevnextTop
# Returns the name that precedes "; $goid" on $line, trimmed of leading
# and trailing whitespace, or undef if $goid is not found that way.
# The name is the run of characters before the ";" that contains none of
# ";", "^", "<", "%" or ",".
sub _get_name {
    my ( $self, $line, $goid ) = @_;

    # Explicit "return undef" (not a bare return): callers place this call
    # inside an argument list and rely on getting exactly one value back.
    return undef unless $line =~ /([^;^<^%^,]+);\s*$goid/;

    my $name = $1;
    for ( $name ) {
        s/\s+$//;
        s/^\s+//;
    }
    return $name;

} # _get_name
}
_get_synonymsdescriptionprevnextTop
# Collects every "synonym: text" entry on $line and returns the texts,
# whitespace-trimmed, in order of appearance. A text ends at the next
# ";", "^", "<" or "%".
sub _get_synonyms {
    my ( $self, $line ) = @_;

    my @synonyms = map {
        my $syn = $_;
        $syn =~ s/\s+$//;
        $syn =~ s/^\s+//;
        $syn;
    } ( $line =~ /synonym\s*:\s*([^;^<^%]+)/g );

    return @synonyms;

} # _get_synonyms
}
_get_db_cross_refsdescriptionprevnextTop
# Collects the "db:accession"-shaped entries that follow a ";" on $line
# and returns them whitespace-trimmed, in order of appearance. Entries
# containing "synonym" or a GO id are left out.
sub _get_db_cross_refs {
    my ( $self, $line ) = @_;

    my @candidates = $line =~ /;([^;^<^%^:]+:[^;^<^%^:]+)/g;

    my @refs = ();
    foreach my $ref ( @candidates ) {
        # Synonyms and GO ids have the same "x:y" shape but are handled
        # by other helpers.
        next if $ref =~ /synonym/ || $ref =~ /GO:\d{7}/;

        $ref =~ s/\s+$//;
        $ref =~ s/^\s+//;
        push( @refs, $ref );
    }
    return @refs;
}
_get_secondary_goidsdescriptionprevnextTop
# Returns every GO id on $line that directly follows a "," (optionally
# separated by whitespace), in order of appearance.
sub _get_secondary_goids {
    my ( $self, $line ) = @_;

    my @secs = $line =~ /,\s*(GO:\d{7})/g;

    return @secs;

} # _get_secondary_goids
}
_get_isa_goidsdescriptionprevnextTop
# Returns the GO ids of the is-a parents named on $line: after the first
# GO id on the line (the line's own term) has been removed, every GO id
# reachable from a "%" without crossing "<", "^" or "," is collected.
sub _get_isa_goids {
    my ( $self, $line ) = @_;

    # Work on a copy with the line's own id removed.
    ( my $remainder = $line ) =~ s/GO:\d{7}//;

    my @ids = $remainder =~ /%[^<^,]*?(GO:\d{7})/g;

    return @ids;
} # _get_isa_goids
}
_get_partof_goidsdescriptionprevnextTop
# Returns the GO ids of the part-of parents named on $line: after the
# first GO id on the line (the line's own term) has been removed, every
# GO id reachable from a "<" without crossing "%", "^" or "," is collected.
sub _get_partof_goids {
    my ( $self, $line ) = @_;

    # Work on a copy with the line's own id removed.
    ( my $remainder = $line ) =~ s/GO:\d{7}//;

    my @ids = $remainder =~ /<[^%^,]*?(GO:\d{7})/g;

    return @ids;

} # _get_partof_goids
}
_count_spacesdescriptionprevnextTop
# Returns the number of blank (" ") characters at the very start of
# $line; 0 if the line starts with anything else, tabs included.
sub _count_spaces {
    my ( $self, $line ) = @_;

    my ( $indent ) = $line =~ /^([ ]+)/;

    return defined $indent ? length( $indent ) : 0;
} # _count_spaces
}
_next_termdescriptionprevnextTop
# Title   : _next_term
# Function: Reads the GO defs file up to the start of the next record and
#           returns the record just completed as a term object.
#
#           A record starts at a "term:" line; its "goid:", "definition:",
#           "definition_reference:" and "comment:" lines follow. Since the
#           end of a record is only recognised at the next "term:" line,
#           that next term name is kept in _term() between calls.
# Returns : A term built by _create_GOentry, or undef once the file is
#           exhausted, when no defs file has been opened, or when the
#           file contains no "term:" line at all.
# Args    : none
sub _next_term {
    my ( $self ) = @_;

    if ( $self->_done() == TRUE ) {
        return undef;
    }

    # Without an opened defs file there is nothing to read; report "no
    # more terms" rather than calling a method on an undefined value.
    my $defs_file = $self->_go_defs_file();
    unless ( defined $defs_file ) {
        $self->_done( TRUE );
        return undef;
    }

    my $line      = "";
    my $goid      = "";
    my $next_term = "";
    my $def       = "";
    my $comment   = "";
    my @def_refs  = ();

    while( $line = $defs_file->_readline() ) {

        # Blank lines and "!" comment lines carry no data.
        if ( $line !~ /\S/
        ||   $line =~ /^\s*!/ ) {
            next;
        }

        elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
            $next_term = $1;
            if ( $self->_not_first_record() == TRUE ) {
                # A new record begins, so the one collected so far is
                # complete: emit it under the name remembered earlier.
                my $entry = $self->_create_GOentry( $self->_term(), $goid, $def, $comment, \@def_refs );
                $self->_term( $next_term );
                return $entry;
            }
            else {
                # Very first record: only remember its name.
                $self->_term( $next_term );
                $self->_not_first_record( TRUE );
            }
        }
        elsif ( $line =~ /^\s*goid:\s*(.+)/ ) {
            $goid = $1;
        }
        elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
            $def = $1;
        }
        elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
            push( @def_refs, $1 );
        }
        elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
            $comment = $1;
        }
    }

    # End of file.
    $self->_done( TRUE );

    # If no "term:" line was ever seen there is no pending record; do not
    # fabricate an entry with an empty name and GO id.
    unless ( $self->_not_first_record() == TRUE ) {
        return undef;
    }

    return $self->_create_GOentry( $self->_term(), $goid, $def, $comment, \@def_refs );
} # _next_term
}
_go_enginedescriptionprevnextTop
# Set/get for the engine object that collects terms and relationships.
# An undefined argument leaves the stored engine unchanged.
sub _go_engine {
    my $self  = shift;
    my $value = shift;

    $self->{ "_go_engine" } = $value if defined $value;

    return $self->{ "_go_engine" };
} # _go_engine
}
_create_GOentrydescriptionprevnextTop
# Title   : _create_GOentry
# Function: Builds a new Bio::Ontology::GOterm from the given pieces.
# Returns : The new term object.
# Args    : $name     - term name
#           $goid     - GO id
#           $def      - definition text (optional)
#           $comment  - comment text (optional)
#           $def_refs - reference to an array of definition references
#                       (optional; currently not stored, see note below)
sub _create_GOentry {
    my ( $self, $name, $goid, $def, $comment, $def_refs ) = @_;

    my $term = Bio::Ontology::GOterm->new();

    $term->GO_id( $goid );
    $term->name( $name );

    # _next_term passes the definition and comment it parsed; they used to
    # be dropped here. Empty values are skipped so that terms created from
    # the relationship files (name and id only) are built as before.
    if ( defined $def && length $def ) {
        $term->definition( $def );
    }
    if ( defined $comment && length $comment ) {
        $term->comment( $comment );
    }

    # NOTE(review): $def_refs is accepted but not attached to the term;
    # which GOterm method should receive definition references is not
    # visible from this file - confirm before wiring it up.

    return $term;

} # _create_GOentry
}
_not_first_recorddescriptionprevnextTop
# Set/get for the flag telling whether a "term:" line has already been
# seen in the defs file. Throws unless the new value equals TRUE or FALSE.
sub _not_first_record {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        if ( $value != FALSE && $value != TRUE ) {
            $self->throw( "Argument to method\" _not_first_record\" must be either ".TRUE." or ".FALSE );
        }
        $self->{ "_not_first_record" } = $value;
    }

    return $self->{ "_not_first_record" };
} # _not_first_record
}
_donedescriptionprevnextTop
# Set/get for the flag telling whether the defs file has been read to the
# end. Throws unless the new value equals TRUE or FALSE.
sub _done {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        if ( $value != FALSE && $value != TRUE ) {
            $self->throw( "Found [$value] where [" . TRUE
            ." or " . FALSE . "] expected" );
        }
        $self->{ "_done" } = $value;
    }

    return $self->{ "_done" };
} # _done
}
_go_defs_filedescriptionprevnextTop
# Set/get for the handle object used to read the GO defs file.
# Throws when the new value is not a Bio::Root::IO; an undefined argument
# leaves the stored handle unchanged.
sub _go_defs_file {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        if ( ! $value->isa( "Bio::Root::IO" ) ) {
            $self->throw( "Argument to method\" _go_defs_File\" is not a valid\" Bio::Root::IO\"" );
        }
        $self->{ "_go_defs_file" } = $value;
    }

    return $self->{ "_go_defs_file" };
} # _go_defs_file
}
_termdescriptionprevnextTop
# Set/get for the name of the term whose record is currently being read.
# An undefined argument leaves the stored name unchanged; the empty
# string is a valid value.
sub _term {
    my $self  = shift;
    my $value = shift;

    $self->{ "_term" } = $value if defined $value;

    return $self->{ "_term" };
} # _term
}
General documentation
FEEDBACKTop
Mailing ListsTop
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to the
Bioperl mailing lists. Your participation is much appreciated.
  bioperl-l@bioperl.org                         - General discussion
  http://bio.perl.org/MailList.html             - About the mailing lists
Reporting BugsTop
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:
  bioperl-bugs@bio.perl.org
  http://bugzilla.bioperl.org/
AUTHORTop
Christian M. Zmasek
Email: czmasek@gnf.org or cmzmasek@hotmail.com
WWW: http://www.genetics.wustl.edu/eddy/people/zmasek/
Address:
  Genomics Institute of the Novartis Research Foundation
  10675 John Jay Hopkins Drive
  San Diego, CA 92121
APPENDIXTop
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _