Bio::Tools
GFF
Summary
Bio::Tools::GFF - A Bio::SeqAnalysisParserI compliant GFF format parser
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
use Bio::Tools::GFF;
# specify input via -fh or -file
my $gffio = Bio::Tools::GFF->new(-fh => \*STDIN, -gff_version => 2);
my $feature;
# loop over the input stream
while($feature = $gffio->next_feature()) {
# do something with feature
}
$gffio->close();
# you can also obtain a GFF parser as a SeqAnalysisParserI in
# HT analysis pipelines (see Bio::SeqAnalysisParserI and
# Bio::Factory::SeqAnalysisParserFactory)
my $factory = Bio::Factory::SeqAnalysisParserFactory->new();
my $parser = $factory->get_parser(-input => \*STDIN, -method => "gff");
while($feature = $parser->next_feature()) {
# do something with feature
}
Description
This class provides a simple GFF parser and writer. In the sense of a
SeqAnalysisParser, it parses an input file or stream into SeqFeatureI
objects, but is not in any way specific to a particular analysis
program and the output that program produces.
That is, if you can get your analysis program to spit out GFF, here is
your result parser.
Methods
Methods description
Title : new
Usage :
Function: Creates a new instance. Recognized named parameters are -file, -fh,
and -gff_version.
Returns : a new object
Args : named parameters |
Title : next_feature
Usage : $seqfeature = $gffio->next_feature();
Function: Returns the next feature available in the input file or stream, or
undef if there are no more features.
Example :
Returns : A Bio::SeqFeatureI implementing object, or undef if there are no
more features.
Args : none |
Title : from_gff_string
Usage : $gff->from_gff_string($feature, $gff_string);
Function: Sets properties of a SeqFeatureI object from a GFF-formatted
string. Interpretation of the string depends on the version
that has been specified at initialization.
This method is used by next_feature(). It actually dispatches to
one of the version-specific (private) methods.
Example :
Returns : void
Args : A Bio::SeqFeatureI implementing object to be initialized
The GFF-formatted string to initialize it from |
Title : _from_gff1_string
Usage :
Function:
Example :
Returns : void
Args : A Bio::SeqFeatureI implementing object to be initialized
The GFF-formatted string to initialize it from |
Title : _from_gff2_string
Usage :
Function:
Example :
Returns : void
Args : A Bio::SeqFeatureI implementing object to be initialized
The GFF2-formatted string to initialize it from |
Title : write_feature
Usage : $gffio->write_feature($feature);
Function: Writes the specified SeqFeatureI object in GFF format to the stream
associated with this instance.
Example :
Returns :
Args : A Bio::SeqFeatureI implementing object to be serialized |
Title : gff_string
Usage : $gffstr = $gffio->gff_string($feature);
Function: Obtain the GFF-formatted representation of a SeqFeatureI object.
The formatting depends on the version specified at initialization.
This method is used by write_feature(). It actually dispatches to
one of the version-specific (private) methods.
Example :
Returns : A GFF-formatted string representation of the SeqFeature
Args : A Bio::SeqFeatureI implementing object to be GFF-stringified |
Title : _gff1_string
Usage : $gffstr = $gffio->_gff1_string
Function:
Example :
Returns : A GFF1-formatted string representation of the SeqFeature
Args : A Bio::SeqFeatureI implementing object to be GFF-stringified |
Title : _gff2_string
Usage : $gffstr = $gffio->_gff2_string
Function:
Example :
Returns : A GFF2-formatted string representation of the SeqFeature
Args : A Bio::SeqFeatureI implementing object to be GFF2-stringified |
Title : gff_version
Usage : $gffversion = $gffio->gff_version
Function:
Example :
Returns : The GFF version this parser will accept and emit.
Args : none |
Methods code
# Title   : new
# Function: Construct a GFF parser/writer. The argument list is handed to the
#           parent constructor and to _initialize_io(); the GFF_VERSION
#           parameter is extracted via _rearrange().
# Returns : the new object
# Args    : named parameters
# Throws  : via $self->throw() if the requested version is neither 1 nor 2
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);
    my ($gff_version) = $self->_rearrange([qw(GFF_VERSION)], @args);
    $self->_initialize_io(@args);

    # A missing (or false) version falls back to GFF2.
    $gff_version = 2 unless $gff_version;

    unless ($gff_version == 1 || $gff_version == 2) {
        $self->throw("Can't build a GFF object with the unknown version ".
                     $gff_version);
    }

    $self->gff_version($gff_version);
    return $self;
}
# Title   : next_feature
# Usage   : $seqfeature = $gffio->next_feature();
# Function: Read lines via _readline() until one is found that is neither a
#           '#' comment nor whitespace-only, then populate a fresh
#           Bio::SeqFeature::Generic from it with from_gff_string().
# Returns : the populated feature object, or undef once _readline() yields
#           a false value
# Args    : none
sub next_feature {
    my ($self) = @_;

    my $line;
    LINE:
    while (1) {
        $line = $self->_readline();
        last LINE unless $line;             # input exhausted
        next LINE if $line =~ /^\#/;        # comment line
        next LINE if $line =~ /^\s*$/;      # blank line
        last LINE;                          # first real record
    }
    return undef unless $line;

    my $feature = Bio::SeqFeature::Generic->new();
    $self->from_gff_string($feature, $line);
    return $feature;
}
# Title   : from_gff_string
# Usage   : $gff->from_gff_string($feature, $gff_string);
# Function: Dispatch to the version-specific parser: _from_gff1_string() when
#           gff_version() is 1, _from_gff2_string() otherwise.
# Args    : the feature object to initialise
#           the GFF-formatted string to initialise it from
sub from_gff_string {
    my ($self, $feat, $gff_string) = @_;

    my $parser = $self->gff_version() == 1
               ? '_from_gff1_string'
               : '_from_gff2_string';

    return $self->$parser($feat, $gff_string);
}
# Title   : _from_gff1_string
# Function: Populate a feature object from one tab-delimited GFF1 record.
#           The first eight columns map to seqname, source_tag, primary_tag,
#           start, end, score, strand and frame. Every further column is a
#           group entry: "tag=value" entries become that tag/value pair,
#           anything else is stored under the tag 'group'.
# Returns : nothing
# Args    : the feature object to initialise
#           the GFF-formatted string to initialise it from
# Throws  : via $feat->throw() when the record has fewer than eight columns
sub _from_gff1_string {
    my ($gff, $feat, $string) = @_;
    chomp $string;

    my ($seqname, $source, $primary, $start, $end,
        $score, $strand, $frame, @group) = split(/\t/, $string);

    if (!defined $frame) {
        $feat->throw("[$string] does not look like GFF to me");
    }

    # Any frame that is not a plain non-negative integer (e.g. '.') becomes 0.
    $frame = 0 unless $frame =~ /^\d+$/;

    $feat->seqname($seqname);
    $feat->source_tag($source);
    $feat->primary_tag($primary);
    $feat->start($start);
    $feat->end($end);
    $feat->frame($frame);

    # '.' means "no score"; leave the feature's score untouched in that case.
    $feat->score($score) unless $score eq '.';

    # Unrecognised strand symbols leave the feature's strand untouched.
    if    ($strand eq '-') { $feat->strand(-1); }
    elsif ($strand eq '+') { $feat->strand(1);  }
    elsif ($strand eq '.') { $feat->strand(0);  }

    foreach my $entry (@group) {
        if (my ($tag, $value) = $entry =~ /(\S+)=(\S+)/) {
            $feat->add_tag_value($tag, $value);
        }
        else {
            $feat->add_tag_value('group', $entry);
        }
    }

    return;
}
# Title   : _from_gff2_string
# Function: Populate a feature object from one GFF2 record. The first eight
#           columns (split on runs of tabs) map to seqname, source_tag,
#           primary_tag, start, end, score, strand and frame. The remaining
#           text is the attribute section: "key value value ..." groups
#           separated by ';'. Values may be double-quoted, a ';' inside
#           quotes is literal, and an unquoted '#' starts a trailing comment.
# Returns : nothing
# Args    : the feature object to initialise
#           the GFF2-formatted string to initialise it from
# Throws  : via $feat->throw() when the record has fewer than eight columns
sub _from_gff2_string {
    my ($gff, $feat, $string) = @_;
    chomp($string);

    my ($seqname, $source, $primary, $start, $end,
        $score, $strand, $frame, @attribs) = split(/\t+/, $string);
    my $attribs = join '', @attribs;

    if (!defined $frame) {
        $feat->throw("[$string] does not look like GFF2 to me");
    }

    $feat->seqname($seqname);
    $feat->source_tag($source);
    $feat->primary_tag($primary);
    $feat->start($start);
    $feat->end($end);
    $feat->frame($frame);

    # '.' means "no score"; leave the feature's score untouched in that case.
    $feat->score($score) unless $score eq '.';

    # Unrecognised strand symbols leave the feature's strand untouched.
    if    ($strand eq '-') { $feat->strand(-1); }
    elsif ($strand eq '+') { $feat->strand(1);  }
    elsif ($strand eq '.') { $feat->strand(0);  }

    # Cut the attribute text at unquoted semicolons and stop at the first
    # unquoted '#'. Scanning directly avoids substituting a placeholder
    # string for quoted semicolons, which would mangle any value that
    # happened to contain the placeholder text.
    my @key_vals;
    my $current   = '';
    my $in_quotes = 0;
    CHAR:
    for my $char (split //, $attribs) {
        if ($char eq '"') {
            $in_quotes = !$in_quotes;
        }
        elsif (!$in_quotes && $char eq '#') {
            last CHAR;
        }
        elsif (!$in_quotes && $char eq ';') {
            push @key_vals, $current;
            $current = '';
            next CHAR;
        }
        $current .= $char;
    }
    push @key_vals, $current;

    PAIR:
    foreach my $pair (@key_vals) {
        # A key is a run of word characters followed by one whitespace
        # character; everything after that is the value text. Pairs without
        # that shape (empty pieces, a bare key with nothing after it) carry
        # no values and are skipped instead of being processed as undef.
        my ($key, $values) = $pair =~ /^\s*([\w\d]+)\s(.*)\z/s;
        next PAIR unless defined $key;

        # Quoted values are collected first, then the unquoted tokens.
        my @values;
        while ($values =~ s/"(.*?)"//) {
            push @values, $1;
        }
        push @values, grep { CORE::length($_) > 0 } split /\s+/, $values;

        foreach my $value (@values) {
            $feat->add_tag_value($key, $value);
        }
    }

    return;
}
# Title   : write_feature
# Usage   : $gffio->write_feature($feature);
# Function: Format the feature with gff_string() and hand the result, plus a
#           trailing newline, to _print().
# Returns : whatever _print() returns
# Args    : the feature object to be serialised
sub write_feature {
    my ($self, $feature) = @_;

    my $line = $self->gff_string($feature);
    return $self->_print($line . "\n");
}
# Title   : gff_string
# Usage   : $gffstr = $gffio->gff_string($feature);
# Function: Dispatch to the version-specific formatter: _gff1_string() when
#           gff_version() is 1, _gff2_string() otherwise.
# Returns : the string produced by the selected formatter
# Args    : the feature object to be GFF-stringified
sub gff_string {
    my ($self, $feature) = @_;

    my $formatter = $self->gff_version() == 1 ? '_gff1_string' : '_gff2_string';
    return $self->$formatter($feature);
}
# Title   : _gff1_string
# Usage   : $gffstr = $gffio->_gff1_string($feature)
# Function: Render a feature as one GFF1 record: eight tab-delimited columns
#           (seqname, source, primary tag, start, end, score, strand, frame)
#           followed by one tab-delimited "tag=value" column per tag value.
#           Undefined score/frame are written as '.', a false strand as '.',
#           and a missing or false seqname as 'SEQ'.
# Returns : the GFF1-formatted string, without a trailing newline
# Args    : the feature object to be GFF-stringified
sub _gff1_string {
    my ($gff, $feat) = @_;

    my $score = $feat->can('score') ? $feat->score() : undef;
    $score = '.' unless defined $score;

    my $frame = $feat->can('frame') ? $feat->frame() : undef;
    $frame = '.' unless defined $frame;

    # Map the numeric strand to its GFF symbol; any other true value is
    # written out unchanged.
    my $strand = $feat->strand();
    if (!$strand) {
        $strand = '.';
    }
    elsif ($strand == 1) {
        $strand = '+';
    }
    elsif ($strand == -1) {
        $strand = '-';
    }

    my $name = $feat->can('seqname') ? ($feat->seqname() || 'SEQ') : 'SEQ';

    my $str = join("\t",
                   $name,
                   $feat->source_tag(),
                   $feat->primary_tag(),
                   $feat->start(),
                   $feat->end(),
                   $score,
                   $strand,
                   $frame);

    # Each tag value gets its own tab-delimited column. Joining with a space
    # would glue the text onto the frame column, and a reader that splits
    # the record on tabs could no longer recover the frame or the tags.
    foreach my $tag ($feat->all_tags) {
        foreach my $value ($feat->each_tag_value($tag)) {
            $str .= "\t$tag=$value";
        }
    }

    return $str;
}
# Title   : _gff2_string
# Usage   : $gffstr = $gffio->_gff2_string($feature)
# Function: Render a feature as one GFF2 record: eight tab-delimited columns
#           (seqname, source, primary tag, start, end, score, strand, frame)
#           and, when the feature has tags, a ninth column holding
#           "tag value value ..." groups separated by ';'.
#           Undefined score/frame are written as '.', a false strand as '.',
#           and a missing or false seqname as 'SEQ'.
# Returns : the GFF2-formatted string, without a trailing newline
# Args    : the feature object to be GFF2-stringified
sub _gff2_string {
    my ($gff, $feat) = @_;

    my $score = $feat->can('score') ? $feat->score() : undef;
    $score = '.' unless defined $score;

    my $frame = $feat->can('frame') ? $feat->frame() : undef;
    $frame = '.' unless defined $frame;

    # Map the numeric strand to its GFF symbol; any other true value is
    # written out unchanged.
    my $strand = $feat->strand();
    if (!$strand) {
        $strand = '.';
    }
    elsif ($strand == 1) {
        $strand = '+';
    }
    elsif ($strand == -1) {
        $strand = '-';
    }

    my $name = $feat->can('seqname') ? ($feat->seqname() || 'SEQ') : 'SEQ';

    my $str = join("\t",
                   $name,
                   $feat->source_tag(),
                   $feat->primary_tag(),
                   $feat->start(),
                   $feat->end(),
                   $score,
                   $strand,
                   $frame);

    my @tags = $feat->all_tags;
    if (@tags) {
        $str .= "\t";
        foreach my $tag (@tags) {
            my $valuestr = '';
            foreach my $tag_value ($feat->each_tag_value($tag)) {
                my $value = $tag_value;     # work on a copy
                if (!defined $value || $value eq '') {
                    # Only a truly absent value is written as an empty quoted
                    # string; a plain truth test would also swallow "0".
                    $value = "\"\"";
                }
                elsif ($value =~ /[^A-Za-z0-9_]/) {
                    # Anything beyond word characters is quoted, with tabs
                    # and newlines escaped so the record stays on one line.
                    $value =~ s/\t/\\t/g;
                    $value =~ s/\n/\\n/g;
                    $value = '"' . $value . '" ';
                }
                $valuestr .= $value . " ";
            }
            $str .= "$tag $valuestr ; ";
        }
        # Drop the "; " left behind by the last tag group.
        substr($str, -2, 2, '');
    }

    return $str;
}
# Title   : gff_version
# Usage   : $gffversion = $gffio->gff_version
# Function: Get or set the GFF version stored under the 'GFF_VERSION' key.
#           Only the values 1 and 2 are stored; any other argument is
#           ignored and the previous setting is kept.
# Returns : the currently stored GFF version
# Args    : optionally, the version to switch to (1 or 2)
sub gff_version {
    my ($self, $value) = @_;

    if (defined $value) {
        my $supported = ($value == 1) || ($value == 2);
        $self->{'GFF_VERSION'} = $value if $supported;
    }

    return $self->{'GFF_VERSION'};
}
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bio.perl.org/bioperl-bugs/
| AUTHOR - Matthew Pocock | Top |
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _