Bio::SeqIO tigr
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Summary
Bio::SeqIO::tigr - TIGR XML sequence input/output stream
Package variables
No package variables defined.
Included modules
Bio::Annotation::Comment
Bio::Seq::RichSeq(1)
Bio::Seq::RichSeq(2)
Bio::Seq::SeqFactory
Bio::SeqFeature::Generic
Bio::SeqIO
Bio::Species
Data::Dumper
Inherit
Bio::SeqIO
Synopsis
Do not use this module directly. Use it via the Bio::SeqIO class.
Description
This object can transform Bio::Seq objects to and from efa flat
file databases.
Methods
_initialize
No description
Code
next_seq()
No description
Code
_get_sequence
No description
Code
_process_seq
No description
Code
_process_seq_tu()
No description
Code
_add_species
No description
Code
_add_source
No description
Code
_process
No description
Code
_process_tigr
No description
Code
_process_pseudochromosome
No description
Code
_process_assembly
No description
Code
_process_assembly_seq()
No description
Code
_process_coordset($)
No description
Code
_process_header
No description
Code
_process_gene_list
No description
Code
_process_protein_coding
No description
Code
_process_rna_genes
No description
Code
_process_misc_info
No description
Code
_process_repeat_list
No description
Code
_process_scaffold
No description
Code
_process_tu
No description
Code
_process_gene_info
No description
Code
_process_model
No description
Code
_process_cdna_support
No description
Code
_process_exon
No description
Code
_process_cds
No description
Code
_process_utrs
No description
Code
_process_left_utr
No description
Code
_process_right_utr
No description
Code
_process_ext_utr
No description
Code
_readtag
No description
Code
_readline
No description
Code
throw
No description
Code
Methods description
None available.
Methods code
_initializedescriptionprevnextTop
sub _initialize {
	my($self, @args) = @_;

	$self->SUPER::_initialize(@args);
	($self->{'tu'}, $self->{'model'})
	 = $self->_rearrange([qw(TU MODEL)], @args);
	$self->{'tu'}      = 1 if !defined($self->{'tu'});
	$self->{'model'}   = 1 if !defined($self->{'model'});
	$self->{'eof'}     = 0;
	$self->sequence_factory(new Bio::Seq::SeqFactory(
			-type => 'Bio::Seq::RichSeq'));

	$self->_process();
	my $list=$self->{'ASSEMBLY'}->{'GENE_LIST'}->{'PROTEIN_CODING'}->{'TU'};
}
next_seq()descriptionprevnextTop
sub next_seq() {
	my ($self) = @_;
	my $list=$self->{'ASSEMBLY'}->{'GENE_LIST'}->{'PROTEIN_CODING'}->{'TU'};

	return 0 if $self->{'eof'} == 1;

	if(defined(@{$self->{'seq'}}) and @{$self->{'seq'}} > 0) {
		return shift @{$self->{'seq'}};
	}

	while($self->{'eof'} == 0) {
		until((defined(@$list) and @$list > 0) or $self->{'eof'} == 1) {
			$self->_process();
		}

		while(defined(@$list) and @$list > 0 and
			(!defined(@{$self->{'seq'}}) or @{$self->{'seq'}} == 0)
		) {
			$self->_process_seq();
		}

		if(defined(@{$self->{'seq'}}) and @{$self->{'seq'}} > 0) {
			return shift @{$self->{'seq'}};
		}
	}

	return undef;
}
_get_sequencedescriptionprevnextTop
sub _get_sequence {
	my($self, $start, $end) = @_;
	my $dir   = ($start < $end)? 1 : -1;
	my $len   = ($end - $start) * $dir + 1;
	my $seqstr;


	if($dir == -1) {
		$seqstr = reverse(substr(
			$self->{'ASSEMBLY'}->{'ASSEMBLY_SEQUENCE'},
			$end - 1, $len));
		$seqstr =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
	} else {
		$seqstr = substr(
			$self->{'ASSEMBLY'}->{'ASSEMBLY_SEQUENCE'},
			$start - 1, $len);
	}
	return $seqstr;
}
_process_seqdescriptionprevnextTop
sub _process_seq {
	my ($self) = @_;
	my $list=$self->{'ASSEMBLY'}->{'GENE_LIST'}->{'PROTEIN_CODING'}->{'TU'};
	my $kw = join(' ', @{$self->{'ASSEMBLY'}->{'HEADER'}->{'KEYWORDS'}})
		if defined($self->{'ASSEMBLY'}->{'HEADER'}->{'KEYWORDS'});

	my $tu = shift(@$list);
	push(@{$self->{'seq'}}, $self->_process_seq_tu($tu))
		if defined($self->{'tu'});
}
_process_seq_tu()descriptionprevnextTop
sub _process_seq_tu() {
	my ($self, $tu) = @_;
	my $start  = $tu->{'COORDSET'}->{'END5'};
	my $end    = $tu->{'COORDSET'}->{'END3'};
	my $seqstr = $self->_get_sequence($start, $end);
	
	my $id = $tu->{GENE_INFO}->{PUB_LOCUS};
	if(!defined($id) or $id =~ /^\s*$/o) {
		$id = $tu->{FEAT_NAME};
	}

	my $seq = $self->sequence_factory()->create(
		-id => $id,
		-seq => $seqstr,
		-accession_number => $id
	);

	$seq->primary_id($id);
	$seq->add_date($tu->{'DATE'}) if defined($tu->{'DATE'});
	$seq->add_date($tu->{'GENE_INFO'}->{'DATE'});
	$seq->add_secondary_accession($tu->{'FEAT_NAME'}) unless $tu->{FEAT_NAME} eq $id;
	$seq->add_secondary_accession($tu->{'GENE_INFO'}->{'LOCUS'});
	$seq->add_secondary_accession($tu->{'GENE_INFO'}->{'ALT_LOCUS'})
		if defined($tu->{'GENE_INFO'}->{'ALT_LOCUS'});

	$seq->description(join(' | ',
		'genomic nucleotide',
		$tu->{'GENE_INFO'}->{'COM_NAME'}
	));

	my $strand = 1;
	$strand = -1 if $start > $end;
	
	my $source = new Bio::SeqFeature::Generic(
		-primary => 'source',
		-start   => 1,
		-end     => $seq->length(),
		-tag     => {
			date    =>  $tu->{DATE},
			end5    =>  $tu->{COORDSET}->{END5},
			end3    =>  $tu->{COORDSET}->{END3},
			strand  => ($tu->{COORDSET}->{END5} < $tu->{COORDSET}->{END3} ? "positive" : "negative"),
		}
	);

	$self->_add_source($source);
	$seq->add_SeqFeature($source);

	foreach my $model (@{$tu->{'MODEL'}}) {
		my @exons;
		my @cds;
		my $mstart = $strand*($model->{'COORDSET'}->{'END5'} - $start) + 1;
		my $mend   = $strand*($model->{'COORDSET'}->{'END3'} - $start) + 1;

		my $locus = $model->{PUB_LOCUS};
		if(!defined($locus) or $locus =~ /^\s*$/) {
			$locus = $model->{FEAT_NAME};
		}

		$seq->add_secondary_accession($locus);
		$seq->add_secondary_accession($model->{FEAT_NAME}) unless $locus eq $model->{FEAT_NAME};

		my $mfeat = new Bio::SeqFeature::Generic(
			-primary     => 'MODEL',
			-start       => $mstart,
			-end         => $mend,
			-tag => {
				locus => $locus,
				feat_name => $model->{'FEAT_NAME'}
			}
		);
		$seq->add_SeqFeature($mfeat);

		foreach my $exon (@{$model->{'EXON'}}) {
			my $estart = $strand * ($exon->{'COORDSET'}->{'END5'} - $start) + 1;
			my $eend   = $strand * ($exon->{'COORDSET'}->{'END3'} - $start) + 1;

			push @exons, {
				"start" => $estart,
				"end"   => $eend,
				"date"  => $exon->{'DATE'},
				"locus" => $exon->{'FEAT_NAME'},
			};


			if(defined($exon->{'CDS'})) {
				my $c = $exon->{'CDS'};
				my $cstart = $strand * ($c->{'COORDSET'}->{'END5'} - $start) + 1;
				my $cend   = $strand * ($c->{'COORDSET'}->{'END3'} - $start) + 1;

				push @cds, {
					"start" => $cstart,
					"end"   => $cend,
					"date"  => $c->{'DATE'},
					"locus" => $c->{'FEAT_NAME'},
				};
			}

			if(defined($exon->{'UTRS'})) {
				my $u = $exon->{'UTRS'};

				while( defined @{$u->{'LEFT_UTR'}} and 0 < @{$u->{'LEFT_UTR'}}) {
					my $lutr = shift @{$u->{'LEFT_UTR'}};
					my $us = $strand * ($lutr->{'END5'} - $start) + 1;
					my $ue = $strand * ($lutr->{'END3'} - $start) + 1;

					$seq->add_SeqFeature(new Bio::SeqFeature::Generic(
						-primary => 'LEFT_UTR',
						-start   => $us,
						-end     => $ue,
						-tag     => {
							locus => $model->{'PUB_LOCUS'} || $model->{'FEAT_NAME'}
						}
					));
				}

				while( defined @{$u->{'RIGHT_UTR'}} and 0 < @{$u->{'RIGHT_UTR'}}) {
					my $rutr = shift @{$u->{'RIGHT_UTR'}};
					my $us = $strand * ($rutr->{'END5'} - $start) + 1;
					my $ue = $strand * ($rutr->{'END3'} - $start) + 1;

					$seq->add_SeqFeature(new Bio::SeqFeature::Generic(
						-primary => 'RIGHT_UTR',
						-start   => $us,
						-end     => $ue,
						-tag => {
							locus => $model->{'PUB_LOCUS'} || $model->{'FEAT_NAME'}
						}
					));
				}

				while( defined @{$u->{'EXTENDED_UTR'}} and 0 < @{$u->{'EXTENDED_UTR'}}) {
					my $eutr = shift @{$u->{'EXTENDED_UTR'}};
					my $us = $strand * ($eutr->{'END5'} - $start) + 1;
					my $ue = $strand * ($eutr->{'END3'} - $start) + 1;

					$seq->add_SeqFeature(new Bio::SeqFeature::Generic(
						-primary => 'EXTENDED_UTR',
						-start   => $us,
						-end     => $ue,
						-tag => {
							locus => $model->{'PUB_LOCUS'} || $model->{'FEAT_NAME'}
						}
					));
				}
			}
		}

		my $loc = new Bio::Location::Split();
		foreach my $e (@exons) {
			$loc->add_sub_Location(new Bio::Location::Simple(
					-start => $e->{'start'},
					-end   => $e->{'end'}
			));
		}

		my $efeat = new Bio::SeqFeature::Generic(
			-primary      => 'EXON',
			-location     => $loc,
			-tag => {
				locus => $model->{'PUB_LOCUS'} || $model->{'FEAT_NAME'}
			}
		);
		$seq->add_SeqFeature($efeat);

		if(scalar(@cds) > 0) {
			$loc = new Bio::Location::Split();
			foreach my $c (@cds) {
				$loc->add_sub_Location(new Bio::Location::Simple(
						-start => $c->{'start'},
						-end   => $c->{'end'}
				));
			}

			my $cfeat = new Bio::SeqFeature::Generic(
				-primary     => 'CDS',
				-location    => $loc,
				-tag => {
					locus => $model->{'PUB_LOCUS'} || $model->{'FEAT_NAME'}
				}
			);

			my $trans;
			eval {
				$trans = new Bio::PrimarySeq(
					-id => $seq->primary_id(),
					-seq => $seq->subseq($loc)
				);
				$cfeat->add_tag_value('translation',
					$trans->translate(undef, undef, undef, undef, 1, 0)->seq()
				);
			};
			if($@) {
				$self->warn("Unable to translate protein. Probably a psuedo-protein.");
			} else {
				$seq->add_SeqFeature($cfeat);
			}
		}
	}
	
	my $kw = $self->{'ASSEMBLY'}->{'HEADER'}->{'KEYWORDS'};
	$seq->keywords(join(' ', @$kw)) if defined(@$kw);
	$self->_add_species($seq);
	
	while(my $comment = shift @{$tu->{'GENE_INFO'}->{'COMMENTS'}}) {
		my $com = new Bio::Annotation::Comment('-text' => $comment);
		$seq->annotation()->add_Annotation('comment', $com);
	}
	
	return $seq;
}
_add_speciesdescriptionprevnextTop
sub _add_species {
	my($self, $seq) = @_;
	my $o = $self->{'ASSEMBLY'}->{'HEADER'}->{'ORGANISM'};
	my $lineage = $self->{'ASSEMBLY'}->{'HEADER'}->{'LINEAGE'};
	my $species = new Bio::Species(-classification => $lineage);
	my ($genus, $spec, @sub) = split(/ /, $o);

	$species->genus($genus);
	$species->species($spec);
	$species->sub_species(join(' ', @sub));

	$seq->species($species);
}
_add_sourcedescriptionprevnextTop
sub _add_source {
	my($self, $source) = @_;
	$source->add_tag_value("chromosome", $self->{ASSEMBLY}->{CHROMOSOME});
	$source->add_tag_value("clone", $self->{ASSEMBLY}->{HEADER}->{CLONE_NAME});
}
_processdescriptionprevnextTop
sub _process {
	my($self) = @_;
	my $line;
	my $tu = undef;

	return if $self->{'eof'} == 1;

	do {
		$line = $self->_readline();
		if($line =~ /<\?xml\s+version\s+=\s+"\d+\.\d+"\?>/o) {
			# do nothing
} elsif ($line =~ /<!DOCTYPE (\w+) SYSTEM "[\w\.]+">/o) { $self->throw("DOCTYPE of $1, not TIGR!") if $1 ne "TIGR" ; } elsif ($line =~ /<TIGR>/o) { $self->_pushback($line); $self->_process_tigr(); } elsif ($line =~ /<ASSEMBLY.*?>/o) { $self->_pushback($line); $self->_process_assembly(); } elsif ($line =~ /<\/TIGR>/o) { $self->{'eof'} = 1; return; } else { $self->throw("Unknown or Invalid process directive:", join('', ($line =~ /^\s*(<[^>]+>)/o))); } $tu = $self->{'ASSEMBLY'}->{'GENE_LIST'}->{'PROTEIN_CODING'}; } until((defined($tu->{'TU'}) and @{$tu->{'TU'}} != 0) or !defined($line));
}
_process_tigrdescriptionprevnextTop
sub _process_tigr {
	my($self) = @_;
	my $line;

	$line = $self->_readline();
	if($line !~ /<TIGR>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_tigr called but no ",
		             "<TIGR> found in stream");
	}

	$line = $self->_readline();
	if($line =~ /<PSEUDOCHROMOSOME>/o) {
		$self->_pushback($line);
		$self->_process_pseudochromosome();
	} elsif ($line =~ /<ASSEMBLY.*?>/o) {
		$self->_pushback($line);
		$self->{'ASSEMBLY'} = $self->_process_assembly();
	}
}
_process_pseudochromosomedescriptionprevnextTop
sub _process_pseudochromosome {
	my($self) = @_;
	my $line;

	$line = $self->_readline();
	return if $line !~ /<PSEUDOCHROMOSOME>/o;

	$line = $self->_readline();

	if($line =~ /<SCAFFOLD>/o) {
		$self->_pushback($line);
		$self->_process_scaffold();
		$line = $self->_readline();
	} else {
		$self->warn( "No Scaffold found in <PSUEDOCHROMOSOME> this " .
		             "is a violation of the TIGR dtd, but we ignore " .
			     "it so we are ignoring the error\n"
		);
	}

	if($line =~ /<ASSEMBLY.*>/o) {
		$self->_pushback($line);
		$self->{'ASSEMBLY'} = $self->_process_assembly();
		$line = $self->_readline();
	} else {
		$self->throw("Missing required ASSEMBLY in <PSEUDOCHROMOSOME>");
	}

	if($line =~ /<\/PSEUDOCHROMOSOME>/) {
		return;
	}

	$self->throw("Reached end of _process_psuedochromosome");
}
_process_assemblydescriptionprevnextTop
sub _process_assembly {
	my($self) = @_;
	my $line;
	my $assembly;

	
	$line = $self->_readline();
	if($line !~ /<ASSEMBLY([^>]*)>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_assembly called ",
		             "but no <ASSEMBLY> found in stream");
	}


	my %attribs = ($1 =~ /(\w+)\s*=\s+"(.*?)"/og);
	foreach my $key (keys(%attribs)) {
		$assembly->{$key} =  $attribs{$key};
	}

	$line = $self->_readline();
	my($attr, $val); 
	if(($attr, $val) = ($line =~ /<ASMBL_ID([^>]*)>([^<]*)<\/ASMBL_ID>/o)) {
		%attribs = ($attr =~ /(\w+)\s*=\s+"(.*?)"/og);
		foreach my $key (keys(%attribs)) {
			$assembly->{$key} =  $attribs{$key};
		}
		$assembly->{'ASMBL_ID'} = $val;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <ASMBL_ID> missing");
	}

	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$assembly->{'COORDSET'} = $self->_process_coordset();
		$line = $self->_readline();
	} else {
		$self->throw("Required <COORDSET> missing");
	}

	if($line =~ /<HEADER>/o) {
		$self->_pushback($line);
		$assembly->{'HEADER'} = $self->_process_header();
		$line = $self->_readline();
	} else {
		$self->throw("Required <HEADER> missing");
	}

	if($line =~ /<TILING_PATH>/o) {
		$self->_pushback($line);
		$assembly->{'TILING_PATH'} = $self->_process_tiling_path();
		$line = $self->_readline();
	}

	if($line =~ /<GENE_LIST>/o) {
		$self->_pushback($line);
		$assembly->{'GENE_LIST'} = $self->_process_gene_list();
		$line = $self->_readline();
	} else {
		$self->throw("Required <GENE_LIST> missing");
	}

	if($line =~ /<MISC_INFO>/o) {
		$self->_pushback($line);
		$assembly->{'MISC_INFO'} = $self->_process_misc_info();
		$line = $self->_readline();
	}

	if($line =~ /<REPEAT_LIST>/o) {
		$self->_pushback($line);
		$assembly->{'REPEAT_LIST'} = $self->_process_repeat_list();
		$line = $self->_readline();
	}

	if($line =~ /<ASSEMBLY_SEQUENCE>/o) {
		$self->_pushback($line);
		$assembly->{'ASSEMBLY_SEQUENCE'}=$self->_process_assembly_seq();
		$line = $self->_readline();
	} else {
		$self->throw("Required <ASSEMBLY_SEQUENCE> missing");
	}

	if($line =~ /<\/ASSEMBLY>/o) {
		return $assembly;
	}

	$self->throw("Reached the end of <ASSEMBLY>");
}
_process_assembly_seq()descriptionprevnextTop
sub _process_assembly_seq() {
	my ($self) = @_;
	my $line;
	
	$line = $self->_readline();
	if($line !~ /<ASSEMBLY_SEQUENCE>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_assembly_seq called ".
			     "with no <ASSEMBLY_SEQUENCE> in the stream");
	}

	$line = $self->_readline();
	if($line =~ /^(.+)<\/ASSEMBLY_SEQUENCE>/o) {
		return $1;
	}

	$self->throw("Reached end of _proces_assembly");
}
_process_coordset($)descriptionprevnextTop
sub _process_coordset($) {
	my ($self) = @_;
	my $line;
	my $h;

	$line = $self->_readline();
	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$line = $self->_readtag();
		($h->{'END5'}, $h->{'END3'}) = ($line =~ /<COORDSET>\s*<END5>\s*(\d+)\s*<\/END5>\s*<END3>\s*(\d+)\s*<\/END3>/os);
		if(!defined($h->{'END5'}) or !defined($h->{'END3'})) {
			$self->throw("Invalid <COORDSEt>");
		}
		return $h;
	} else {
		$self->throw("Bio::SeqIO::tigr::_process_coordset() called ",
		             "but no <COORDSET> found in stream");
	}
}
_process_headerdescriptionprevnextTop
sub _process_header {
	my ($self) = @_;
	my $header;
	my $line = $self->_readline();

	if($line !~ /<HEADER>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_header called ",
		             "but no <HEADER> found in stream");
	}

	$line = $self->_readtag();
	if($line =~ /<CLONE_NAME>([^>]+)<\/CLONE_NAME>/o) {
		$header->{'CLONE_NAME'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <CLONE_NAME> missing");
	}

	if($line =~ /<SEQ_LAST_TOUCHED>/o) {
		($header->{'SEQ_LAST_TOUCHED'}) =
			($line =~ /<DATE>([^<]+)<\/DATE>/o);
		$line = $self->_readtag();
	} else {
		$self->throw("Reqired <SEQ_LAST_TOUCHED> missing");
	}

	if($line =~ /<GB_ACCESSION>([^<]*)<\/GB_ACCESSION>/o) {
		$header->{'GB_ACCESSION'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <GB_ACCESSION> missing");
	}

	if($line =~ /<ORGANISM>([^<]*)<\/ORGANISM>/o) {
		$header->{'ORGANISM'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <ORGANISM> missing");
	}

	if($line =~ /<LINEAGE>([^<]*)<\/LINEAGE>/o) {
		@{$header->{'LINEAGE'}}
			= reverse(split(/\s*;\s*/o, $1));
		$line = $self->_readtag();
	} else {
		$self->throw("Required <LINEAGE> missing");
	}

	if($line =~ /<SEQ_GROUP>([^<]*)<\/SEQ_GROUP>/o) {
		$header->{'SEQ_GROUP'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <SEQ_GROUP> missing");
	}

	while($line =~ /<KEYWORDS>[^<]<\/KEYWORDS>/o) {
		push(@{$header->{'KEYWORDS'}}, $1);
		$line = $self->_readtag();
	}

	while($line =~ /<GB_DESCRIPTION>([^<]+)<\/GB_DESCRIPTION>/o) {
		push(@{$header->{'GB_DESCRIPTION'}},$1);
		$line = $self->_readtag();
	}

	while($line =~ /<GB_COMMENT>([^<]+)<\/GB_COMMENT>/o) {
		push(@{$header->{'GB_COMMENT'}}, $1);
		$line = $self->_readtag();
	}

	if(my %h = ($line =~ /<AUTHOR_LIST(?:\s*(\w+)\s*=\s*"([^"]+)"\s*)*>/o)) {
		$header->{'AUTHOR_LIST'}=$h{'CONTACT'};
		while($line !~ /<\/AUTHOR_LIST>/o) {
			$self->_readtag();
		}
		$line = $self->_readline();
	} else {
		$self->throw("Required <AUTHOR_LIST> missing");
	}

	if($line =~ /<\/HEADER>/o) {
		return $header;
	}

	$self->throw("Reached end of header\n");
}
_process_gene_listdescriptionprevnextTop
sub _process_gene_list {
	my($self) = @_;
	my $gene;
	my $line;

	$line = $self->_readline();
	if($line !~ /<GENE_LIST>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_gene_list called ",
		             "but no <GENE_LIST> in the stream");
	}

	$line = $self->_readline();
	if($line =~ /<PROTEIN_CODING>/o) {
		$self->_pushback($line);
		$gene->{'PROTEIN_CODING'} = $self->_process_protein_coding();
		$line = $self->_readline();
	} else {
		$self->throw("Required <PROTEIN_CODING> missing");
	}

	if($line =~ /<RNA_GENES>/o) {
		$self->_pushback($line);
		$gene->{'RNA_GENES'} = $self->_process_rna_genes();
		$line = $self->_readline();
	} else {
		$self->throw("Required <RNA_GENES> missing");
	}

	if($line =~ /<\/GENE_LIST>/o) {
		return $gene;
	}

	$self->throw("Reached end of _process_gene_list");
}
_process_protein_codingdescriptionprevnextTop
sub _process_protein_coding {
	my ($self) = @_;
	my $prot;
	my $line = $self->_readline();

	if($line !~ /<PROTEIN_CODING>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_protein_coding called"
		             . "but no <GENE_LIST> in the stream");
	}

	$line = $self->_readline();
	while($line =~ /<TU>/o) {
		$self->_pushback($line);
		push(@{$prot->{'TU'}}, $self->_process_tu());
		$line = $self->_readline();
	}

	if($line =~ /<\/PROTEIN_CODING>/o) {
		return $prot;
	}

	$self->throw("Reached end of _process_protein_coding");
}
_process_rna_genesdescriptionprevnextTop
sub _process_rna_genes {
	my ($self) = @_;
	my $line = $self->_readline();

	if($line =~ /<RNA_GENES>/o) {
		while($line !~ /<\/RNA_GENES>/o) {
			$line = $self->_readline();
		}
	} else {
		$self->throw("Bio::SeqIO::tigr::_process_rna_genes called ",
		             "but no <RNA_GENES> in the stream");
	}
}
_process_misc_infodescriptionprevnextTop
sub _process_misc_info {
	my ($self) = @_;
	my $line = $self->_readline();

	if($line =~ /<MISC_INFO>/o) {
		while($line !~ /<\/MISC_INFO>/o) {
			$line = $self->_readline();
		}
	} else {
		$self->throw("Bio::SeqIO::tigr::_process_misc_info called ",
		             "but no <MISC_INFO> in the stream");
	}
}
_process_repeat_listdescriptionprevnextTop
sub _process_repeat_list {
	my ($self) = @_;
	my $line = $self->_readline();

	if($line =~ /<REPEAT_LIST>/o) {
		while($line !~ /<\/REPEAT_LIST>/o) {
			$line = $self->_readline();
		}
	} else {
		$self->throw("Bio::SeqIO::tigr::_process_repeat_list called ",
		             "but no <MISC_INFO> in the stream");
	}
}
_process_scaffolddescriptionprevnextTop
sub _process_scaffold {
	my ($self) = @_;
	my $line;

	# for now we just skip them
$line = $self->_readline(); return if $line !~ /<SCAFFOLD>/o; do { $line = $self->_readline(); } while(defined($line) && $line !~ /<\/SCAFFOLD>/o);
}
_process_tudescriptionprevnextTop
sub _process_tu {
	my($self) = @_;
	my $line = $self->_readline();
	my $tu;

	if($line !~ /<TU>/o) {
		$self->throw("Process_tu called when no <TU> tag");
	}

	$line = $self->_readtag();
	if ($line =~ /<FEAT_NAME>([\w\.]+)<\/FEAT_NAME>/o) {
		$tu->{'FEAT_NAME'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Invalid Feat_Name");
	}

	while($line =~ /<GENE_SYNONYM>/o) {
		$line = $self->_readtag();
	}
	
	while($line =~ /<CHROMO_LINK>([\w\.]+)<\/CHROMO_LINK>/o) {
		 push @{$tu->{'CHROMO_LINK'}}, $1;
		 $line = $self->_readtag();
	}

	if ($line =~ /<DATE>([^>]+)<\/DATE>/o) {
		$tu->{'DATE'} = $1;
		$line = $self->_readline();
	} else {
		$self->throw("Invalid Date");
	}

	if ($line =~ /<GENE_INFO>/o) {
		$self->_pushback($line);
		$tu->{'GENE_INFO'} = $self->_process_gene_info();
		$line = $self->_readline();
	} else {
		$self->throw("Invalid Gene_Info");
	}

	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$tu->{'COORDSET'} = $self->_process_coordset();
		$line = $self->_readline();
	} else {
		$self->throw("Invalid Coordset");
	}

	if($line =~ /<MODEL[^>]*>/o) {
		do {
			$self->_pushback($line);
			push(@{$tu->{'MODEL'}},
				$self->_process_model());
			$line = $self->_readline();
		} while($line =~ /<MODEL[^>]*>/o);
		$self->_pushback($line);
		$line = $self->_readtag();
	} else {
		$self->throw("Expected <MODEL> not found");
	}

	if($line =~ /<TRANSCRIPT_SEQUENCE>/o) {
		$line = $self->_readtag();
	}

	if($line =~ /<GENE_EVIDENCE>/o) {
		$line = $self->_readtag();
	}

	while($line =~ /<URL[^>]*>[^<]*<\/URL>/o) {
		$line = $self->_readtag();
	}

	if($line =~ /<\/TU>/o) {
		return $tu;
	} else {
		$self->throw("Expected </TU> not found: $line");
	}
}
_process_gene_infodescriptionprevnextTop
sub _process_gene_info {
	my($self) = @_;
	my $line = $self->_readline();
	my $geneinfo;

	$self->throw("Invalid Gene Info: $line") if $line !~ /<GENE_INFO>/o;
	$line = $self->_readline();

	if($line =~ /<LOCUS>([^>]*)<\/LOCUS>/o) {
		$geneinfo->{'LOCUS'} = $1;
		$line = $self->_readline();
	} else {
		$self->throw("Invalid Locus");
	}

	if($line =~ /<ALT_LOCUS>([^>]+)<\/ALT_LOCUS>/o) {
		$geneinfo->{'ALT_LOCUS'} = $1;
		$line = $self->_readline();
	}

	if($line =~ /<PUB_LOCUS>([^>]+)<\/PUB_LOCUS>/o) {
		$geneinfo->{'PUB_LOCUS'} = $1;
		$line = $self->_readtag();
	} else {
#		$self->throw("Invalid Pub_Locus");
} if($line =~ /<COM_NAME\s+CURATED="(\d+)">([^>]+)<\/COM_NAME>/o) { $geneinfo->{'CURATED'} = $1; $geneinfo->{'COM_NAME'} = $2; $line = $self->_readtag(); } else { $self->throw("invalid com_name"); } while($line =~ /<COMMENT>([^<]+)<\/COMMENT>/o) { push(@{$geneinfo->{'COMMENTS'}}, $1); $line = $self->_readtag(); } while($line =~ /<PUB_COMMENT>([^<]+)<\/PUB_COMMENT>/o) { push(@{$geneinfo->{'COMMENTS'}}, $1); $line = $self->_readtag(); } if($line =~ /<EC_NUM>([\w\-\\\.]+)<\/EC_NUM>/o) { $geneinfo->{'EC_NUM'} = $1; $line = $self->_readtag(); } if($line =~ /<GENE_SYM>([^<]+)<\/GENE_SYM>/o) { $geneinfo->{'GENE_SYM'} = $1; $line = $self->_readtag(); } if($line =~ /<IS_PSEUDOGENE>([^>]+)<\/IS_PSEUDOGENE>/o) { $geneinfo->{'IS_PSEUDOGENE'} = $1; $line = $self->_readtag(); } else { $self->throw("invalid is_pseudogene: $line"); } if($line =~ /<FUNCT_ANNOT_EVIDENCE/o) { $line = $self->_readtag(); } if($line =~ /<DATE>([^>]+)<\/DATE>/o) { $geneinfo->{'DATE'} = $1; $line = $self->_readtag(); } if($line =~ /<GENE_ONTOLOGY>/o) { until($line =~ /<\/GENE_ONTOLOGY>/o) { $line = $self->_readline(); } $line = $self->_readline(); } if($line =~ /<\/GENE_INFO/o) { return $geneinfo; } $self->throw("unexpected end of gene_info");
}
_process_modeldescriptionprevnextTop
sub _process_model {
	my($self) = @_;
	my $line;
	my $model;

	$line = $self->_readline();
	if($line !~ /<MODEL ([^>]+)>/o) {
		$self->throw("Invalid Model: $line")
	}
	my %attribs = ($1 =~ /(\w+)\s*=\s*"([^"]*)"/og);
	$model->{'CURATED'} = $attribs{'CURATED'};
	$line = $self->_readline();

	if($line =~ /<FEAT_NAME>([^>]+)<\/FEAT_NAME>/o) {
		$model->{'FEAT_NAME'} = $1;
		$line = $self->_readline();
	} else {
		$self->throw("Invalid Feature Name: $line");
	}

	if($line =~ /<PUB_LOCUS>([^>]+)<\/PUB_LOCUS>/o) {
		$model->{'PUB_LOCUS'} = $1;
		$line = $self->_readline();
	} else {
#		$self->throw("Invalid Pub_Locus: $line");
} if($line =~ /<CDNA_SUPPORT>/o) { $self->_pushback($line); $self->_process_cdna_support(); $line = $self->_readline(); } while($line =~ /<CHROMO_LINK>([^>]+)<\/CHROMO_LINK>/o) { push @{$model->{'CHROMO_LINK'}}, $1; $line = $self->_readline(); } if($line =~ /<DATE>([^>]+)<\/DATE>/o) { $model->{'DATE'} = $1; $line = $self->_readline(); } else { $self->throw("Invalid Date: $line"); } if($line =~ /<COORDSET>/o) { $self->_pushback($line); $model->{'COORDSET'} = $self->_process_coordset(); $line = $self->_readline(); } else { $self->throw("Invalid Coordset: $line"); } if($line =~ /<EXON>/o) { do { $self->_pushback($line); push(@{$model->{'EXON'}}, $self->_process_exon()); $line = $self->_readline(); } while($line =~ /<EXON>/o); } else { $self->throw("Required <EXON> missing"); } until($line =~ /<\/MODEL>/o) { $line = $self->_readline(); } return $model;
}
_process_cdna_supportdescriptionprevnextTop
sub _process_cdna_support {
	my($self) = @_;
	my $line = $self->_readline();

	if($line !~ /<CDNA_SUPPORT>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_cdna_support called ",
		             "but no <CDNA_SUPPORT> in the stream");
	}

	# TODO Add CDNA Support
do { $line = $self->_readline(); } while($line !~ /<\/CDNA_SUPPORT>/o);
}
_process_exondescriptionprevnextTop
sub _process_exon {
	my($self) = @_;
	my $line = $self->_readline();
	my $exon;

	if($line !~ /<EXON>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_exon called ",
		             "but no <EXON> in the stream");
	}

	$line = $self->_readtag();
	if($line =~ /<FEAT_NAME>([^<]+)<\/FEAT_NAME>/o) {
		$exon->{'FEAT_NAME'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <FEAT_NAME> missing");
	}

	if($line =~ /<DATE>([^<]+)<\/DATE>/o) {
		$exon->{'DATE'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <DATE> missing");
	}

	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$exon->{'COORDSET'} = $self->_process_coordset();
		$line = $self->_readline();
	} else {
		$self->throw("Required <COORDSET> missing");
	}

	if($line =~ /<CDS>/o) {
		$self->_pushback($line);
		$exon->{'CDS'} = $self->_process_cds();
		$line = $self->_readline();
	}

	if($line =~ /<UTRS>/o) {
		$self->_pushback($line);
		$exon->{'UTRS'} = $self->_process_utrs();
		$line = $self->_readline();
	}

	if($line =~ /<\/EXON>/o) {
		return $exon;
	}

	$self->throw("Reached End of Bio::SeqIO::tigr::_process_exon");
}
_process_cdsdescriptionprevnextTop
sub _process_cds {
	my($self) = @_;
	my $line = $self->_readline();
	my $cds;

	if($line !~ /<CDS>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_cda_support called ",
		             "but no <CDS> in the stream");
	}
	
	$line = $self->_readtag();
	if($line =~ /<FEAT_NAME>([^<]+)<\/FEAT_NAME>/o) {
		$cds->{'FEAT_NAME'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <FEAT_NAME> missing");
	}

	if($line =~ /<DATE>([^<]+)<\/DATE>/o) {
		$cds->{'DATE'} = $1;
		$line = $self->_readtag();
	} else {
		$self->throw("Required <DATE> missing");
	}

	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$cds->{'COORDSET'} = $self->_process_coordset();
		$line = $self->_readtag();
	} else {
		$self->throw("Required <COORDSET> missing");
	}

	if($line =~ /<\/CDS>/o) {
		return $cds;
	}

	$self->throw("Reached onf of Bio::SeqIO::tigr::_process_cds");
}
_process_utrsdescriptionprevnextTop
sub _process_utrs {
	my($self) = @_;
	my $line = $self->_readline();
	my $utrs;

	if($line !~ /<UTRS/o) {
		$self->throw("Bio::SeqIO::tigr::_process_utrs called but no ",
		             "<UTRS> found in stream");
	}

	$line = $self->_readline();
	while($line !~ /<\/UTRS>/o) {
		$self->_pushback($line);
		if($line =~ /<LEFT_UTR>/o) {
			push(@{$utrs->{'LEFT_UTR'}}, $self->_process_left_utr());
		} elsif ($line =~ /<RIGHT_UTR>/o) {
			push(@{$utrs->{'RIGHT_UTR'}}, $self->_process_right_utr());
		} elsif ($line =~ /<EXTENDED_UTR>/o) {
			push(@{$utrs->{'EXTENDED_UTR'}}, $self->_process_ext_utr());
		} else {
			$self->throw("Unexpected tag");
		}
	
		$line = $self->_readline();
	}

	if($line =~ /<\/UTRS>/o) {
		return $utrs;
	}
	$self->throw("Reached end of Bio::SeqIO::tigr::_process_utrs");
}
_process_left_utrdescriptionprevnextTop
sub _process_left_utr {
	my($self) = @_;
	my $line = $self->_readline();
	my $coordset;

	if($line !~ /<LEFT_UTR>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_left_utr called but ",
		             "no <LEFT_UTR> found in stream");
	}

	$line = $self->_readtag();
	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$coordset = $self->_process_coordset();
	} else {
		$self->throw("Required <COORDSET> missing");
	}

	$line = $self->_readline();
	if($line =~ /<\/LEFT_UTR>/o) {
		return $coordset;
	}
	$self->throw("Reached end of Bio::SeqIO::tigr::_process_left_utr");
}
_process_right_utrdescriptionprevnextTop
sub _process_right_utr {
	my($self) = @_;
	my $line = $self->_readline();
	my $coordset;

	if($line !~ /<RIGHT_UTR>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_right_utr called but ",
		             "no <RIGHT_UTR> found in stream");
	}

	$line = $self->_readtag();
	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$coordset = $self->_process_coordset();
	} else {
		$self->throw("Required <COORDSET> missing");
	}

	$line = $self->_readline();
	if($line =~ /<\/RIGHT_UTR>/o) {
		return $coordset;
	}
	$self->throw("Reached end of Bio::SeqIO::tigr::_process_right_utr");
}
_process_ext_utrdescriptionprevnextTop
sub _process_ext_utr {
	my($self) = @_;
	my $line = $self->_readline();
	my $coordset;

	if($line !~ /<EXTENDED_UTR>/o) {
		$self->throw("Bio::SeqIO::tigr::_process_ext_utr called but ",
		             "no <EXTENDED_UTR> found in stream");
	}

	$line = $self->_readtag();
	if($line =~ /<COORDSET>/o) {
		$self->_pushback($line);
		$coordset = $self->_process_coordset();
	} else {
		$self->throw("Required <COORDSET> missing");
	}

	$line = $self->_readline();
	if($line =~ /<\/EXTENDED_UTR>/o) {
		return $coordset;
	}
	$self->throw("Reached end of Bio::SeqIO::tigr::_process_ext_utr");
}
_readtagdescriptionprevnextTop
sub _readtag {
	my($self) = @_;
	my $line = $self->_readline();
	chomp($line);

	my $tag;
	if(($tag) = ($line =~ /^[^<]*<\/(\w+)/o)) {
		$self->_pushback($1) if $line =~ /<\/$tag>(.+)$/;
		return "</$tag>";
	}
 
	until(($tag) = ($line =~ /<(\w+)[^>]*>/o)) {
		$line = $self->_readline();
		chomp $line;
	}

	until($line =~ /<\/$tag>/) {
		$line .= $self->_readline();
	}

	if(my ($val) = ($line =~ /(<$tag.*>.*?<\/$tag>)/s)) {
		if($line =~ /<\/$tag>\s*(\w+[\s\w]*?)\s*$/s) {
			$self->_pushback($1)
		}
		return $val;
	}
	$self->throw("summerror");
}
_readlinedescriptionprevnextTop
sub _readline {
	my($self) = @_;
	my $line;
	do {
		$line = $self->SUPER::_readline();
	} while(defined($line) and $line =~ /^\s*$/o);

	return $line;
}
throwdescriptionprevnextTop
sub throw {
	my($self, @s) = @_;
	my $string = "[$.]" . join('', @s);
	$self->SUPER::throw($string);
}
General documentation
FEEDBACKTop
Mailing ListsTop
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
  bioperl-l@bioperl.org            - General discussion
  http://bioperl.org/MailList.shtml - About the mailing lists
Reporting BugsTop
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution.
Bug reports can be submitted via email or the web:
  bioperl-bugs@bio.perl.org
  http://bio.perl.org/bioperl-bugs/
AUTHORS - Josh LaurichaTop
Email: laurichj@cs.ucr.edu
APPENDIXTop
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
next_seqTop
 Title   : next_seq
 Usage   : $seq = $stream->next_seq()
 Function: returns the next sequence in the stream
 Returns : Bio::Seq object
 Args    : NONE