BEGIN {
%MODEMAP = (
'HMMER_Output' => 'result',
'Hit' => 'hit',
'Hsp' => 'hsp'
);
%MAPPING = (
'Hsp_bit-score' => 'HSP-bits',
'Hsp_score' => 'HSP-score',
'Hsp_evalue' => 'HSP-evalue',
'Hsp_query-from' => 'HSP-query_start',
'Hsp_query-to' => 'HSP-query_end',
'Hsp_hit-from' => 'HSP-hit_start',
'Hsp_hit-to' => 'HSP-hit_end',
'Hsp_positive' => 'HSP-conserved',
'Hsp_identity' => 'HSP-identical',
'Hsp_gaps' => 'HSP-hsp_gaps',
'Hsp_hitgaps' => 'HSP-hit_gaps',
'Hsp_querygaps' => 'HSP-query_gaps',
'Hsp_qseq' => 'HSP-query_seq',
'Hsp_hseq' => 'HSP-hit_seq',
'Hsp_midline' => 'HSP-homology_seq',
'Hsp_align-len' => 'HSP-hsp_length',
'Hsp_query-frame' => 'HSP-query_frame',
'Hsp_hit-frame' => 'HSP-hit_frame',
'Hit_id' => 'HIT-name',
'Hit_len' => 'HIT-length',
'Hit_accession' => 'HIT-accession',
'Hit_desc' => 'HIT-description',
'Hit_signif' => 'HIT-significance',
'Hit_score' => 'HIT-score',
'HMMER_program' => 'RESULT-algorithm_name',
'HMMER_version' => 'RESULT-algorithm_version',
'HMMER_query-def' => 'RESULT-query_name',
'HMMER_query-len' => 'RESULT-query_length',
'HMMER_query-acc' => 'RESULT-query_accession',
'HMMER_querydesc' => 'RESULT-query_description',
'HMMER_hmm' => 'RESULT-hmm_name',
'HMMER_seqfile' => 'RESULT-sequence_file',
'HMMER_db' => 'RESULT-database_name',
);} |
sub next_result
{ my ($self) = @_;
my $seentop = 0; my $reporttype;
my ( $last, @hit_list, @hsp_list, %hspinfo, %hitinfo, %domaincounter );
local $/ = "\n";
local $_;
my $verbose = $self->verbose; $self->start_document();
local ($_);
if(!defined( $_ = $self->_readline) ) {
return undef;
}
else{
$self->_pushback($_);
}
while ( defined( $_ = $self->_readline ) ) {
my $lineorig = $_;
chomp;
if ( $_ =~ m/^\#\s(\S+)\s\:\:\s/ ){ my $prog = $1; $self->start_element( { 'Name' => 'HMMER_Output' } );
$self->{'_result_count'}++; $self->element(
{
'Name' => 'HMMER_program',
'Data' => uc($prog)
}
);
}
elsif ( $_ =~ m/^\#\sHMMER\s+(\S+)\s+\((.+)\)/ ) { my $version = $1; my $versiondate = $2;
$self->{'_hmmidline'} = $_;
$self->element(
{
'Name' => 'HMMER_version',
'Data' => $version
}
);
}
elsif( $_ =~ /^\#\squery \w+ file\:\s+(\S+)/ ){
if( $self->{'_reporttype'} eq 'HMMSEARCH') {
$self->{'_hmmfileline'} = $lineorig;
$self->element(
{
'Name' => 'HMMER_hmm',
'Data' => $1
}
);
}
elsif( $self->{'_reporttype'} eq 'HMMSCAN' ) {
$self->{'_hmmseqline'} = $lineorig;
$self->element(
{
'Name' => 'HMMER_seqfile',
'Data' => $1
}
);
}
}
elsif( $_ =~ m/^\#\sshow\salignments\sin\soutput/ ){ $self->{'_alnreport'} = 0; }
elsif( $_ =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ){ # $self->{'_hmmseqline'} = $lineorig;
if( $self->{'_reporttype'} eq 'HMMSEARCH') {
$self->{'_hmmseqline'} = $lineorig;
$self->element(
{
'Name' => 'HMMER_seqfile',
'Data' => $1
}
);
}
elsif( $self->{'_reporttype'} eq 'HMMSCAN' ) {
$self->{'_hmmfileline'} = $lineorig;
$self->element(
{
'Name' => 'HMMER_hmm',
'Data' => $1
}
);
}
}
elsif( $_ =~ s/^Query:\s+// ) {
unless( s/\s+\[[L|M]\=(\d+)\]$// ){
warn "Error parsing length for query, offending line $_\n";
exit(0);
}
my $querylen = $1;
$self->element(
{
'Name' => 'HMMER_query-len',
'Data' => $querylen
}
);
$self->element(
{
'Name' => 'HMMER_query-def',
'Data' => $_
}
);
}
elsif( $_ =~ s/^Accession:\s+// ){
s/\s+$//;
$self->element(
{
'Name' => 'HMMER_query-acc',
'Data' => $_
}
);
}
elsif( $_ =~ s/^Description:\s+// ){
s/\s+$//;
$self->element(
{
'Name' => 'HMMER_querydesc',
'Data' => $_
}
);
}
elsif( defined $self->{'_reporttype'} &&
(
$self->{'_reporttype'} eq 'HMMSEARCH' ||
$self->{'_reporttype'} eq 'HMMSCAN'
)
){
if( $_ =~ m/Scores for complete sequence/){ while (defined( $_ = $self->_readline ) ) { if ($_ =~ m/inclusion threshold/ || m/Domain( and alignment)? annotation for each/ || m/\[No hits detected/ || m!^//! ){ $self->_pushback($_); last;
}
next if ( m/\-\-\-/ || m/^\s+E\-value\s+score/ || m/^$/); my (
$eval_full, $score_full, $bias_full,
$eval_best, $score_best, $bias_best,
$exp, $n, $hitid, $desc, @hitline
);
@hitline = split(" ", $_);
$eval_full = shift @hitline;
$score_full = shift @hitline;
$bias_full = shift @hitline;
$eval_best = shift @hitline;
$score_best = shift @hitline;
$bias_best = shift @hitline;
$exp = shift @hitline;
$n = shift @hitline;
$hitid = shift @hitline;
$desc = join " ", @hitline;
if( !defined( $desc ) ){
$desc = "";
}
push @hit_list, [ $hitid, $desc, $eval_full, $score_full ];
$hitinfo{$hitid}= $#hit_list;
}
}
elsif( $_ =~ m/inclusion threshold/ ){ while( defined( $_ = $self->_readline ) ) { if( $_ =~ m/Domain( and alignment)? annotation for each/ || m/Internal pipeline statistics summary/ ){ $self->_pushback($_); last;
}
next if( $_ =~ m/^$/ ); my (
$eval_full, $score_full, $bias_full,
$eval_best, $score_best, $bias_best,
$exp, $n, $hitid, $desc, @hitline
);
@hitline = split(" ", $_);
$eval_full = shift @hitline;
$score_full = shift @hitline;
$bias_full = shift @hitline;
$eval_best = shift @hitline;
$score_best = shift @hitline;
$bias_best = shift @hitline;
$exp = shift @hitline;
$n = shift @hitline;
$hitid = shift @hitline;
$desc = join " ", @hitline;
$hitinfo{$hitid} = "below_inclusion";
}
}
elsif( $_ =~ m/Domain( and alignment)? annotation for each/){ @hsp_list = (); #here for multi-query reports my $name;
while( defined( $_ = $self->_readline ) ) {
if ($_ =~ m/Internal pipeline statistics/ || m/\[No targets detected/ ){ $self->_pushback($_); last;
}
if( $_ =~ m/^\>\>\s(.*?)\s+/ ) { $name = $1; next if( $hitinfo{$name} eq "below_inclusion");
$domaincounter{$name} = 0;
while( defined( $_ = $self->_readline ) ) {
if ($_ =~ m/Internal pipeline statistics/ || $_ =~ m/^\>\>/ ){ $self->_pushback($_); last;
}
if ( $_ =~ m/Alignments for each domain/ ) { $self->_pushback($_); last;
}
if ( $_ =~ m/^\s+\#\s+score/ || $_ =~ m/^\s\-\-\-\s+/ || # $_ =~ m/^\>\>/ || $_ =~ m/^$/ ){ next; }
if(
my ($domain_num, $score, $bias, $ceval,
$ieval, $hmmstart, $hmmstop,
$qalistart, $qalistop, $envstart,
$envstop, $envbound, $acc) =
m!^\s+(\d+)\s\!*\?*\s+ #domain num (\S+)\s+(\S+)\s+ #score, bias (\S+)\s+(\S+)\s+ #c-eval, i-eval (\d+)\s+(\d+).+? #hmm start, stop (\d+)\s+(\d+).+? #query start, stop (\d+)\s+(\d+).+? #env start, stop (\S+) #acc \s*$!ox ){ #keeping simple for now. let's customize later my @vals = ($hmmstart, $hmmstop, $qalistart, $qalistop, $score, $ceval, '', '', ''); my $info = $hit_list[ $hitinfo{$name} ];
if( !defined $info ){
$self->warn(
"Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n"
);
next;
}
$domaincounter{$name}++;
my $hsp_key = $name . "_" . $domaincounter{$name};
push @hsp_list, [ $name, @vals ];
$hspinfo{$hsp_key} = $#hsp_list;
}
else{
print "missed this line: $_\n";
}
}
}
elsif ($_ =~ m/Alignments for each domain/ ) { my $domain_count = 0; my $count = 0;
my $max_count = 3;
my $lastdomain;
my $hsp;
my ($hline, $midline, $qline);
while( defined( $_ = $self->_readline ) ) {
if( $_ =~ m/^\>\>/ || $_ =~ m/Internal pipeline statistics/){ $self->_pushback($_); last;
}
elsif( $hitinfo{$name} eq "below_inclusion" ||
$_ =~ m/^$/ ) { next; }
elsif( $_ =~ /\s\s\=\=\sdomain\s(\d+)\s+/){
my $domainnum = $1;
$count = 0;
my $key = $name . "_" . $domainnum;
$hsp = $hsp_list[ $hspinfo{$key} ];
$hline = $$hsp[-3];
$midline = $$hsp[-2];
$qline = $$hsp[-1];
$lastdomain = $name;
}
elsif( $_ =~ m/\s+\S+\sCS$/ ){ my $modeltrack = $_; $max_count++;
$count++;
next;
}
elsif( $count == $max_count - 3 ){
my @data = split(" ", $_);
my $seq = $data[-2];
$hline .= $seq;
$count++;
next;
}
elsif( $count == $max_count - 2 ){
$_ =~ s/^\s+//;
$_ =~ s/\s+$//;
$midline .= $_;
$count++;
next;
}
elsif( $count == $max_count - 1 ){
my @data = split(" ", $_);
my $seq = $data[-2];
$qline .= $seq;
$count++;
next;
}
elsif( $count == $max_count ){
my $pvals = $_;
$count = 0;
$max_count = 3;
$$hsp[-3] = $hline;
$$hsp[-2] = $midline;
$$hsp[-1] = $qline;
next;
}
else{
print "missed $_\n";
}
}
}
}
}
elsif( m/Internal pipeline statistics/ || m!^//! ){ # if within hit, hsp close; if ( $self->within_element('hit') ) {
if ( $self->within_element('hsp') ) {
$self->end_element( { 'Name' => 'Hsp' } );
}
$self->end_element( { 'Name' => 'Hit' } );
}
while( defined( $_ = $self->_readline ) ) {
last if ( $_ =~ m/^\/\/$/ ); }
while( my $hit = shift @hit_list ) {
my $hit_name = shift @$hit;
my $hit_desc = shift @$hit;
my $hit_signif = shift @$hit;
my $hit_score = shift @$hit;
my $num_domains = $domaincounter{$hit_name} || 0;
$self->start_element( { 'Name' => 'Hit' } );
$self->element(
{
'Name' => 'Hit_id',
'Data' => $hit_name
}
);
$self->element(
{
'Name' => 'Hit_desc',
'Data' => $hit_desc
}
);
$self->element(
{
'Name' => 'Hit_signif',
'Data' => $hit_signif
}
);
$self->element(
{
'Name' => 'Hit_score',
'Data' => $hit_score
}
);
for my $i (1..$num_domains) {
my $key = $hit_name . "_" . $i;
my $hsp = $hsp_list[ $hspinfo{$key} ];
if(defined $hsp) {
my $hsp_name = shift @$hsp;
$self->start_element( { 'Name' => 'Hsp' } );
$self->element( {
'Name' => 'Hsp_identity',
'Data' => 0
} );
$self->element( {
'Name' => 'Hsp_positive',
'Data' => 0
} );
$self->element( {
'Name' => 'Hsp_hit-from',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_hit-to',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_query-from',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_query-to',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_score',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_evalue',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_hseq',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_midline',
'Data' => shift @$hsp
} );
$self->element( {
'Name' => 'Hsp_qseq',
'Data' => shift @$hsp
} );
$self->end_element( { 'Name' => 'Hsp' } );
}
}
$self->end_element( { 'Name' => 'Hit' } );
}
@hit_list = ();
%hitinfo = ();
last;
}
}
else{
print "missed: $_\n";
$self->debug($_);
}
$last = $_;
}
$self->end_element( { 'Name' => 'HMMER_Output' } );
my $result = $self->end_document();
return $result; } |
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _