| Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
use Bio::SearchIO;
use Bio::SearchIO::Writer::HTMLResultWriter;
my $in = Bio::SearchIO->new(-format => 'blast', -file => shift @ARGV); my $writer = Bio::SearchIO::Writer::HTMLResultWriter->new(); my $out = Bio::SearchIO->new(-writer => $writer); $out->write_result($in->next_result); # to filter your output my $MinLength = 100; # need a variable with scope outside the method sub hsp_filter { my $hsp = shift; return 1 if $hsp->length('total') > $MinLength; } sub result_filter { my $result = shift; return $hsp->num_hits > 0; } my $writer = Bio::SearchIO::Writer::HTMLResultWriter->new (-filters => { 'HSP' => \&hsp_filter} ); my $out = Bio::SearchIO->new(-writer => $writer); $out->write_result($in->next_result); # can also set the filter via the writer object $writer->filter('RESULT', \&result_filter);
| BEGIN | Code | |
| new | Description | Code |
| remote_database_url | Description | Code |
| to_string | Description | Code |
| hit_link_desc | Description | Code |
| default_hit_link_desc | Description | Code |
| hit_link_align | Description | Code |
| hit_desc_line | Description | Code |
| default_hit_desc_line | Description | Code |
| start_report | Description | Code |
| default_start_report | Description | Code |
| title | Description | Code |
| default_title | Description | Code |
| introduction | Description | Code |
| default_introduction | Description | Code |
| end_report | Description | Code |
| id_parser | Description | Code |
| default_id_parser | Description | Code |
| MAX | No description | Code |
| footer | No description | Code |
| algorithm_reference | Description | Code |
| _numwithcommas | No description | Code |
| no_wublastlinks | Description | Code |
| new | code | next | Top |
Title : new |
| remote_database_url | code | prev | next | Top |
Title : remote_database_url |
| to_string | code | prev | next | Top |
Purpose : Produces data for each Search::Result::ResultI in a string. |
| hit_link_desc | code | prev | next | Top |
Title : hit_link_desc |
| default_hit_link_desc | code | prev | next | Top |
Title : default_hit_link_descSee Also: hit_link_align, remote_database, id_parser |
| hit_link_align | code | prev | next | Top |
Title : hit_link_alignSee Also: hit_link_desc, remote_database, id_parser |
| hit_desc_line | code | prev | next | Top |
Title : hit_desc_line |
| default_hit_desc_line | code | prev | next | Top |
Title : default_hit_desc_line |
| start_report | code | prev | next | Top |
Title : start_report |
| default_start_report | code | prev | next | Top |
Title : default_start_report |
| title | code | prev | next | Top |
Title : title |
| default_title | code | prev | next | Top |
Title : default_title |
| introduction | code | prev | next | Top |
Title : introduction |
| default_introduction | code | prev | next | Top |
Title : default_introduction |
| end_report | code | prev | next | Top |
Title : end_report |
| id_parser | code | prev | next | Top |
Title : id_parser |
| default_id_parser | code | prev | next | Top |
Title : default_id_parser |
| algorithm_reference | code | prev | next | Top |
Title : algorithm_reference |
| no_wublastlinks | code | prev | next | Top |
Title : no_wublastlinks |
| BEGIN | Top |
$Revision = '$Id: HTMLResultWriter.pm 14697 2008-06-04 14:22:22Z heikki $'; $DATE = localtime(time); %RemoteURLDefault = ( 'PROTEIN' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=protein&cmd=search&term=%s', 'NUCLEOTIDE' => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=nucleotide&cmd=search&term=%s' ); $MaxDescLen = 60; $AlignmentLineWidth = 60;}
| new | description | prev | next | Top |
my($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($p,$n,$filters, $nowublastlinks) = $self->_rearrange([qw(PROTEIN_URL NUCLEOTIDE_URL FILTERS NO_WUBLASTLINKS)],@args); $self->remote_database_url('p',$p || $RemoteURLDefault{'PROTEIN'}); $self->remote_database_url('n',$n || $RemoteURLDefault{'NUCLEOTIDE'}); $self->no_wublastlinks(! $nowublastlinks); if( defined $filters ) { if( !ref($filters) =~ /HASH/i ) { $self->warn("Did not provide a hashref for the FILTERS option, ignoring."); } else { while( my ($type,$code) = each %{$filters} ) { $self->filter($type,$code); } } } return $self;}
| remote_database_url | description | prev | next | Top |
my ($self,$type,$value) = @_; if( ! defined $type || $type !~ /^(P|N)/i ) { $self->warn("Must provide a type (PROTEIN or NUCLEOTIDE)"); return ''; } $type = uc $1; if( defined $value) { $self->{'remote_database_url'}->{$type} = $value; } return $self->{'remote_database_url'}->{$type};}
| to_string | description | prev | next | Top |
my ($self,$result,$num) = @_; $num ||= 0; return unless defined $result; my $links = $self->no_wublastlinks; my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'), $self->filter('HIT'), $self->filter('HSP') ); return '' if( defined $resultfilter && ! &{$resultfilter}($result) ); my ($qtype,$dbtype,$dbseqtype,$type); my $alg = $result->algorithm; # This is actually wrong for the FASTAs I think}
if( $alg =~ /T(FAST|BLAST)([XY])/i ) { $qtype = $dbtype = 'translated'; $dbseqtype = $type = 'PROTEIN'; } elsif( $alg =~ /T(FAST|BLAST)N/i ) { $qtype = ''; $dbtype = 'translated'; $type = 'PROTEIN'; $dbseqtype = 'NUCLEOTIDE'; } elsif( $alg =~ /(FAST|BLAST)N/i || $alg =~ /(WABA|EXONERATE)/i ) { $qtype = $dbtype = ''; $type = $dbseqtype = 'NUCLEOTIDE'; } elsif( $alg =~ /(FAST|BLAST)P/ || $alg =~ /SSEARCH|HMM(PFAM|SEARCH)/i ) { $qtype = $dbtype = ''; $type = $dbseqtype = 'PROTEIN'; } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) { $qtype = 'translated'; $dbtype = 'PROTEIN'; $dbseqtype = $type = 'PROTEIN'; } else { $self->warn("algorithm was ", $result->algorithm, " couldn't match\n"); } my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1, 'Query:' => ( $qtype eq 'translated' ) ? 3 : 1); my $str; if( $num <= 1 ) { $str = &{$self->start_report}($result); } $str .= &{$self->title}($result); $str .= $result->algorithm_reference || $self->algorithm_reference($result); $str .= &{$self->introduction}($result); $str .= "<table border=0> <tr><th>Sequences producing significant alignments:</th> <th>Score<br>(bits)</th><th>E<br>value</th></tr>"; my $hspstr = '<p><p>'; if( $result->can('rewind')) { $result->rewind(); # support stream based parsing routines
} while( my $hit = $result->next_hit ) { next if( $hitfilter && ! &{$hitfilter}($hit) ); my $nm = $hit->name(); $self->debug( "no $nm for name (".$hit->description(). "\n") unless $nm; my ($gi,$acc) = &{$self->id_parser}($nm); my $p = "%-$MaxDescLen". "s"; my $descsub; if( length($hit->description) > ($MaxDescLen - 3) ) { $descsub = sprintf($p, substr($hit->description,0,$MaxDescLen-3) . "..."); } else { $descsub = sprintf($p,$hit->description); } my $url_desc = &{$self->hit_link_desc()}($self,$hit, $result); my $url_align = &{$self->hit_link_align()}($self,$hit, $result); my @hsps = $hit->hsps; if( ! @hsps ) { # no HSPs so no link
$str .= sprintf('<tr><td>%s %s</td><td>%s</td><td>%.2g</td></tr>'."\n", $url_desc, $descsub, ($hit->bits ? $hit->bits : (defined $hsps[0] ? $hsps[0]->bits : ' ')), ( $hit->significance ? $hit->significance : (defined $hsps[0] ? $hsps[0]->evalue : ' ')) ); } else { # failover to first HSP if the data does not contain a
# bitscore/significance value for the Hit (NCBI XML data for one)
$str .= sprintf('<tr><td>%s %s</td><td>%s</td><td><a href="#%s">%.2g</a></td></tr>'."\n", $url_desc, $descsub, ($hit->bits ? $hit->bits : (defined $hsps[0] ? $hsps[0]->bits : ' ')), $acc, ( $hit->significance ? $hit->significance : (defined $hsps[0] ? $hsps[0]->evalue : ' ')) ); my $dline = &{$self->hit_desc_line}($self, $hit, $result); $hspstr .= "<a name=\"$acc\">\n". sprintf("><b>%s</b> %s</br><dd>Length = %s</dd><p>\n\n", $url_align, $dline , &_numwithcommas($hit->length)); my $ct = 0; foreach my $hsp (@hsps ) { next if( $hspfilter && ! &{$hspfilter}($hsp) ); $hspstr .= sprintf(" Score = %s bits (%s), Expect = %s", $hsp->bits || $hsp->score, $hsp->score || $hsp->bits, $hsp->evalue || ''); if( defined $hsp->pvalue ) { $hspstr .= ", P = ".$hsp->pvalue; } $hspstr .= "<br>\n"; $hspstr .= sprintf(" Identities = %d/%d (%d%%)", ( $hsp->frac_identical('total') * $hsp->length('total')), $hsp->length('total'), $hsp->frac_identical('total') * 100); if( $type eq 'PROTEIN' ) { $hspstr .= sprintf(", Positives = %d/%d (%d%%)", ( $hsp->frac_conserved('total') * $hsp->length('total')), $hsp->length('total'), $hsp->frac_conserved('total') * 100); } if( $hsp->gaps ) { $hspstr .= sprintf(", Gaps = %d/%d (%d%%)", $hsp->gaps('total'), $hsp->length('total'), (100 * $hsp->gaps('total') /
$hsp->length('total')));
} my ($hframe,$qframe) = ( $hsp->hit->frame, $hsp->query->frame); my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand); # so TBLASTX will have Query/Hit frames
# BLASTX will have Query frame
# TBLASTN will have Hit frame
if( $hstrand || $qstrand ) { $hspstr .= ", Frame = "; my ($signq, $signh); unless( $hstrand ) { $hframe = undef; # if strand is null or 0 then it is protein
# and this no frame
} else { $signh = $hstrand < 0 ? '-' : '+'; } unless( $qstrand ) { $qframe = undef; # if strand is null or 0 then it is protein
} else { $signq =$qstrand < 0 ? '-' : '+'; } # remember bioperl stores frames as 0,1,2 (GFF way)
# BLAST reports reports as 1,2,3 so
# we have to add 1 to the frame values
if( defined $hframe && ! defined $qframe) { $hspstr .= "$signh".($hframe+1); } elsif( defined $qframe && ! defined $hframe) { $hspstr .= "$signq".($qframe+1); } else { $hspstr .= sprintf(" %s%d / %s%d", $signq,$qframe+1, $signh, $hframe+1); } } if($links && $hsp->can('links') && defined(my $lnks = $hsp->links) ) { $hspstr .= sprintf("<br>\nLinks = %s\n",$lnks); } $hspstr .= "</a><p>\n<pre>"; my @hspvals = ( {'name' => 'Query:', 'seq' => $hsp->query_string, 'start' => ($qstrand >= 0 ? $hsp->query->start : $hsp->query->end), 'end' => ($qstrand >= 0 ? $hsp->query->end : $hsp->query->start), 'index' => 0, 'direction' => $qstrand || 1 }, { 'name' => ' 'x6, 'seq' => $hsp->homology_string, 'start' => undef, 'end' => undef, 'index' => 0, 'direction' => 1 }, { 'name' => 'Sbjct:', 'seq' => $hsp->hit_string, 'start' => ($hstrand >= 0 ? $hsp->hit->start : $hsp->hit->end), 'end' => ($hstrand >= 0 ? $hsp->hit->end : $hsp->hit->start), 'index' => 0, 'direction' => $hstrand || 1 } ); # let's set the expected length (in chars) of the starting number
# in an alignment block so we can have things line up
# Just going to try and set to the largest
my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}), length($hspvals[0]->{'end'}), length($hspvals[2]->{'start'}), length($hspvals[2]->{'end'})); my $count = 0; while ( $count < $hsp->length('total') ) { foreach my $v ( @hspvals ) { my $piece = substr($v->{'seq'}, $v->{'index'} + $count, $AlignmentLineWidth); my $cp = $piece; my $plen = scalar ( $cp =~ tr/\-//); my ($start,$end) = ('',''); if( defined $v->{'start'} ) { $start = $v->{'start'}; # since strand can be + or - use the direction
# to signify which whether to add or substract from end
my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )* $baselens{$v->{'name'}}; if( length($piece) < $AlignmentLineWidth ) { $d = (length($piece) - $plen) * $v->{'direction'} * $baselens{$v->{'name'}}; } $end = $v->{'start'} + $d - $v->{'direction'}; $v->{'start'} += $d; } $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n", $v->{'name'}, $start, $piece, $end ); } $count += $AlignmentLineWidth; $hspstr .= "\n\n"; } $hspstr .= "</pre>\n"; } } # $hspstr .= "</pre>\n";
} $str .= "</table><p>\n".$hspstr; my ($pav, $sav) = ($result->available_parameters, $result->available_statistics); if ($pav || $sav) { # make table of search statistics and end the web page
$str .= "<p><p><hr><h2>Search Parameters</h2>"; if ($pav) { $str .= "<table border=1><tr><th>Parameter</th><th>Value</th>\n"; foreach my $param ( sort $result->available_parameters ) { $str .= "<tr><td>$param</td><td>". $result->get_parameter($param) ."</td></tr>\n"; } $str .= "</table>"; } if ($sav) { $str .= "<p><h2>Search Statistics</h2><table border=1><tr><th>Statistic</th><th>Value</th></tr>\n"; foreach my $stat ( sort $result->available_statistics ) { $str .= "<tr><td>$stat</td><td>". $result->get_statistic($stat). "</td>\n"; } $str .= "</tr></table>"; } } $str .= $self->footer() . "<P>\n"; return $str;
| hit_link_desc | description | prev | next | Top |
my( $self, $code ) = @_; if ($code) { $self->{'_hit_link_desc'} = $code; } return $self->{'_hit_link_desc'} ||\& default_hit_link_desc;}
| default_hit_link_desc | description | prev | next | Top |
my($self, $hit, $result) = @_; my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE'; my ($gi,$acc) = &{$self->id_parser}($hit->name); my $url = length($self->remote_database_url($type)) > 0 ? sprintf('<a href="%s">%s</a>', sprintf($self->remote_database_url($type),$gi || $acc), $hit->name()) : $hit->name(); return $url;}
| hit_link_align | description | prev | next | Top |
my ($self,$code) = @_; if ($code) { $self->{'_hit_link_align'} = $code; } return $self->{'_hit_link_align'} ||\& default_hit_link_desc;}
| hit_desc_line | description | prev | next | Top |
my( $self, $code ) = @_; if ($code) { $self->{'_hit_desc_line'} = $code; } return $self->{'_hit_desc_line'} ||\& default_hit_desc_line;}
| default_hit_desc_line | description | prev | next | Top |
my($self, $hit, $result) = @_; my $type = ( $result->algorithm =~ /(P|X|Y)$/i ) ? 'PROTEIN' : 'NUCLEOTIDE'; my @descs = split /\x01/, $hit->description; #my $descline = join("</br>",@descs)."</br>";}
my $descline = ''; #return $descline;
for my $sec (@descs) { my $url = ''; if ($sec =~ s/((?:gi\|(\d+)\|)? # optional GI
(\w+)\|([A-Z\d\.\_]+) # main
(\|[A-Z\d\_]+)?) # optional secondary ID//xms) { my ($name, $gi, $db, $acc) = ($1, $2, $3, $4); #$acc ||= ($rest) ? $rest : $gi;
$acc =~ s/^\s+(\S+)/$1/; $acc =~ s/(\S+)\s+$/$1/; $url = length($self->remote_database_url($type)) > 0 ? sprintf('<a href="%s">%s</a> %s', sprintf($self->remote_database_url($type), $gi || $acc || $db), $name, $sec) : $sec; } else { $url = $sec; } $descline .= "$url</br>\n"; } return $descline;
| start_report | description | prev | next | Top |
my( $self, $code ) = @_; if ($code) { $self->{'_start_report'} = $code; } return $self->{'_start_report'} ||\& default_start_report;}
| default_start_report | description | prev | next | Top |
my ($result) = @_; return sprintf( qq{<HTML> <HEAD> <CENTER><TITLE>Bioperl Reformatted HTML of %s output with Bioperl Bio::SearchIO system</TITLE></CENTER></HEAD> <!-------------------------------------------------------------------> <!-- Generated by Bio::SearchIO::Writer::HTMLResultWriter --> <!-- %s --> <!-- http://bioperl.org --> <!-------------------------------------------------------------------> <BODY BGCOLOR="WHITE"> },$result->algorithm,$Revision);}
| title | description | prev | next | Top |
my( $self, $code ) = @_; if ($code) { $self->{'_title'} = $code; } return $self->{'_title'} ||\& default_title;}
| default_title | description | prev | next | Top |
my ($result) = @_; return sprintf( qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>}, $result->algorithm, $result->query_name());}
| introduction | description | prev | next | Top |
my( $self, $code ) = @_; if ($code) { $self->{'_introduction'} = $code; } return $self->{'_introduction'} ||\& default_introduction;}
| default_introduction | description | prev | next | Top |
my ($result) = @_; return sprintf( qq{ <b>Query=</b> %s %s<br><dd>(%s letters)</dd> <p> <b>Database:</b> %s<br><dd>%s sequences; %s total letters<p></dd> <p> }, $result->query_name, $result->query_description, &_numwithcommas($result->query_length), $result->database_name(), &_numwithcommas($result->database_entries()), &_numwithcommas($result->database_letters()), );}
| end_report | description | prev | next | Top |
return "</BODY>\n</HTML>\n"; } # copied from Bio::Index::Fasta}
# useful here as well
| id_parser | description | prev | next | Top |
my( $self, $code ) = @_; if ($code) { $self->{'_id_parser'} = $code; } return $self->{'_id_parser'} ||\& default_id_parser;}
| default_id_parser | description | prev | next | Top |
my ($string) = @_; my ($gi,$acc); if( $string =~ s/gi\|(\d+)\|?// ) { $gi = $1; $acc = $1;} if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) { $acc = defined $2 ? $2 : $1; } else { $acc = $string; $acc =~ s/^\s+(\S+)/$1/; $acc =~ s/(\S+)\s+$/$1/; } return ($gi,$acc);}
| MAX | description | prev | next | Top |
$a <=> $b ? $b : $a;}
| footer | description | prev | next | Top |
my ($self) = @_; return "<hr><h5>Produced by Bioperl module ".ref($self)." on $DATE<br>Revision: $Revision</h5>\n"}
| algorithm_reference | description | prev | next | Top |
my ($self,$result) = @_; return '' if( ! defined $result || !ref($result) || ! $result->isa('Bio::Search::Result::ResultI')) ; if( $result->algorithm =~ /BLAST/i ) { my $res = $result->algorithm . ' ' . $result->algorithm_version . "<p>"; if( $result->algorithm_version =~ /WashU/i ) { return $res . "Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.<br> All Rights Reserved.<p> <b>Reference:</b> Gish, W. (1996-2000) <a href=\"http://blast.wustl.edu\">http://blast.wustl.edu</a><p>"; } else { return $res . "<b>Reference:</b> Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,<br> Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),<br>\" Gapped BLAST and PSI-BLAST: a new generation of protein database search<br> programs\", Nucleic Acids Res. 25:3389-3402.<p>"; } } elsif( $result->algorithm =~ /FAST/i ) { return $result->algorithm . " " . $result->algorithm_version . "<br>" . "\n<b>Reference:</b> Pearson et al, Genomics (1997) 46:24-36<p>"; } else { return ''; } } # from Perl Cookbook 2.17}
| _numwithcommas | description | prev | next | Top |
my $num = reverse( $_[0] ); $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $num;}
| no_wublastlinks | description | prev | next | Top |
my $self = shift; return $self->{'no_wublastlinks'} = shift if @_; return $self->{'no_wublastlinks'}; } 1;}
| FEEDBACK | Top |
| Mailing Lists | Top |
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
| Reporting Bugs | Top |
http://bugzilla.open-bio.org/
| AUTHOR - Jason Stajich | Top |
| CONTRIBUTORS | Top |
| APPENDIX | Top |
| Methods Bio::SearchIO::SearchWriterI | Top |
| filter | Top |
Title : filter
Usage : $writer->filter('hsp', \&hsp_filter);
Function: Filter out either at HSP,Hit,or Result level
Returns : none
Args : string => data type,
CODE reference