Bio::Tools::Run
Vista
Toolbar
Summary
Bio::Tools::Run::Vista
Wrapper for Vista
Package variables
No package variables defined.
Included modules
Bio::Root::IO
Bio::Root::Root
Bio::Seq
Bio::Tools::Run::WrapperBase
File::Copy
Inherit
Bio::Root::Root Bio::Tools::Run::WrapperBase
Synopsis
use Bio::Tools::Run::Vista;
use Bio::Tools::Run::Alignment::Lagan;
use Bio::AlignIO;
use Bio::SeqIO;
my $sio = Bio::SeqIO->new(-file=>$ARGV[0],-format=>'genbank');
my @seq;
my $reference = $sio->next_seq;
push @seq, $reference;
while(my $seq = $sio->next_seq){
push @seq,$seq;
}
my @features = grep{$_->primary_tag eq 'CDS'} $reference->get_SeqFeatures;
my $lagan = Bio::Tools::Run::Alignment::Lagan->new;
my $aln = $lagan->mlagan(\@seq,'(fugu (mouse human))');
my $vis = Bio::Tools::Run::Vista->new('outfile'=>"outfile.pdf",
'title' => "My Vista Plot",
'annotation'=>\@features,
'annotation_format'=>'GFF',
'min_perc_id'=>75,
'min_length'=>100,
'plotmin' => 50,
'tickdist' => 2000,
'window'=>40,
'numwindows'=>4,
'start'=>50,
'end'=>1500,
'tickdist'=>100,
'bases'=>1000,
'java_param'=>"-Xmx128m",
'num_pages'=>1,
'color'=> {'EXON'=>'100 0 0',
'CNS'=>'0 0 100'},
'quiet'=>1);
my $referenceid= 'human';
$vis->run($aln,$referenceid);
#alternatively, one can choose pairwise alignments to plot,
#where the second id in each pair is the reference sequence
$vis->run($aln,[['mouse','human'],['fugu','human'],['mouse','fugu']]);
Description
Please see the Vista documentation for plotfile options.
Wrapper for Vista :
C. Mayor, M. Brudno, J. R. Schwartz, A. Poliakov, E. M. Rubin, K. A. Frazer,
L. S. Pachter, I. Dubchak.
VISTA: Visualizing global DNA sequence alignments of arbitrary length.
Bioinformatics, 2000 Nov;16(11):1046-1047.
Get it here:
http://www-gsd.lbl.gov/vista/VISTAdownload2.html
On the command line, it is assumed that this can be executed:
java Vista plotfile
Some of the code was adapted from MLAGAN toolkit
M. Brudno, C.B. Do, G. Cooper, M.F. Kim, E. Davydov, NISC Sequencing Consortium,
E.D. Green, A. Sidow and S. Batzoglou
LAGAN and Multi-LAGAN: Efficient Tools for Large-Scale Multiple Alignment of Genomic
DNA, Genome Research, in press
get lagan here:
http://lagan.stanford.edu/
Methods
Methods description
Title : new Usage : my $vis = Bio::Tools::Run::Vista->new('outfile'=>$out, 'title' => "My Vista Plot", 'annotation'=>\@features, 'annotation_format'=>'GFF', 'min_perc_id'=>75, 'min_length'=>100, 'plotmin' => 50, 'tickdist' => 2000, 'window'=>40, 'numwindows'=>4, 'start'=>50, 'end'=>1500, 'tickdist'=>100, 'bases'=>1000, 'color'=> {'EXON'=>'100 0 0', 'CNS'=>'0 0 100'}, 'quiet'=>1); Function: Constructor for Vista wrapper Args : outfile - location of the pdf generated annotation - either a file or an array ref of Bio::SeqFeatureI indicating the exons regmin -region min |
Title : java Usage : $obj->java('/usr/opt/java130/bin/java'); Function: Get/set method for the location of java VM Args : File path (optional) |
Title : run Usage : $vis->run($aln, $referenceid) Function: runs Vista Returns : A boolean 1 if no errors Args : Argument 1: Bio::Align::AlignI (required) Argument 2: a string or number, which is the sequence id of the reference sequence or the rank of the sequence in the alignment, or an array ref of [id, reference_id] pairs |
Title : _setinput Usage : Internal function, not to be called directly Function: writes the input alignment to file and returns the plotfile name Example : Returns : string Args : |
Methods code
# Compile-time setup of the package-level tables.
#
#  * $PROGRAMNAME / $PROGRAMDIR / $PROGRAM describe where the java binary
#    lives; $PROGRAMDIR falls back to $ENV{JAVA_HOME} or $ENV{JAVA_DIR}.
#  * %DEFAULT_VALUES holds the parameter values applied by new().
#  * @VISTA_PARAMS lists every parameter name; %OK_FIELD is the lookup
#    table built from it and consulted by AUTOLOAD.
BEGIN {
    $PROGRAMNAME = 'java';

    # Keep a $PROGRAMDIR that was set before this block ran.
    if ( !defined $PROGRAMDIR ) {
        $PROGRAMDIR = $ENV{'JAVA_HOME'} || $ENV{'JAVA_DIR'};
    }

    if ( defined $PROGRAMDIR ) {
        # Probe <dir>/java first, then <dir>/bin/java.
        foreach my $progname ( [qw(java)], [qw(bin java)] ) {
            my $f = Bio::Root::IO->catfile( $PROGRAMDIR, @$progname );

            # -f excludes a *directory* called "java", which passes both
            # -e and -x but cannot be executed as the JVM.
            if ( -e $f && -f $f && -x $f ) {
                $PROGRAM = $f;
                last;
            }
        }
    }

    %DEFAULT_VALUES = (
        'java'        => 'java',
        'min_perc_id' => 75,
        'min_length'  => 100,
        'plotmin'     => 50,
        'bases'       => 10000,
        'tickdist'    => 2000,
        'resolution'  => 25,
        'window'      => 40,
        'title'       => 'VISTA PLOT',
        'numwindows'  => 4,
    );

    @VISTA_PARAMS = qw(JAVA JAVA_PARAM OUTFILE MIN_PERC_ID QUIET VERBOSE ANNOTATION_FORMAT
                       REGION_FILE REGION_FILE_DIR SCORE_FILE SCORE_FILE_DIR ALIGNMENT_FILE_DIR
                       ALIGNMENT_FILE CONTIGS_FILE DIFFS PLOTFILE
                       MIN_LENGTH PLOTMIN ANNOTATION BASES TICKDIST RESOLUTION TITLE PAPER
                       WINDOW NUMWINDOWS START END NUM_PLOT_LINES LEGEND FILENAME NUM_PAGES
                       AXIS_LABEL TICKS_FILE COLOR USE_ORDER GAPS SNPS_FILE REPEATS_FILE
                       FILTER_REPEATS);

    # Authorise every listed name as an AUTOLOAD accessor.
    foreach my $attr (@VISTA_PARAMS) {
        $OK_FIELD{$attr}++;
    }
}
# Generic get/set accessor for the Vista parameters.
#
# The called method name is upper-cased and looked up in %OK_FIELD; the
# value is stored in (and returned from) $self->{UPPERCASED_NAME}.
#
# Args    : optional new value
# Returns : the current value of the parameter
# Throws  : "Unallowed parameter: NAME !" when NAME is not in %OK_FIELD
sub AUTOLOAD {
    my $self = shift;
    my $attr = $AUTOLOAD;

    # Perl dispatches DESTROY through AUTOLOAD when no DESTROY method can be
    # found; object destruction must never be reported as a bad parameter.
    return if $attr =~ /::DESTROY$/;

    $self->debug("************ attr: $attr\n");

    # Strip the package qualifier, leaving the bare method name.
    $attr =~ s/.*:://;
    $attr = uc $attr;

    $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};

    $self->{$attr} = shift if @_;
    return $self->{$attr};
}
# Constructor.
#
# Builds the object through the parent constructor, applies every entry of
# %DEFAULT_VALUES through its accessor, then applies the caller's key/value
# pairs the same way. Keys starting with '-' are not applied here; they are
# only passed to the parent constructor.
sub new {
    my ( $caller, @args ) = @_;
    my $self = $caller->SUPER::new(@args);

    # Defaults first, so that caller-supplied values override them below.
    for my $name ( keys %DEFAULT_VALUES ) {
        $self->$name( $DEFAULT_VALUES{$name} );
    }

    # Consume the argument list two items at a time: (name, value).
    while (@args) {
        my ( $attr, $value ) = splice @args, 0, 2;
        $self->$attr($value) unless $attr =~ /^-/;
    }

    return $self;
}
# Alias for java(): forwards all arguments and returns whatever java() returns.
sub executable {
    my ( $self, @args ) = @_;
    return $self->java(@args);
}
# Get/set the path of the java executable.
#
# Args    : $exe  - optional path to store
#           $warn - optional flag; when true, warn if no executable is found
# Returns : the stored path, or undef when none could be determined
#
# When nothing is stored yet, the package-level $PROGRAM is used if it exists
# and is executable; otherwise $PROGRAMNAME is looked up via io->exists_exe.
sub java {
    my ( $self, $exe, $warn ) = @_;

    $self->{'_pathtojava'} = $exe if defined $exe;

    # Already known (either just set or cached from an earlier call).
    return $self->{'_pathtojava'} if defined $self->{'_pathtojava'};

    if ( $PROGRAM && -e $PROGRAM && -x $PROGRAM ) {
        $self->{'_pathtojava'} = $PROGRAM;
        return $self->{'_pathtojava'};
    }

    my $found = $self->io->exists_exe($PROGRAMNAME);
    if ( $found && -x $found ) {
        $self->{'_pathtojava'} = $found;
    }
    else {
        $self->warn("Cannot find executable for $PROGRAMNAME") if $warn;
        $self->{'_pathtojava'} = undef;
    }

    return $self->{'_pathtojava'};
}
# Run Vista on an alignment.
#
# Args    : $align - the alignment, handed to _setinput
#           $ref   - reference sequence selector; a false value is replaced by 1
# Returns : the return value of _run_Vista
sub run {
    my ( $self, $align, $ref ) = @_;

    # Default to the first sequence of the alignment.
    $ref = 1 unless $ref;

    my $plot_input = $self->_setinput( $align, $ref );
    return $self->_run_Vista($plot_input);
}
# Internal: prepare the Vista input for an alignment.
#
# Converts the alignment with _mf2bin, then writes the plotfile with
# _make_plotfile.
#
# Returns : the plotfile name returned by _make_plotfile
sub _setinput {
    my ( $self, $sim_aln, $ref ) = @_;

    my ( $pairs, $files ) = $self->_mf2bin( $sim_aln, $ref );

    return scalar $self->_make_plotfile( $sim_aln, $pairs, $files );
}
# Internal: read a multi-FASTA file into Bio::Seq objects.
#
# Args    : $file - path of the FASTA file
# Returns : list of Bio::Seq objects, in the order the records first appear
#           in the file (callers select the reference by rank, so the order
#           must be stable)
# Throws  : when the file cannot be opened or a header holds no identifier
#
# The identifier of a record is the first run of word characters after '>'.
# When an identifier occurs twice, the later record's sequence replaces the
# earlier one. Blank lines are skipped, as are sequence lines that precede
# the first header.
sub _parse_multi_fasta {
    my ( $self, $file ) = @_;

    open( my $fh, '<', $file ) || $self->throw("Couldn't open $file: $!");

    my %residues_of;    # id => concatenated sequence lines
    my @order;          # ids in order of first appearance
    my $current;        # id of the record being read

    while ( my $line = <$fh> ) {
        chomp $line;
        next if $line =~ /^$/;

        if ( substr( $line, 0, 1 ) eq ">" ) {
            my ($id) = substr( $line, 1 ) =~ /(\w+)/;
            $self->throw("Couldn't find a sequence id in header '$line' of $file")
                unless defined $id;

            push @order, $id unless exists $residues_of{$id};
            $residues_of{$id} = "";
            $current = $id;
        }
        elsif ( defined $current ) {
            $residues_of{$current} .= $line;
        }
    }
    close $fh;

    my @seq;
    foreach my $id (@order) {
        push @seq, Bio::Seq->new( -id => $id, -seq => $residues_of{$id} );
    }
    return @seq;
}
# Internal: convert an alignment into per-pair binary files.
#
# Args    : $sim - a Bio::Align::AlignI object, or (when not a reference)
#                  the path of a multi-FASTA alignment file
#           $ref - one of
#                   * an array ref of [id, reference_id] pairs to plot,
#                   * a number: 1-based rank of the reference sequence,
#                   * a string: id of the reference sequence (an exact id
#                     match is preferred; otherwise the first id matching
#                     $ref as a pattern is used)
#                  a false value is treated as 1
# Returns : (\@pairs, \@files); each pair is [reference_id, other_id] and
#           $files[$i] is the binary file written for $pairs[$i]
# Throws  : when $sim is not an alignment, or a requested sequence is missing
#
# Side effect: records the reference id(s) through _coordinate.
sub _mf2bin {
    my ( $self, $sim, $ref ) = @_;

    my @seq;
    if ( !ref $sim ) {
        @seq = $self->_parse_multi_fasta($sim);
    }
    else {
        # eval: isa() on an unblessed reference would die with an unrelated error.
        eval { $sim->isa("Bio::Align::AlignI") }
            || $self->throw("Expecting a Bio::Align::AlignI");
        @seq = $sim->each_seq;
    }

    my @files;
    my @pairs;

    # Explicit list of pairs: every pair names its own reference.
    if ( ref($ref) eq 'ARRAY' ) {
        my @reference_ids;
        foreach my $set (@$ref) {
            ( ref($set) eq 'ARRAY' && @$set >= 2 )
                || $self->throw("Each pair must be an array ref of [id, reference_id]");
            my ( $other_id, $reference_id ) = @$set;

            my ($reference) = grep { $_->id eq $reference_id } @seq;
            my ($other)     = grep { $_->id eq $other_id } @seq;
            $self->throw("Could not find sequence '$reference_id' in the alignment")
                unless defined $reference;
            $self->throw("Could not find sequence '$other_id' in the alignment")
                unless defined $other;

            my ( $pair, $file ) = $self->_pack_bin( $reference, $other );
            push @pairs,         @$pair;
            push @files,         @$file;
            push @reference_ids, $reference_id;
        }
        $self->_coordinate( \@reference_ids );
        return \@pairs, \@files;
    }

    # Single reference: pair it with every other sequence.
    $ref = 1 unless $ref;

    my $reference;
    if ( $ref =~ /^\d+$/ ) {
        ( $ref >= 1 && $ref <= @seq )
            || $self->throw( "Reference rank $ref is out of range (alignment has "
                . scalar(@seq) . " sequences)" );
        ($reference) = splice @seq, $ref - 1, 1;
    }
    else {
        # Prefer an exact id so that 'human' never selects 'human_alt'.
        my ($idx) = grep { $seq[$_]->id eq $ref } 0 .. $#seq;
        ($idx) = grep { $seq[$_]->id =~ /$ref/ } 0 .. $#seq unless defined $idx;
        ($reference) = splice @seq, $idx, 1 if defined $idx;
    }
    $self->throw("Could not find reference sequence '$ref' in the alignment")
        unless defined $reference;

    # Record the id actually used in the pairs, not the pattern that found it.
    $self->_coordinate( [ $reference->id ] );

    foreach my $other (@seq) {
        my ( $pair, $file ) = $self->_pack_bin( $reference, $other );
        push @pairs, @$pair;
        push @files, @$file;
    }
    return \@pairs, \@files;
}
# Internal: write one pairwise alignment as a packed binary temp file.
#
# Args    : $first - reference sequence object (provides id and seq)
#           $sec   - the other sequence object
# Returns : (\@pairs, \@files) holding the single pair [first_id, sec_id]
#           and the name of the file written for it
# Throws  : when the temp file cannot be closed
#
# Each alignment column becomes one byte: the high nibble is the code of
# the $first residue and the low nibble the code of the $sec residue
# (gap 0, A 1, C 2, T 3, G 4, N 5). Columns where both rows are gaps are
# dropped. Residues outside the table are written as N, and positions
# missing from a shorter $sec are written as gaps, so that every column
# always yields two hex digits.
sub _pack_bin {
    my ( $self, $first, $sec ) = @_;

    my %base_code = ( '-' => 0, 'A' => 1, 'C' => 2, 'T' => 3, 'G' => 4, 'N' => 5,
                      'a' => 1, 'c' => 2, 't' => 3, 'g' => 4, 'n' => 5 );
    my $unknown = $base_code{'N'};

    my @first = split( '', $first->seq );
    my @sec   = split( '', $sec->seq );

    my ( $tfh1, $outfile ) = $self->io->tempfile( -dir => $self->tempdir );
    binmode $tfh1;    # raw bytes: no newline translation on any platform

    foreach my $index ( 0 .. $#first ) {
        my $first_base = $first[$index];
        my $sec_base   = defined $sec[$index] ? $sec[$index] : '-';
        next if $first_base eq '-' && $sec_base eq '-';

        my $high = exists $base_code{$first_base} ? $base_code{$first_base} : $unknown;
        my $low  = exists $base_code{$sec_base}   ? $base_code{$sec_base}   : $unknown;
        print $tfh1 pack( "H2", $high . $low );
    }
    close($tfh1) || $self->throw("Could not write binary alignment $outfile: $!");
    undef($tfh1);

    my @files = ($outfile);
    my @pairs = ( [ $first->id, $sec->id ] );
    return \@pairs, \@files;
}
# Internal: write the Vista plotfile describing what to draw.
#
# Args    : $sim_aln - the alignment object, or a file name when the
#                      alignment was given as a multi-FASTA file
#           $pairs   - array ref of [reference_id, other_id]
#           $files   - array ref of binary alignment files, parallel to $pairs
# Returns : the plotfile name (also stored through the plotfile accessor)
# Throws  : when the annotation array does not hold Bio::SeqFeatureI
#           objects, or the plotfile cannot be written or copied
#
# The plotfile is first written to a temp file. When a plotfile name was
# already set on the object the temp file is copied there; otherwise the
# temp file name becomes the plotfile.
sub _make_plotfile {
    my ( $self, $sim_aln, $pairs, $files ) = @_;
    my ( $tfh1, $plotfile ) = $self->io->tempfile( -dir => $self->tempdir );

    # A file-name alignment has no each_seq; fall back to the ids used in
    # the pairs, in order of first appearance.
    my @ids;
    if ( ref $sim_aln ) {
        @ids = map { $_->id } $sim_aln->each_seq;
    }
    else {
        my %seen;
        @ids = grep { !$seen{$_}++ } map { @$_ } @$pairs;
    }

    print $tfh1 "TITLE " . $self->title . "\n\n";
    print $tfh1 "OUTPUT " . $self->outfile . "\n\n";
    print $tfh1 "SEQUENCES ";
    print $tfh1 join( " ", @ids ) . "\n\n";

    # One ALIGN ... END section per pair.
    foreach my $index ( 0 .. $#$pairs ) {
        my $pair_name = $pairs->[$index]->[0] . "_" . $pairs->[$index]->[1];

        print $tfh1 "ALIGN " . $files->[$index] . " BINARY\n";
        print $tfh1 " SEQUENCES " . $pairs->[$index]->[0] . " " . $pairs->[$index]->[1] . "\n";
        print $tfh1 " REGIONS " . $self->min_perc_id . " " . $self->min_length . "\n";
        print $tfh1 " MIN " . $self->plotmin . "\n";
        print $tfh1 " DIFFS " . $self->diffs . "\n\n" if $self->diffs;

        # Optional per-pair output files, named <ref>_<other>.<suffix>;
        # each is requested by setting either the flag or its directory.
        foreach my $output (
            [ 'REGION_FILE',    'region_file',    'region_file_dir',    'region' ],
            [ 'SCORE_FILE',     'score_file',     'score_file_dir',     'score' ],
            [ 'ALIGNMENT_FILE', 'alignment_file', 'alignment_file_dir', 'alignment' ],
            )
        {
            my ( $keyword, $flag, $dir_attr, $suffix ) = @$output;
            my $dir = $self->$dir_attr;
            next unless $self->$flag || $dir;

            my $file = " $keyword ";
            $file .= $dir . "/" if $dir;
            $file .= $pair_name . ".$suffix\n\n";
            print $tfh1 $file;
        }

        print $tfh1 " CONTIGS_FILE " . $self->contigs_file . "\n\n" if $self->contigs_file;
        print $tfh1 " USE_ORDER " . $self->use_order . "\n\n"       if $self->use_order;
        print $tfh1 "END\n\n ";
    }

    # Annotation: either an array ref of features (dumped to a GFF temp
    # file) or the name of an existing annotation file.
    my $annotation = $self->annotation;
    my $annotation_file;
    if ( ref $annotation eq 'ARRAY' ) {
        if (@$annotation) {
            my $first = $annotation->[0];
            eval { $first->isa("Bio::SeqFeatureI") }
                || $self->throw("annotation array must contain Bio::SeqFeatureI objects");
            $annotation_file = $self->_dump2gff($annotation);
            $self->annotation_format('GFF');
        }
    }
    elsif ($annotation) {
        $annotation_file = $annotation;
    }

    # Only tag a real annotation file with its format; a format without a
    # file must not produce a GENES line.
    if ( defined $annotation_file && length $annotation_file ) {
        my $format = $self->annotation_format;
        $annotation_file .= " GFF" if defined $format && $format =~ /GFF/i;
        print $tfh1 "GENES " . $annotation_file . "\n\n ";
    }

    print $tfh1 "LEGEND on\n\n";
    print $tfh1 "COORDINATE " . join( " ", @{ $self->_coordinate } ) . "\n\n";
    print $tfh1 "PAPER letter\n\n";
    print $tfh1 "BASES " . $self->bases . "\n\n";
    print $tfh1 "TICK_DIST " . $self->tickdist . "\n\n";
    print $tfh1 "RESOLUTION " . $self->resolution . "\n\n";
    print $tfh1 "WINDOW " . $self->window . "\n\n";
    print $tfh1 "NUM_WINDOWS " . $self->numwindows . "\n\n";

    # Options that are written only when they hold a true value.
    foreach my $option (
        [ 'AXIS_LABEL',     'axis_label' ],
        [ 'TICKS_FILE',     'ticks_file' ],
        [ 'SNPS_FILE',      'snps_file' ],
        [ 'GAPS',           'gaps' ],
        [ 'REPEATS_FILE',   'repeats_file' ],
        [ 'FILTER_REPEATS', 'filter_repeats' ],
        [ 'NUM_PAGES',      'num_pages' ],
        [ 'START',          'start' ],
        [ 'END',            'end' ],
        )
    {
        my ( $keyword, $accessor ) = @$option;
        my $value = $self->$accessor;
        print $tfh1 "$keyword " . $value . "\n\n" if $value;
    }

    my $color = $self->color;
    if ( ref $color eq 'HASH' ) {
        # Sorted so that the plotfile is reproducible between runs.
        foreach my $region_type ( sort keys %$color ) {
            print $tfh1 "COLOR " . $region_type . " " . $color->{$region_type} . "\n\n";
        }
    }

    close($tfh1) || $self->throw("Could not write plotfile $plotfile: $!");
    undef $tfh1;

    if ( $self->plotfile ) {
        copy( $plotfile, $self->plotfile )
            || $self->throw( "Could not copy plotfile to " . $self->plotfile . ": $!" );
    }
    else {
        $self->plotfile($plotfile);
    }
    return $self->plotfile;
}
# Internal: write features to a temp file, one gff_string per line.
#
# Args    : $feat - array ref of objects providing gff_string
# Returns : name of the temp file written
sub _dump2gff {
    my ( $self, $feat ) = @_;

    my ( $gff_fh, $gff_path ) = $self->io->tempfile( -dir => $self->tempdir );

    print {$gff_fh} $_->gff_string . "\n" for @$feat;

    close($gff_fh);
    undef $gff_fh;

    return $gff_path;
}
# Internal: build the Vista command line and execute it.
#
# Args    : $infile - plotfile handed to Vista as the last argument
# Returns : 1 when the command exits with status 0
# Throws  : "Problem running Vista: ..." on a non-zero status
#
# The command is "<java> <java_param> Vista [-q] [-d] <plotfile>" and is
# passed to system() as a single string.
sub _run_Vista {
    my ( $self, $infile ) = @_;
    $self->debug("Running Vista\n");

    my $java_exe = $self->java;
    my $jvm_opts = $self->java_param || '';
    my $cmd      = "$java_exe $jvm_opts Vista ";

    # -q when quiet was requested or verbosity is negative.
    if ( $self->quiet || $self->verbose < 0 ) {
        $cmd .= " -q ";
    }
    # -d when the debug call returns a true value.
    if ( $self->debug ) {
        $cmd .= " -d ";
    }
    $cmd .= $infile;
    $self->debug($cmd);

    my $status = system($cmd);
    $status == 0 or $self->throw("Problem running Vista: $?\n ");

    return 1;
}
# Internal get/set for the stored coordinate value.
#
# Args    : $val - optional new value; stored only when true
# Returns : the stored value
sub _coordinate {
    my ( $self, $val ) = @_;

    $self->{'_coordinate'} = $val if $val;

    return $self->{'_coordinate'};
}
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
Please direct usage questions or support issues to the mailing list:
bioperl-l@bioperl.org
rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
Title : outfile
Usage : $obj->outfile
Function : Get/Set method outfile
Args :
Title : min_perc_id
Usage : $obj->min_perc_id
Function : Get/Set method min_perc_id
Args :
Title : quiet
Usage : $obj->quiet
Function : Get/Set method quiet
Args :
Title : verbose
Usage : $obj->verbose
Function : Get/Set method verbose
Args :
Title : annotation_format
Usage : $obj->annotation_format
Function : Get/Set method annotation_format
Args :
Title : region_file
Usage : $obj->region_file
Function : Get/Set method region_file
Args :
Title : score_file
Usage : $obj->score_file
Function : Get/Set method score_file
Args :
Title : alignment_file
Usage : $obj->alignment_file
Function : Get/Set method alignment_file
Args :
Title : contigs_file
Usage : $obj->contigs_file
Function : Get/Set method contigs_file
Args :
Title : diffs
Usage : $obj->diffs
Function : Get/Set method diffs
Args :
Title : plotfile
Usage : $obj->plotfile
Function : Get/Set method plotfile
Args :
Title : min_length
Usage : $obj->min_length
Function : Get/Set method min_length
Args :
Title : plotmin
Usage : $obj->plotmin
Function : Get/Set method plotmin
Args :
Title : annotation
Usage : $obj->annotation
Function : Get/Set method annotation
Args :
Title : bases
Usage : $obj->bases
Function : Get/Set method bases
Args :
Title : tickdist
Usage : $obj->tickdist
Function : Get/Set method tickdist
Args :
Title : resolution
Usage : $obj->resolution
Function : Get/Set method resolution
Args :
Title : title
Usage : $obj->title
Function : Get/Set method title
Args :
Title : window
Usage : $obj->window
Function : Get/Set method window
Args :
Title : numwindows
Usage : $obj->numwindows
Function : Get/Set method numwindows
Args :
Title : start
Usage : $obj->start
Function : Get/Set method start
Args :
Title : end
Usage : $obj->end
Function : Get/Set method end
Args :
Title : num_plot_lines
Usage : $obj->num_plot_lines
Function : Get/Set method num_plot_lines
Args :
Title : legend
Usage : $obj->legend
Function : Get/Set method legend
Args :
Title : filename
Usage : $obj->filename
Function : Get/Set method filename
Args :
Title : axis_label
Usage : $obj->axis_label
Function : Get/Set method axis_label
Args :
Title : ticks_file
Usage : $obj->ticks_file
Function : Get/Set method ticks_file
Args :
Title : color
Usage : $obj->color
Function : Get/Set method color
Args :
Title : use_order
Usage : $obj->use_order
Function : Get/Set method use_order
Args :
Title : gaps
Usage : $obj->gaps
Function : Get/Set method gaps
Args :
Title : snps_file
Usage : $obj->snps_file
Function : Get/Set method snps_file
Args :
Title : repeats_file
Usage : $obj->repeats_file
Function : Get/Set method repeats_file
Args :
Title : filter_repeats
Usage : $obj->filter_repeats
Function : Get/Set method filter_repeats
Args :