Bio::Tools::Run::Alignment
Sim4
Toolbar
Summary
Bio::Tools::Run::Alignment::Sim4 - Wrapper for Sim4 program that allows
for alignment of cdna to genomic sequences
Package variables
No package variables defined.
Included modules
Bio::AlignIO
Bio::Root::IO
Bio::Root::Root
Bio::SeqIO
Bio::SimpleAlign
Bio::Tools::Run::WrapperBase
Bio::Tools::Sim4::Results
Inherit
Bio::Root::Root Bio::Tools::Run::WrapperBase
Synopsis
use Bio::Tools::Run::Alignment::Sim4;
my @params = (W=>15,K=>17,D=>10,N=>10,cdna_seq=>"mouse_cdna.fa",genomic_seq=>"mouse_genomic.fa");
my $sim4 = Bio::Tools::Run::Alignment::Sim4->new(@params);
my @exon_sets = $sim4->align;
foreach my $set(@exon_sets){
foreach my $exon($set->sub_SeqFeature){
print $exon->start."\t".$exon->end."\t".$exon->strand."\n";
print "\tMatched ".$exon->est_hit->seq_id."\t".$exon->est_hit->start."\t".$exon->est_hit->end."\n";
}
}
One can also provide a est database
$sio = Bio::SeqIO->new(-file=>"est.fa",-format=>"fasta");
@est_seq=();
while(my $seq = $sio->next_seq){
push @est_seq,$seq;
}
my @exon_sets = $factory->align(\@est_seq,$genomic);
Description
Sim4 program is developed by Florea et al. for aligning cdna/est
sequence to genomic sequences
Florea L, Hartzell G, Zhang Z, Rubin GM, Miller W.
A computer program for aligning a cDNA sequence with a genomic DNA sequence.
Genome Res 1998 Sep;8(9):967-74
The program is available for download here:
http://globin.cse.psu.edu/
Methods
Methods description
Title : program_name Usage : $factory->program_name() Function: holds the program name Returns: string Args : None |
Title : program_dir Usage : $factory->program_dir(@params) Function: returns the program directory, obtained from ENV variable. Returns: string Args : |
Title : version Usage : not supported Function: Cannot determine from program Example : Returns : float or undef Args : none |
Title : align Usage : $cdna = 't/data/cdna.fa'; $genomic = 't/data/cdna.fa'; @exon_set = $factory->align($cdna,$genomic); or #@seq_array is array of Seq objs $cdna = \@seq_array; @exon_set = $factory->align($cdna,$genomic); of @exon_set = $factory->align($cdna->[0],$genomic)
Function: Perform a Sim4 alignment
Returns : An array of Bio::SeqFeature::Generic objects which has
exons as sub seqfeatures.
Args : Name of two files containing fasta sequences,
or 2 Bio::SeqI objects
or a combination of both
first is assumed to be cdna
second is assumed to be genomic
More than one cdna may be provided. If an object,
assume that its an array ref. |
Title : _run Usage : Internal function, not to be called directly Function: makes actual system call to Sim4 program Example : Returns : nothing; Sim4 output is written to a temp file Args : Name of a file containing a set of unaligned fasta sequences and hash of parameters to be passed to Sim4 |
Title : _setinput Usage : Internal function, not to be called directly Function: Create input file for Sim4 program Example : Returns : name of file containing Sim4 data input Args : Seq or Align object reference or input file name |
Title : _setparams Usage : Internal function, not to be called directly Function: Create parameter inputs for Sim4 program Example : Returns : parameter string to be passed to Sim4 during align or profile_align Args : name of calling object |
Methods code
BEGIN {
@SIM4_PARAMS= qw(A W X K C R D H P N B);
@OTHER_PARAMS= qw(CDNA_SEQ GENOMIC_SEQ OUTFILE);
@OTHER_SWITCHES = qw(SILENT QUIET VERBOSE);
foreach my $attr ( @SIM4_PARAMS, @OTHER_PARAMS,
@OTHER_SWITCHES) { $OK_FIELD{$attr}++;} |
sub program_name
{ return 'sim4'; } |
sub program_dir
{ return Bio::Root::IO->catfile($ENV{SIM4DIR}) if $ENV{SIM4DIR};} |
sub new
{ my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
$self->io->_initialize_io();
$self->A(0); my ($attr, $value);
while (@args) {
$attr = shift @args;
$value = shift @args;
if ($attr =~/est_first/i ) { $self->{est_first} = $value; next; } next if( $attr =~ /^-/ ); if ($attr =~/'PROGRAM'/i ) {
$self->executable($value);
next;
}
$self->$attr($value);
}
return $self; } |
sub AUTOLOAD
{ my $self = shift;
my $attr = $AUTOLOAD;
$attr =~ s/.*:://;
$attr = uc $attr;
$self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
$self->{$attr} = shift if @_;
return $self->{$attr};} |
sub version
{ my ($self) = @_;
return undef;} |
sub align
{
my ($self,$cdna,$genomic) = @_;
$self->cdna_seq($cdna) if $cdna;
$self->throw("Need to provide a cdna sequence") unless $self->cdna_seq;
$self->genomic_seq($genomic) if $genomic;
$self->throw("Need to provide a genomic sequence") unless $self->genomic_seq;
my ($temp,$infile1, $infile2, $est_first,$seq);
my ($attr, $value, $switch);
($est_first,$infile1,$infile2)= $self->_setinput($self->cdna_seq,$self->genomic_seq);
if (!($infile1 && $infile2)) {$self->throw("Bad input data (sequences need an id ) or less than 2 sequences in align!");}
my $param_string = $self->_setparams();
my @exon_sets = $self->_run($est_first,$infile1,$infile2,$param_string);
return @exon_sets;
}
} |
sub _run
{ my ($self,$estfirst,$infile1,$infile2,$param_string) = @_;
my $instring;
$self->debug( "Program ".$self->executable."\n");
if(! $self->outfile){
my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir);
close($tfh);
undef $tfh;
$self->outfile($outfile);
}
my $outfile = $self->outfile();
my $commandstring = $self->executable." $infile1 $infile2 $param_string > $outfile";
if($self->quiet || $self->silent || ($self->verbose < 0)){
$commandstring .= " 2>/dev/null";
}
$self->debug( "Sim4 command = $commandstring");
my $status = system($commandstring);
$self->throw( "Sim4 call ($commandstring) crashed: $?\n ") unless $status==0;
my $sim4_parser = Bio::Tools::Sim4::Results->new(-file=>$outfile,-estfirst=>$estfirst);
my @out;
while(my $exonset = $sim4_parser->next_exonset){
push @out, $exonset;
}
return @out;} |
sub _setinput
{ my ($self, $cdna,$genomic) = @_;
my ($infilename, $seq, $temp, $tfh1,$tfh2,$outfile1,$outfile2);
my $estfirst= defined($self->{est_first}) ? $self->{_est_first} : 1;
my ($cdna_file,$genomic_file);
if(ref($cdna)) {
my @cdna = ref $cdna eq "ARRAY" ? @{$cdna} : ($cdna);
($tfh1,$cdna_file) = $self->io->tempfile(-dir=>$self->tempdir);
my $seqio = Bio::SeqIO->new(-fh=>$tfh1,-format=>'fasta');
foreach my $c (@cdna){
$seqio->write_seq($c);
}
close $tfh1;
undef $tfh1;
if($#cdna > 0){
$estfirst=0;
}
}
else {
my $sio = Bio::SeqIO->new(-file=>$cdna,-format=>"fasta");
my $count = 0;
while(my $seq = $sio->next_seq){
$count++;
}
$estfirst = $count > 1 ? 0:1;
$cdna_file = $cdna;
}
if( ref($genomic) ) {
($tfh1,$genomic_file) = $self->io->tempfile(-dir=>$self->tempdir);
my $seqio = Bio::SeqIO->new(-fh=>$tfh1,-format=>'fasta');
$seqio->write_seq($genomic);
close $tfh1;
undef $tfh1;
}
else {
$genomic_file = $genomic;
}
return ($estfirst,$cdna_file,$genomic_file) if $estfirst;
return ($estfirst,$genomic_file,$cdna_file); } |
sub _setparams
{ my ($attr, $value, $self);
$self = shift;
my $param_string = "";
for $attr ( @SIM4_PARAMS ) {
$value = $self->$attr();
next unless (defined $value);
my $attr_key = uc $attr; $attr_key = ' '.$attr_key;
$param_string .= $attr_key.'='.$value;
}
return $param_string;
}
1;
} |
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
Please direct usage questions or support issues to the mailing list:
bioperl-l@bioperl.org
rather than to the module maintainer directly. Many experienced and
reponsive experts will be able look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _