| Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
| WebCvs |
use Bio::Matrix::PSM::ProtMatrix;
# Create from memory by supplying probability matrix hash both as strings or
# arrays where the frequencies Hash entries of the form lN refer to an array
# of position-specific log-odds scores for amino acid N. Hash entries of the
# form pN represent the position-specific probability of finding amino acid N.
my %param = ( 'id' => 'A. thaliana protein atp1', '-e_val' => $score, 'lS' => [ '-2', '3', '-3', '2', '-3', '1', '1', '3' ], 'lF' => [ '-1', '-4', '0', '-5', '0', '-5', '-4', '-4' ], 'lT' => [ '-1', '1', '0', '1', '-2', '-1', '0', '1' ], 'lN' => [ '-3', '-1', '-2', '3', '-5', '5', '-2', '0' ], 'lK' => [ '-2', '0', '-3', '2', '-3', '2', '-3', '-1' ], 'lY' => [ '-2', '-3', '-3', '-4', '-3', '-4', '-4', '-4' ], 'lE' => [ '-3', '4', '-3', '2', '-4', '-2', '-3', '2' ], 'lV' => [ '0', '-2', '1', '-4', '1', '-4', '-1', '-3' ], 'lQ' => [ '-1', '0', '-2', '3', '-4', '1', '-3', '0' ], 'lM' => [ '8', '-3', '8', '-3', '1', '-3', '-3', '-3' ], 'lC' => [ '-2', '-3', '-3', '-4', '-3', '-4', '-3', '-3' ], 'lL' => [ '1', '-3', '1', '-4', '3', '-4', '-2', '-4' ], 'lA' => [ '-2', '1', '-2', '0', '-2', '-2', '2', '2' ], 'lW' => [ '-2', '-4', '-3', '-5', '-4', '-5', '-5', '-5' ], 'lP' => [ '-3', '-2', '-4', '-3', '-1', '-3', '6', '-3' ], 'lH' => [ '-2', '-2', '-3', '-2', '-5', '-2', '-2', '-3' ], 'lD' => [ '-4', '-1', '-3', '1', '-3', '-1', '-3', '4' ], 'lR' => [ '-2', '-1', '-3', '0', '-4', '4', '-4', '-3' ], 'lI' => [ '0', '-3', '0', '-4', '6', '-4', '-2', '-2' ], 'lG' => [ '-4', '-2', '-4', '-2', '-5', '-3', '-1', '-2' ], 'pS' => [ '0', '33', '0', '16', '1', '12', '11', '25' ], 'pF' => [ '0', '0', '2', '0', '3', '0', '0', '0' ], 'pT' => [ '0', '8', '7', '10', '1', '2', '7', '8' ], 'pN' => [ '0', '0', '2', '13', '0', '36', '1', '4' ], 'pK' => [ '0', '5', '0', '13', '1', '15', '0', '2' ], 'pY' => [ '0', '0', '0', '0', '0', '0', '0', '0' ], 'pE' => [ '0', '41', '1', '12', '0', '0', '0', '15' ], 'pV' => [ '0', '3', '9', '0', '2', '0', '3', '1' ], 'pQ' => [ '0', '0', '0', '15', '0', '4', '0', '3' ], 'pM' => [ '100', '0', '66', '0', '2', '0', '0', '0' ], 'pC' => [ '0', '0', '0', '0', '0', '0', '0', '0' ], 'pL' => [ '0', '0', '8', '0', '25', '0', '4', '0' ], 'pA' => [ '0', '10', '1', '9', '2', '0', '22', '16' ], 'pW' => [ '0', '0', '0', '0', '0', '0', '0', '0' ], 'pP' => [ '0', '0', '0', '0', '3', '1', '45', '0' ], 'pH' => [ '0', '0', '0', '0', '0', '0', '1', '0' ], 'pD' => [ '0', '0', '1', '7', '2', '2', '0', '22' ], 'pR' => [ '0', '0', '0', '3', '0', '27', '0', '0' ], 'pI' => [ '0', '0', '3', '0', '59', '1', '2', '3' ], 'pG' => [ '0', '0', '0', '1', '0', '0', '4', '1' ], ); my $matrix = Bio::Matrix::PSM::ProtMatrix( %param ); my $site = Bio::Matrix::PSM::ProtMatrix->new(%param); # Or get it from a file: use Bio::Matrix::PSM::IO; my $psmIO = Bio::Matrix::PSM::IO->new(-file => $file, -format => 'psi-blast'); while (my $psm = $psmIO->next_psm) { #Now we have a Bio::Matrix::PSM::Psm object, # see Bio::Matrix::PSM::PsmI for details #This is a Bio::Matrix::PSM::ProtMatrix object now my $matrix = $psm->matrix; } # Get a simple consensus, where alphabet is: # {A, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V,} # choosing the highest probability or N if prob is too low my $consensus = $site->consensus; # Retrieving and using regular expressions: my $regexp = $site->regexp; my $count = grep($regexp,$seq); my $count = ($seq=~ s/$regexp/$1/eg); print "Motif $mid is present $count times in this sequence\n";
iupac - return IUPAC compliant consensus as a stringNew methods, which might be of interest to anyone who wants to store
score - Returns the score as a real number
IC - information content. Returns a real number
id - identifier. Returns a string
accession - accession number. Returns a string
next_pos - return the sequence probably for each letter, IUPAC
symbol, IUPAC probability and simple sequence
consenus letter for this position. Rewind at the end. Returns a hash.
pos - current position get/set. Returns an integer.
regexp - construct a regular expression based on IUPAC consensus.
For example AGWV will be [Aa][Gg][AaTt][AaCcGg]
width - site width
get_string - gets the probability vector for a single base as a string.
get_array - gets the probability vector for a single base as an array.
get_logs_array - gets the log-odds vector for a single base as an array.
my $str=$matrix->get_compressed_freq('A');
or
my $str=$matrix->get_compressed_logs('A');
Loading from a database should be done with new, but is not yet implemented.my @arr=Bio::Matrix::PSM::_uncompress_string ($str,1,1); for PSMor
my @arr=Bio::Matrix::PSM::_uncompress_string ($str,1000,2); for log odds
| new | Description | Code |
| alphabet | Description | Code |
| _calculate_consensus | Description | Code |
| next_pos | Description | Code |
| curpos | Description | Code |
| e_val | Description | Code |
| IC | Description | Code |
| accession_number | Description | Code |
| consensus | Description | Code |
| IUPAC | No description | Code |
| get_string | Description | Code |
| width | Description | Code |
| get_array | Description | Code |
| get_logs_array | Description | Code |
| id | Description | Code |
| regexp | Description | Code |
| regexp_array | Description | Code |
| _compress_array | Description | Code |
| _uncompress_string | Description | Code |
| get_compressed_freq | Description | Code |
| sequence_match_weight | Description | Code |
| _to_IUPAC | Description | Code |
| _to_cons | Description | Code |
| new | code | next | Top |
Title : new |
| alphabet | code | prev | next | Top |
Title : Returns an array (or array reference if desired) to the alphabet |
| _calculate_consensus | code | prev | next | Top |
Title : _calculate_consensus |
| next_pos | code | prev | next | Top |
Title : next_pos |
| curpos | code | prev | next | Top |
Title : curpos |
| e_val | code | prev | next | Top |
Title : e_val |
| IC | code | prev | next | Top |
Title : IC |
| accession_number | code | prev | next | Top |
Title : accession_number |
| consensus | code | prev | next | Top |
Title : consensus |
| get_string | code | prev | next | Top |
Title : get_string |
| width | code | prev | next | Top |
Title : width |
| get_array | code | prev | next | Top |
Title : get_array |
| get_logs_array | code | prev | next | Top |
Title : get_logs_array |
| id | code | prev | next | Top |
Title : id |
| regexp | code | prev | next | Top |
Title : regexp |
| regexp_array | code | prev | next | Top |
Title : regexp_array |
| _compress_array | code | prev | next | Top |
Title : _compress_array |
| _uncompress_string | code | prev | next | Top |
Title : _uncompress_string |
| get_compressed_freq | code | prev | next | Top |
Title : get_compressed_freq |
| sequence_match_weight | code | prev | next | Top |
Title : sequence_match_weight |
| _to_IUPAC | code | prev | next | Top |
Title : _to_IUPAC |
| _to_cons | code | prev | next | Top |
Title : _to_cons |
| new | description | prev | next | Top |
my ($class, @args) = @_; my $self = $class->SUPER::new(@args); my $consensus; #Too many things to rearrange, and I am creating simultanuously >500}
# such objects routinely, so this becomes performance issue
my %input; while( @args ) { (my $key = shift @args) =~ s/-//gi; #deletes all dashes (only dashes)!
$input{$key} = shift @args; } # get a protein alphabet for processing log-odds scores and probabilities
# maybe change this later on to allow for non-standard aa lists?
my @alphabet = qw/A R N D C Q E G H I L K M F P S T W Y V/; foreach my $aa (@alphabet) { $self->{"log$aa"} = defined($input{"l$aa"}) ? $input{"l$aa"} : $self->throw("Error: No log-odds information for $aa!"); $self->{"prob$aa"} = defined($input{"p$aa"}) ? $input{"p$aa"} : $self->throw("Error: No probability information for $aa!"); } $self->{_position} = 0; $self->{IC} = $input{IC}; $self->{e_val} = $input{e_val}; $self->{sites} = $input{sites}; $self->{width} = $input{width}; $self->{accession_number} = $input{accession_number}; $self->{_correction} = defined($input{correction}) ? $input{correction} : 1 ; # Correction might be unwanted- supply your own
# No id provided, null for the sake of rel db
$self->{id} = defined($input{id}) ? $input{id} : 'null'; $self->{_alphabet} =\@ alphabet; #Make consensus, throw if any one of the vectors is shorter
$self = _calculate_consensus($self,$input{model}); return $self;
| alphabet | description | prev | next | Top |
my $self = shift; if ( wantarray ) { return $self->{_alphabet}; } else { return @{$self->{_alphabet}}; }}
| _calculate_consensus | description | prev | next | Top |
my $self = shift; my $thresh = shift; # verify that all of the array lengths in %probs are the same}
my @lengths = map { scalar(@$_) } map {$self->{"prob$_"}} @{ $self->{_alphabet} }; my $len = shift @lengths; for ( @lengths ) { if ( $_ ne $len ) { $self->throw( "Probability matrix is damaged!\n" ) }; } # iterate over probs, generate the most likely sequence and put it into
# $self->{seq}. Put the probability of this sequence into $self->{seqp}.
for ( my $i = 0; $i < $len; $i++ ) { # get a list of all the probabilities at position $i, ordered by $self->{_alphabet}
my @probs = map { ${$self->{"prob$_"}}[$i] } @{ $self->{_alphabet} }; # calculate the consensus of @probs, put sequence into seqp and probabilities into seqp
(${$self->{seq}}[$i],${$self->{seqp}}[$i]) = $self->_to_cons( @probs, $thresh ); } return $self;
| next_pos | description | prev | next | Top |
my $self = shift; $self->throw("instance method called on class") unless ref $self; my $len = @{$self->{seq}}; my $pos = $self->{_position}; # return a PSM if we're still within range}
if ($pos<$len) { my %probs = map { ("p$_", ${$self->{"prob$_"}}[$pos]) } @{$self->{_alphabet}}; my %logs = map { ("l$_", ${$self->{"log$_"}}[$pos]) } @{$self->{_alphabet}}; my $base = ${$self->{seq}}[$pos]; my $prob = ${$self->{seqp}}[$pos]; $self->{_position}++; my %hash = ( %probs, %logs, base => $base, rel => $pos, prob => $prob ); # decide whether to return the hash or a reference to it
if ( wantarray ) { return %hash; } else { return\% hash; } } else { # otherwise, reset $self->{_position} and return nothing
$self->{_position} = 0; return; }
| curpos | description | prev | next | Top |
my $self = shift; if (@_) { $self->{_position} = shift; } return $self->{_position};}
| e_val | description | prev | next | Top |
my $self = shift; if (@_) { $self->{e_val} = shift; } return $self->{e_val};}
| IC | description | prev | next | Top |
my $self = shift; if (@_) { $self->{IC} = shift; } return $self->{IC};}
| accession_number | description | prev | next | Top |
my $self = shift; if (@_) { $self->{accession_number} = shift; } return $self->{accession_number};}
| consensus | description | prev | next | Top |
my $self = shift; my $thresh=shift; $self->_calculate_consensus($thresh) if ($thresh); #Change of threshold}
my $consensus=''; foreach my $letter (@{$self->{seq}}) { $consensus .= $letter; } return $consensus;
| IUPAC | description | prev | next | Top |
my $self = shift; return $self->consensus;}
| get_string | description | prev | next | Top |
my $self = shift; my $base = shift; my $string = ''; my @prob = @{$self->{"prob$base"}}; if ( ! @prob ) { $self->throw( "No such base: $base\n"); } foreach my $prob (@prob) { my $corrected = $prob*10; my $next = sprintf("%.0f",$corrected); $next = 'a' if ($next eq '10'); $string .= $next; } return $string;}
| width | description | prev | next | Top |
my $self = shift; my $width = @{$self->{probA}}; return $width;}
| get_array | description | prev | next | Top |
my $self = shift; my $letter = uc(shift); $self->throw ("No such base: $letter!\n") unless grep { /$letter/ } @{$self->{_alphabet}}; return @{$self->{"prob$letter"}};}
| get_logs_array | description | prev | next | Top |
my $self = shift; my $letter = uc(shift); $self->throw ("No such base: $letter!\n") unless grep { /$letter/ } @{$self->{_alphabet}}; return @{$self->{"log$letter"}};}
| id | description | prev | next | Top |
my $self = shift; if (@_) { $self->{id} = shift; } return $self->{id};}
| regexp | description | prev | next | Top |
my $self = shift; my $threshold = 20; if ( @_ ) { my $threshold = shift }; my @alphabet = @{$self->{_alphabet}}; my $width = $self->width; my (@regexp, $i); for ( $i = 0; $i < $width; $i++ ) { # get an array of the residues at this position with p > $threshold}
my @letters = map { uc($_).lc($_) } grep { $self->{"prob$_"}->[$i] >= $threshold } @alphabet; my $reg; if ( scalar(@letters) == 0 ) { $reg = '\.'; } else { $reg = '['.join('',@letters).']'; } push @regexp, $reg; } if ( wantarray ) { return @regexp; } else { return join '', @regexp; }
| regexp_array | description | prev | next | Top |
my $self = shift; return @{ $self->regexp };}
| _compress_array | description | prev | next | Top |
my ($array,$lm,$direct)=@_; my $str; return unless(($array) && ($lm)); $direct=1 unless ($direct); my $k1= ($direct==1) ? (255/$lm) : (127/$lm); foreach my $c (@{$array}) { $c=$lm if ($c>$lm); $c=-$lm if (($c<-$lm) && ($direct !=1)); $c=0 if (($c<0) && ($direct ==1)); my $byte=int($k1*$c); $byte=127+$byte if ($direct !=1);#Clumsy, should be really shift the bits}
my $char=chr($byte); $str.=$char; } return $str;
| _uncompress_string | description | prev | next | Top |
my ($str,$lm,$direct)=@_; my @array; return unless(($str) && ($lm)); $direct=1 unless ($direct); my $k1= ($direct==1) ? (255/$lm) : (127/$lm); while (my $c=chop($str)) { my $byte=ord($c); $byte=$byte-127 if ($direct !=1);#Clumsy, should be really shift the bits}
my $num=$byte/$k1;
unshift @array,$num; } return @array;
| get_compressed_freq | description | prev | next | Top |
my $self=shift; my $base=shift; my $string=''; my @prob; BASE: { if ($base eq 'A') { @prob = @{$self->{probA}} unless (!defined($self->{probA})); last BASE; } if ($base eq 'G') { @prob = @{$self->{probG}} unless (!defined($self->{probG})); last BASE; } if ($base eq 'C') { @prob = @{$self->{probC}} unless (!defined($self->{probC})); last BASE; } if ($base eq 'T') { @prob = @{$self->{probT}} unless (!defined($self->{probT})); last BASE; } $self->throw ("No such base: $base!\n"); } my $str= _compress_array(\@prob,1,1); return $str;}
| sequence_match_weight | description | prev | next | Top |
my ($self,$seq)=@_; return unless ($self->{logA}); my $seqlen = length($seq); my $width = $self->width; $self->throw("Error: Input sequence size ($seqlen) not equal to PSM size ($width)!\n") unless (length($seq) == $self->width); my ($score,$i) = (0,0); foreach my $letter ( split //, $seq ) { # add up the score for this position}
$score += $self->{"log$letter"}->[$i]; $i++; } return $score;
| _to_IUPAC | description | prev | next | Top |
my ($self,@probs,$thresh) = @_; # provide a default threshold of 5, corresponds to 5% threshold for}
# inferring that the aa at any position is the true aa
$thresh = 5 unless ( defined $thresh ); my ($IUPAC_aa,$max_prob) = ('X',$thresh); for my $aa ( @{$self->{_alphabet}} ) { my $prob = shift @probs; if ( $prob > $max_prob ) { $IUPAC_aa = $aa; $max_prob = $prob; } } return $IUPAC_aa, $max_prob;
| _to_cons | description | prev | next | Top |
return _to_IUPAC( @_ );}
| FEEDBACK | Top |
| Mailing Lists | Top |
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
| Support | Top |
| Reporting Bugs | Top |
https://redmine.open-bio.org/projects/bioperl/
| AUTHOR - James Thompson | Top |
| APPENDIX | Top |
| get_all_vectors | Top |
Title : get_all_vectors
Usage :
Function : returns all possible sequence vectors to satisfy the PFM under
a given threshold
Throws : If threshold outside of 0..1 (no sense to do that)
Example : my @vectors = $self->get_all_vectors(4);
Returns : Array of strings
Args : (optional) floating