Bio::Tools::SeqPattern Backtranslate
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Toolbar
WebCvs
Summary
Bio::Tools::SeqPattern::Backtranslate
Package variables
Privates (from "my" definitions)
( @codon_library, $ct );
%convert;
Included modules
Bio::Seq
Bio::Tools::CodonTable
Carp qw ( croak )
List::MoreUtils qw ( uniq )
Inherit
Bio::Root::Root
Synopsis
No synopsis!
Description
This module should not be used directly. It provides helper methods to
Bio::Tools::SeqPattern to reverse translate protein patterns.
Methods
_reverse_translate_motif
No description
Code
_parse_motif
No description
Code
_tokenize_motif
No description
Code
_contract_codons
No description
Code
_expand_codon
No description
Code
_convert
No description
Code
_uniq_string
No description
Code
_negated_aas_to_codon
No description
Code
Methods description
None available.
Methods code
_reverse_translate_motifdescriptionprevnextTop
sub _reverse_translate_motif {
   # Main subroutine. It takes a Profam-like motif and returns its
# reverse translation using degenerate codons.
# Steps:
# 1. Tokenize, then parse tokens.
# 2. Reverse translate each token type.
# 3. Join tokens in original order. Return the resulting string.
my $motif = shift; $motif =~ s/\./X/g; $motif = uc $motif; ### 1. Tokenize, parse the motif.
my ( $ordered, $classified ) = _parse_motif($motif); ### 2. Reverse translate each token type.
# Reverse translate the plain (unambiguous) tokens.
my $ct = Bio::Tools::CodonTable->new; foreach my $seq ( @{ $classified->{plain} } ) { my $seqO = Bio::Seq->new( -seq => $$seq, -alphabet => 'protein' ); $$seq = $ct->reverse_translate_all($seqO); } # Reverse translate the ambiguous tokens.
foreach my $token ( @{ $classified->{ambiguous} } ) { my ($aas) = $$token =~ m(([A-Za-z\.]+)); my @codons_to_contract; foreach my $residue ( split '', $aas ) { push @codons_to_contract, $ct->revtranslate($residue); } my $ambiguous_codon = _contract_codons(@codons_to_contract); $$token = $ambiguous_codon; } # Reverse translate the negated residues.
foreach my $token ( @{ $classified->{negated} } ) { my ($aas) = $$token =~ m(([A-Za-z\.]+)); my $ambiguous_codon = _negated_aas_to_codon($aas); $$token = $ambiguous_codon; } ### 3. Join the profile back from its tokens.
return join '', map {$$_} @{$ordered};
}
_parse_motifdescriptionprevnextTop
sub _parse_motif {
   # Profam-like motif parser. It takes the pattern as a string, and
# returns two data structures that contain the tokens, organized
# by order of appearance in the pattern (first return value) and by
# category (second return value).
my $motif = shift; my $parser = _tokenize_motif($motif); my ( %tokens, @tokens ); while ( my $token = $parser->() ) { croak ("Unknown syntax token: <", $token->[1], ">") if ( $token->[0] eq 'UNKNOWN' ); push @{ $tokens{ $token->[0] } },\$ token->[1]; push @tokens,\$ token->[1]; } return (\@ tokens,\% tokens );
}
_tokenize_motifdescriptionprevnextTop
sub _tokenize_motif {
   # Return a tokenizer iterator that sequentially recognizes and
# returns each token in the input pattern.
# Examples of each token type:
# ambiguous: a position with more than one possible residue.
# eg. [ALEP]
# negated: a position in which some residues are excluded.
# eg. [^WY]
# plain: a common sequence of residues. One position, one residue.
# eg. MAAEIK
# open_par, close_par: tags surrounding a motif that is repeated
# a certain number of times.
# eg. (...){3}
my $target = shift; return sub { return [ 'ambiguous', $1 ] if $target =~ /\G (\[[A-Za-z\.]+\]) /gcx; return [ 'negated', $1 ] if $target =~ /\G (\[\^[A-Za-z\.]+\]) /gcx; return [ 'plain', $1 ] if $target =~ /\G ([A-Za-z\.]+) /gcx; return [ 'open_par', $1 ] if $target =~ /\G (\() /gcx; return [ 'close_par', $1 ] if $target =~ /\G (\)[\{\d+[,\d+]*\}]*) /gcx; return [ 'UNKNOWN', $1 ] if $target =~ /\G (.) /gcx; return; };
}
_contract_codonsdescriptionprevnextTop
sub _contract_codons {
   # Take a list of codons, return an ambiguous codon.
my @codons = map { uc $_ } @_; my @by_letter = ( [], [], [], ); my $ambiguous_codon; foreach my $codon (@codons) { my @letters = split '', $codon; for my $i ( 0 .. 2 ) { push @{ $by_letter[$i] }, $letters[$i]; } } for my $i ( 0 .. 2 ) { $ambiguous_codon .= _convert( 'dna', _uniq_string( @{ $by_letter[$i] } ) ); } return $ambiguous_codon;
}
_expand_codondescriptionprevnextTop
sub _expand_codon {
   # Given a degenerate codon, return a list with all its
# constituents. Takes a three-letter string (codon) as
# input, returns a list with three-letter scalars.
my $codon = shift; die "Wrong codon length!\n" if length $codon != 3; my ( @codons, @return_bases ); my @orig_bases = split '', $codon; for my $i ( 0 .. 2 ) { # from each redundant base, create a list with all their
# components (e.g., N -> (A, C, G, T) );
my @components = split '', _convert('dna', $orig_bases[$i] ); $orig_bases[$i] = [@components]; } # Combine all the bases of each of the three positions of the
# codons, and build the return list.
for my $i ( @{ $orig_bases[0] } ) { for my $j ( @{ $orig_bases[1] } ) { for my $k ( @{ $orig_bases[2] } ) { push @return_bases, $i . $j . $k; } } } return @return_bases; } { my %convert;
}
_convertdescriptionprevnextTop
sub _convert {
      # Interconvert between redundant and non-redundant protein and
# dna alphabets. Takes an alphabet (protein or dna) and a string
# with the letter, and returns its equivalent in
# redundant/non-redundant alphabet. Example ACTG -> N.
my ($alphabet, $letter) = @_; unless ( $alphabet and $alphabet =~ /^dna$|^protein$/i and $letter and length $letter <= 4 ) { croak "Wrong arguments!\n"; } unless (%convert) { %convert = ( 'dna' => { qw(N ACGT B CGT D AGT H ACT V ACG K GT M AC R AG S CG W AT Y CT A A C C T T G G) }, 'protein' => { '.' => 'ACDEFGHIJKLMNOPQRSTUVWY', X => 'ACDEFGHIJKLMNOPQRSTUVWY', Z => 'QE', B => 'ND', }, ); # Make %convert hash key/value agnostic.
foreach my $alphabet ( keys %convert ) { map { $convert{$alphabet}->{ $convert{$alphabet}{$_} } = $_ } keys %{ $convert{$alphabet} }; } } return $convert{$alphabet}{$letter}; }
}
_uniq_stringdescriptionprevnextTop
sub _uniq_string {
   # Takes a list of letters and returns an alphabetically sorted
# list with unique elements.
my @letters = @_; return join '', sort { $a cmp $b } uniq @letters; } { my ( @codon_library, $ct );
}
_negated_aas_to_codondescriptionprevnextTop
sub _negated_aas_to_codon {
      # Given a string of residues, returns a degenerate codon that will
# not be translated into any of them, while maximizing degeneracy
# (ie, it tries to also translate into as many residues as possible).
# This functionality is required for reverse translating profiles
# that contain negative patterns: [^X]. This means that the current
# position should not contain aminoacid X, but can have any of the
# others. The reverse translated nucleotide sequence should
# reflect this.
# Approach: construct a list of all possible codons, incluiding all
# degenerate bases. This is an array of 15x15x15 = 3375 elements.
# Order them by descendent "degeneracy".
# Return the first one whose expansion in 4-lettered codons
# doesn't contain a codon that translates into any of the
# non-wanted residues.
# * Since this takes some time, I presorted them and saved them.
# Reading them from a file takes a fraction of the time that it taes
# to re-sort them every time the application is launched.
my $aas_to_avoid = shift; # Initialize reusable variables if it's the first time the sub
# is called.
unless (@codon_library) { while (<DATA>) { chomp; push @codon_library, split ' ', $_ } } unless ($ct) { $ct = Bio::Tools::CodonTable->new; } # Reverse translate the unwanted aminoacids to unwanted codons.
my @unwanted_codons; foreach my $aa ( split '', $aas_to_avoid ) { push @unwanted_codons, $ct->revtranslate($aa); } foreach my $degenerate_codon (@codon_library) { my @codons = _expand_codon($degenerate_codon); my $success = 1; foreach my $unwanted (@unwanted_codons) { if ( grep { uc $unwanted eq $_ } @codons ) { $success = 0; } } if ($success) { return $degenerate_codon } } } } 1;
}
General documentation
COPYRIGHT & LICENSETop
Copyright 2009 Bruno Vecchi, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.