| Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
| WebCvs |
use Bio::PrimarySeq;
use Bio::Tools::IUPAC;
# Get the IUPAC code for proteins
my %iupac_prot = Bio::Tools::IUPAC->new->iupac_iup;

# Create a sequence with degenerate residues
my $ambiseq = Bio::PrimarySeq->new(-seq => 'ARTCGUTGN', -alphabet => 'dna');

# Create all possible non-degenerate sequences
my $iupac = Bio::Tools::IUPAC->new(-seq => $ambiseq);
while (my $uniqueseq = $iupac->next_seq()) {
    # process the unique Bio::Seq object.
}

# Get a regular expression that matches all possible sequences
my $regexp = $iupac->regexp();
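There are a few things Bio::Tools::IUPAC can do for you: report the IUPAC residue mappings, generate all possible non-degenerate sequences for a degenerate sequence, and build a regular expression that matches all of those sequences.

IUPAC-IUB SYMBOLS FOR NUCLEOTIDE (DNA OR RNA) NOMENCLATURE: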
Cornish-Bowden (1985) Nucl. Acids Res. 13: 3021-3030
------------------------------------------
Symbol       Meaning      Nucleic Acid
------------------------------------------
 A            A           Adenine
 C            C           Cytosine
 G            G           Guanine
 T            T           Thymine
 U            U           Uracil
 M          A or C
 R          A or G
 W          A or T
 S          C or G
 Y          C or T
 K          G or T
 V        A or C or G
 H        A or C or T
 D        A or G or T
 B        C or G or T
 X      G or A or T or C
 N      G or A or T or C

IUPAC-IUP AMINO ACID SYMBOLS:
  Biochem J. 1984 Apr 15; 219(2): 345-373
  Eur J Biochem. 1993 Apr 1; 213(1): 2

------------------------------------------
Symbol           Meaning
------------------------------------------
 A        Alanine
 B        Aspartic Acid, Asparagine
 C        Cysteine
 D        Aspartic Acid
 E        Glutamic Acid
 F        Phenylalanine
 G        Glycine
 H        Histidine
 I        Isoleucine
 J        Isoleucine/Leucine
 K        Lysine
 L        Leucine
 M        Methionine
 N        Asparagine
 O        Pyrrolysine
 P        Proline
 Q        Glutamine
 R        Arginine
 S        Serine
 T        Threonine
 U        Selenocysteine
 V        Valine
 W        Tryptophan
 X        Unknown
 Y        Tyrosine
 Z        Glutamic Acid, Glutamine
 *        Terminator
| BEGIN | Code | |
| new | Description | Code |
| _initialize | No description | Code |
| next_seq | Description | Code |
| iupac | Description | Code |
| iupac_amb | Description | Code |
| iupac_iup | Description | Code |
| iupac_iup_amb | Description | Code |
| iupac_iub | Description | Code |
| iupac_iub_amb | Description | Code |
| iupac_rev_iub | Description | Code |
| count | Description | Code |
| regexp | Description | Code |
| AUTOLOAD | No description | Code |
| new | code | next | Top |
Title : new |
| next_seq | code | prev | next | Top |
Title : next_seq |
| iupac | code | prev | next | Top |
Title : iupac |
| iupac_amb | code | prev | next | Top |
Title : iupac_amb |
| iupac_iup | code | prev | next | Top |
Title : iupac_iup |
| iupac_iup_amb | code | prev | next | Top |
Title : iupac_iup_amb |
| iupac_iub | code | prev | next | Top |
Title : iupac_iub |
| iupac_iub_amb | code | prev | next | Top |
Title : iupac_iub_amb |
| iupac_rev_iub | code | prev | next | Top |
Title : iupac_rev_iub |
| count | code | prev | next | Top |
Title : count |
| regexp | code | prev | next | Top |
Title : regexp |
| BEGIN | Top |
BEGIN {
    # Nucleic acids: ambiguous residues are matched to the unambiguous
    # residues they stand for. Unambiguous residues map to themselves.
    %IUB = (
        ( map { ( $_ => [$_] ) } qw(A C G T U) ),
        M => [qw(A C)],
        R => [qw(A G)],
        W => [qw(A T)],
        S => [qw(C G)],
        Y => [qw(C T)],
        K => [qw(G T)],
        V => [qw(A C G)],
        H => [qw(A C T)],
        D => [qw(A G T)],
        B => [qw(C G T)],
        N => [qw(A C G T)],
        X => [qw(A C G T)],
    );

    # Same as %IUB, but ambiguous residues are matched to ambiguous
    # residues only.
    %IUB_AMB = (
        ( map { ( $_ => [$_] ) } qw(M R W S Y K) ),
        V => [qw(M R S V)],
        H => [qw(H M W Y)],
        D => [qw(D K R W)],
        B => [qw(B K S Y)],
        N => [qw(B D H K M N R S V W Y)],
    );

    # The inverse of %IUB: a sorted string of unambiguous residues is
    # mapped back to its single-letter code.
    %REV_IUB = (
        ( map { ( $_ => $_ ) } qw(A T C G N) ),
        AC   => 'M',
        AG   => 'R',
        AT   => 'W',
        CG   => 'S',
        CT   => 'Y',
        GT   => 'K',
        ACG  => 'V',
        ACT  => 'H',
        AGT  => 'D',
        CGT  => 'B',
        ACGT => 'N',
    );

    # Same thing with proteins now.
    %IUP = (
        ( map { ( $_ => [$_] ) }
              qw(A C D E F G H I K L M N O P Q R S T U V W X Y *) ),
        B => [qw(D N)],
        J => [qw(I L)],
        Z => [qw(E Q)],
    );

    # Ambiguous amino acid codes matched to ambiguous codes only.
    %IUP_AMB = map { ( $_ => [$_] ) } qw(B J Z);
}
| new | description | prev | next | Top |
# Constructor.
#
# Accepts the sequence either as the named parameter -seq or as a bare
# first positional argument (any reference). The sequence is optional; when
# given it must be a non-empty Bio::PrimarySeqI object, otherwise an
# exception is thrown.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($seq) = $self->_rearrange([qw(SEQ)], @args);

    # Fall back to a positional argument when no named one was supplied.
    if ( !defined $seq and @args and ref $args[0] ) {
        $seq = $args[0];
    }

    # Nothing more to validate or store without a sequence.
    return $self unless defined $seq;

    $seq->isa('Bio::PrimarySeqI')
        or $self->throw('Must supply a sequence object');
    length($seq->seq) == 0
        and $self->throw('Sequence had zero-length');

    $self->{'_seq'} = $seq;
    return $self;
}
| _initialize | description | prev | next | Top |
# Prepare the iteration state used by next_seq() and count().
#
# Sets two attributes on the object:
#   _alpha  - one array reference per sequence position, holding the
#             residues that position can expand to (case-insensitive lookup)
#   _string - one counter per position indexing into the matching _alpha
#             entry; the first counter starts at -1 so that the first
#             increment in next_seq() yields the all-zero combination.
#
# Throws if the sequence contains a character that has no entry in the
# IUPAC table for its alphabet. Without this check an undef would be stored
# in _alpha and only fail later, when next_seq() or count() dereference it.
sub _initialize {
    my ($self) = @_;
    my %iupac   = $self->iupac;
    my $seq_str = $self->{'_seq'}->seq;

    my @alpha;
    for my $res ( split //, $seq_str ) {
        my $expansions = $iupac{ uc $res };
        if ( not defined $expansions ) {
            $self->throw("Sequence '$seq_str' is not a valid IUPAC sequence.".
                         " Offending character was '$res'.\n");
        }
        push @alpha, $expansions;
    }

    $self->{'_alpha'}  = \@alpha;
    $self->{'_string'} = [ (0) x length($seq_str) ];
    $self->{'_string'}->[0] = -1;
}
| next_seq | description | prev | next | Top |
# Return the next non-degenerate sequence, or nothing when exhausted.
#
# The state is a vector of counters (_string), one per sequence position,
# each indexing into the list of possible residues for that position
# (_alpha). Each call advances the vector like an odometer whose least
# significant digit is position 0: the first position that can still be
# incremented is bumped, and every exhausted position before it is reset
# to 0.
#
# Returns: a fresh Bio::PrimarySeq whose description carries a
#          "[Bio::Tools::IUPAC-generated unique sequence # N]" tag, or an
#          empty list / undef when all combinations have been produced.
sub next_seq {
    my ($self) = @_;

    if (not exists $self->{'_string'}) {
        $self->_initialize();
    }

    my $string = $self->{'_string'};
    my $alpha  = $self->{'_alpha'};

    for my $i ( 0 .. $#{$string} ) {
        # Skip positions that have a single possible residue and a zero
        # counter: they never change. (Position 0 starts at -1, so it is
        # always processed on the very first call.)
        next unless $string->[$i] || @{$alpha->[$i]} > 1;

        if ( $string->[$i] == $#{$alpha->[$i]} ) {
            # rollover
            if ( $i == $#{$string} ) {
                # end of possibilities
                return;
            }
            $string->[$i] = 0;
            next;
        }

        $string->[$i]++;

        # Assemble the sequence from the residue currently selected at
        # each position.
        my $seqstr = join '',
            map { $alpha->[$_]->[ $string->[$_] ] } 0 .. $#{$string};

        my $desc = $self->{'_seq'}->desc() || '';
        $self->{'_num'}++;

        # Temporarily insert thousands separators for display in the
        # description; they are stripped again below so that _num stays
        # numeric for the next increment.
        1 while $self->{'_num'} =~ s/(\d)(\d\d\d)(?!\d)/$1,$2/;
        # Replace an existing tag, or append one at the end of the string.
        $desc =~ s/( \[Bio::Tools::IUPAC-generated\sunique sequence # [^\]]*\])|$/ \[Bio::Tools::IUPAC-generated unique sequence # $self->{'_num'}\]/;
        $self->{'_num'} =~ s/,//g;

        # Return a fresh sequence object
        return Bio::PrimarySeq->new(-seq => $seqstr, -desc => $desc);
    }

    # Reached when every ambiguous position rolled over but the last
    # position is unambiguous (and therefore skipped above). Return
    # explicitly: the value of a sub that ends in a loop is unspecified.
    return;
}
| iupac | description | prev | next | Top |
# Return the IUPAC table (as a hash) that matches the alphabet of the
# stored sequence: %IUB for 'dna' or 'rna', %IUP for 'protein'.
# Throws for any other alphabet.
sub iupac {
    my ($self) = @_;
    my $alphabet = lc( $self->{'_seq'}->alphabet() );

    return %IUB if $alphabet eq 'dna' || $alphabet eq 'rna';   # nucleic
    return %IUP if $alphabet eq 'protein';                     # proteic

    $self->throw("The input sequence had the unknown alphabet '$alphabet'\n");
}
| iupac_amb | description | prev | next | Top |
# Return the ambiguous-to-ambiguous IUPAC table (as a hash) that matches
# the alphabet of the stored sequence: %IUB_AMB for 'dna' or 'rna',
# %IUP_AMB for 'protein'. Throws for any other alphabet.
sub iupac_amb {
    my ($self) = @_;
    my $alphabet = lc( $self->{'_seq'}->alphabet() );

    return %IUB_AMB if $alphabet eq 'dna' || $alphabet eq 'rna';   # nucleic
    return %IUP_AMB if $alphabet eq 'protein';                     # proteic

    $self->throw("The input sequence had the unknown alphabet '$alphabet'\n");
}
| iupac_iup | description | prev | next | Top |
# Return the amino acid table %IUP: each symbol mapped to an array
# reference of the residues it can stand for.
sub iupac_iup {
    return %IUP;
}
| iupac_iup_amb | description | prev | next | Top |
# Return the amino acid table %IUP_AMB: ambiguous symbols mapped to
# ambiguous symbols only.
sub iupac_iup_amb {
    return %IUP_AMB;
}
| iupac_iub | description | prev | next | Top |
# Return the nucleic acid table %IUB: each symbol mapped to an array
# reference of the unambiguous residues it can stand for.
sub iupac_iub {
    return %IUB;
}
| iupac_iub_amb | description | prev | next | Top |
# Return the nucleic acid table %IUB_AMB: ambiguous symbols mapped to
# ambiguous symbols only.
sub iupac_iub_amb {
    return %IUB_AMB;
}
| iupac_rev_iub | description | prev | next | Top |
# Return the table %REV_IUB, the inverse of %IUB: a string of unambiguous
# nucleic residues mapped to the single symbol that represents them.
sub iupac_rev_iub {
    return %REV_IUB;
}
| count | description | prev | next | Top |
# Return the number of distinct non-degenerate sequences the stored
# sequence expands to: the product, over all positions, of the number of
# residues possible at that position.
sub count {
    my ($self) = @_;

    # The per-position residue lists are built lazily.
    $self->_initialize() unless exists $self->{'_string'};

    my $total = 1;
    foreach my $choices ( @{ $self->{'_alpha'} } ) {
        $total *= scalar @{$choices};
    }
    return $total;
}
| regexp | description | prev | next | Top |
# Build a regular expression string that matches every sequence the stored
# (possibly degenerate) sequence can stand for.
#
# Each position becomes either a single literal residue or a character
# class listing the unambiguous residues plus the ambiguous codes that
# cover a subset of them.
#
# Residues are looked up case-insensitively, as _initialize() does, and the
# pattern is emitted in upper case so that it matches the sequences
# produced by next_seq().
#
# Returns: the pattern as a plain string (not a compiled qr// object).
# Throws:  if a character is not in the IUPAC table of the sequence's
#          alphabet.
sub regexp {
    my ($self) = @_;
    my $re;
    my $seq       = $self->{'_seq'}->seq;
    my %iupac     = $self->iupac;
    my %iupac_amb = $self->iupac_amb;

    for my $pos ( 0 .. length($seq) - 1 ) {
        my $res    = substr $seq, $pos, 1;
        my $iupacs = $iupac{ uc $res };
        if ( not defined $iupacs ) {
            $self->throw("Primer sequence '$seq' is not a valid IUPAC sequence.".
                         " Offending character was '$res'.\n");
        }

        if ( scalar @$iupacs > 1 ) {
            # Not every ambiguous code has an entry in the *_AMB table
            # (e.g. the nucleic 'X'); default to an empty list rather than
            # dereferencing undef.
            my $iupacs_amb = $iupac_amb{ uc $res } || [];
            $re .= '[' . join( '', @$iupacs, @$iupacs_amb ) . ']';
        }
        else {
            # Escape the residue: the protein terminator '*' would
            # otherwise act as a quantifier in the resulting pattern.
            $re .= quotemeta $iupacs->[0];
        }
    }

    return $re;
}
| AUTOLOAD | description | prev | next | Top |
# Delegate any method this class does not define to the wrapped sequence
# object stored in _seq, passing the remaining arguments through unchanged.
# DESTROY is deliberately not forwarded.
sub AUTOLOAD {
    my $self = shift @_;

    # $AUTOLOAD holds the fully qualified name; keep only the method part.
    ( my $method = $AUTOLOAD ) =~ s/.*:://;

    return $self->{'_seq'}->$method(@_) unless $method eq 'DESTROY';
}

1;
| FEEDBACK | Top |
| Mailing Lists | Top |
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
| Support | Top |
| Reporting Bugs | Top |
https://redmine.open-bio.org/projects/bioperl/
| AUTHOR - Aaron Mackey | Top |
| APPENDIX | Top |