use Bio::PopGen::Utilities;
use Bio::AlignIO;
# Open a ClustalW-formatted alignment file and read the first alignment
# from it. Direct method-call syntax (Class->new) is used instead of the
# indirect-object form (new Class ...), which is ambiguous to parse.
my $in = Bio::AlignIO->new(-file   => 't/data/t7.aln',
                           -format => 'clustalw');
my $aln = $in->next_aln;
# Get a population: each sequence is an individual and, for the default
# case, every site which is not monomorphic is a 'marker'. Each
# individual will have a 'genotype' for the site, which will be the
# specific base in the alignment at that site.
my $pop = Bio::PopGen::Utilities->aln_to_population(-alignment => $aln);
# Get only the synonymous sites from the alignment as the 'genotypes'
# for the population.
# NOTE: aln_to_population currently throws for any -site_model value that
# does not match /all/i, so this call shows the intended interface only.
my $synpop = Bio::PopGen::Utilities->aln_to_population(-site_model => 'syn',
                                                       -alignment  => $aln);
# aln_to_population
#
# Purpose : Turn an alignment into a Bio::PopGen::Population. Each aligned
#           sequence becomes one Bio::PopGen::Individual (unique_id taken
#           from the sequence display_id) and each retained alignment
#           column becomes one single-allele Bio::PopGen::Genotype whose
#           marker name is "Site-<0-based column index>".
# Usage   : my $pop = Bio::PopGen::Utilities->aln_to_population(
#                         -alignment => $aln);
# Args    : -alignment           => alignment object; must provide
#                                   is_flush, each_seq and length
#           -site_model          => optional; only values matching /all/i
#                                   (or undef) are supported
#           -include_monomorphic => optional; when true, columns in which
#                                   every sequence has the same character
#                                   are kept as markers as well
# Returns : the populated Bio::PopGen::Population, or nothing (after a
#           warning) when the alignment is missing, is not an alignment
#           object, or is not flush.
# Throws  : via $self->throw when an unsupported site model is requested.
sub aln_to_population {
    my ($self, @args) = @_;
    my ($aln, $sitemodel, $includefixed) =
        $self->_rearrange([qw(ALIGNMENT SITE_MODEL INCLUDE_MONOMORPHIC)],
                          @args);

    # Reject undef as well as anything that is not an object offering the
    # alignment interface; the eval keeps a plain string or unblessed
    # reference from dying with an unrelated "Can't call method" error.
    my $is_alignment = defined $aln
        && do { local $@; eval { $aln->can('is_flush') } };
    if ( !$is_alignment ) {
        $self->warn("Must provide a valid Bio::SimpleAlign object to run aln_to_population");
        return;
    }
    if ( !$aln->is_flush ) {
        $self->warn("Must provide a Bio::SimpleAlign object with aligned sequences to aln_to_population!");
        return;
    }

    # Only the "all sites" model is implemented. throw() is expected to
    # raise an exception, so nothing below runs for other models.
    if ( defined $sitemodel && $sitemodel !~ /all/i ) {
        $self->throw("Can only build sites based on all the data right now!");
    }

    my $population = Bio::PopGen::Population->new(-source => 'alignment');

    # Parallel arrays: $inds[$n] is the individual built from $seqs[$n].
    my (@inds, @seqs);
    for my $seq ( $aln->each_seq ) {
        push @inds, Bio::PopGen::Individual->new(-unique_id => $seq->display_id);
        push @seqs, $seq->seq;
    }

    my $last_site = $aln->length - 1;
    for my $site ( 0 .. $last_site ) {
        my $marker  = "Site-$site";
        my @alleles = map { substr($_, $site, 1) } @seqs;

        # A column is polymorphic when more than one distinct character
        # occurs in it; monomorphic columns are kept only on request.
        my %seen;
        @seen{@alleles} = ();
        next unless keys(%seen) > 1 || $includefixed;

        for my $n ( 0 .. $#alleles ) {
            $inds[$n]->add_Genotype(
                Bio::PopGen::Genotype->new(
                    -marker_name   => $marker,
                    -individual_id => $inds[$n]->unique_id,
                    -alleles       => [ $alleles[$n] ],
                )
            );
        }
    }

    $population->add_Individual($_) for @inds;

    return $population;
}
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:
http://bugzilla.open-bio.org/
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _