sub aln_to_population
{ my ($self,@args) = @_;
my ($aln,
$sitemodel,$phase,
$includefixed,$checkisa) = $self->_rearrange([qw(ALIGNMENT
SITE_MODEL
PHASE
INCLUDE_MONOMORPHIC
CHECKISA)],
@args);
my %ambig_code = ('?' => ['?','?'],
'N' => ['?','?'],
'-' => ['?','?'],
'G' => ['G','G'],
'A' => ['A','A'],
'T' => ['T','T'],
'C' => ['C','C'],
'R' => ['A','G'],
'Y' => ['C','T'],
'W' => ['T','A'],
'M' => ['C','A'],
'S' => ['C','G'],
'K' => ['G','T']);
if( ! defined $aln ) {
$self->warn("Must provide a valid Bio::SimpleAlign object to run aln_to_population");
return;
}
if( ! $aln->is_flush ) {
$self->warn("Must provide a Bio::SimpleAlign object with aligned sequences to aln_to_population!");
return;
}
$phase = 0 unless defined $phase;
if( $phase != 0 && $phase != 1 && $phase != 2 ) {
warn("phase must be 0,1, or 2");
return;
}
my $alength = $aln->length;
my @inds;
if( ! defined $sitemodel || $sitemodel =~ /all/i ) {
my $ct = 0;
my @seqs;
for my $seq ( $aln->each_seq ) {
push @seqs, $seq->seq;
push @inds, Bio::PopGen::Individual->new(-unique_id => $seq->display_id);
}
for( my $i = 0; $i < $alength; $i++ ) {
my (@genotypes,%set);
for my $seq ( @seqs ) {
my $site = uc(substr($seq,$i,1));
push @genotypes, $ambig_code{$site};
$set{$site}++;
}
if( keys %set > 1 || $includefixed ) {
my $genoct = scalar @genotypes;
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => ($i+1),
-individual_id=> $inds[$j]->unique_id,
-alleles => $genotypes[$j]));
}
}
}
} elsif( $sitemodel =~ /cod(on)?/i ) {
my $ct = 0;
my @seqs;
for my $seq ( $aln->each_seq ) {
push @seqs, $seq->seq;
push @inds, Bio::PopGen::Individual->new(-unique_id => $seq->display_id);
}
my $codonct = 0;
for( my $i = $phase; $i < $alength; $i += CodonLen ) {
my (@genotypes,%set,$genoct);
for my $seq ( @seqs ) {
my @unambig_site;
my $site = uc(substr($seq,$i,CodonLen));
if( length($site) < CodonLen ) {
$self->debug("phase was $phase, but got to end of alignment with overhang of $site");
next;
}
for (my $pos=0; $pos<CodonLen; $pos++)
{
$unambig_site[0] .= $ambig_code{substr($site, $pos, 1)}[0];
$unambig_site[1] .= $ambig_code{substr($site, $pos, 1)}[1];
}
push @genotypes, [@unambig_site];
$set{$site}++;
}
$genoct = scalar @genotypes;
if( keys %set > 1 || $includefixed ) {
for( my $j = 0; $j < $genoct; $j++ ) {
$inds[$j]->add_Genotype(Bio::PopGen::Genotype->new
(-marker_name => ($i/CodonLen), -individual_id=> $inds[$j]->unique_id, -alleles => $genotypes[$j])); }
$codonct++;
}
}
} else {
$self->throw("Can only build sites based on all the data right now!");
}
return Bio::PopGen::Population->new(-checkisa => 0,
-source => 'alignment',
-individuals=>\@ inds);
}
1;} |
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _