This object can transform
Bio::SimpleAlign objects to and from
'po' format flat file databases. 'po' format is the native format of
the POA alignment program (Lee C, Grasso C, Sharlow MF, 'Multiple
sequence alignment using partial order graphs', Bioinformatics (2002),
18(3):452-64).
sub next_aln
{ my $self = shift;
my $aln;
my $entry;
my $name;
my $seqs;
my $seq;
my $nodes;
my $list;
my $node;
my @chars;
my $s;
my $a;
$aln = Bio::SimpleAlign->new();
while(defined($entry = $self->_readline)) {
if($entry =~ /^VERSION=(\S+)/) {
$aln->source($1);
if(defined($entry = $self->_readline) and $entry =~ /^NAME=(\S+)/) {
$aln->id($1);
}
last;
}
}
$seqs = [];
$nodes = [];
while(defined($entry = $self->_readline)) {
if($entry =~ /^VERSION/) {
$self->_pushback($entry);
last;
}
elsif($entry =~ /^SOURCENAME=(\S+)/) {
$name = $1;
if($name =~ /(\S+)\/(\d+)-(\d+)/) {
$seq = Bio::LocatableSeq->new(
'-display_id' => $1,
'-start' => $2,
'-end' => $3,
'-alphabet' => $self->alphabet,
);
} else {
$seq = Bio::LocatableSeq->new('-display_id'=> $name,
'-alphabet' => $self->alphabet);
}
push @{$seqs}, {
'seq' => $seq,
'str' => '',
};
}
elsif($entry =~ /^SOURCEINFO=(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.*)/) {
$seq->desc($5);
}
elsif($entry =~ /^(\S):(\S+)/) {
$node = {
'aa' => $1,
'L' => [],
'S' => [],
'A' => [],
'status' => 'unvisited',
};
$list = $2;
if($list =~ /^([L\d]*)([S\d]*)([A\d]*)/) {
push(@{$node->{'L'}}, split(/L/, $1));
push(@{$node->{'S'}}, split(/S/, $2));
push(@{$node->{'A'}}, split(/A/, $3));
(@{$node->{'L'}} > 0) and shift @{$node->{'L'}};
(@{$node->{'S'}} > 0) and shift @{$node->{'S'}};
(@{$node->{'A'}} > 0) and shift @{$node->{'A'}};
}
push @{$nodes}, $node;
}
}
foreach $node (@{$nodes}) {
($node->{'status'} ne 'unvisited') and next;
@chars = ($aln->gap_char) x @{$seqs};
foreach $s (@{$node->{'S'}}) {
$chars[$s] = $node->{'aa'};
}
$node->{'status'} = 'visited';
while(defined($a = $node->{'A'}->[0])) {
$node = $nodes->[$a];
($node->{'status'} ne 'unvisited') and last;
foreach $s (@{$node->{'S'}}) {
$chars[$s] = $node->{'aa'};
}
$node->{'status'} = 'visited';
}
foreach $seq (@{$seqs}) {
$seq->{'str'} .= shift @chars;
}
}
foreach $seq (@{$seqs}) {
$seq->{'seq'}->seq($seq->{'str'});
$aln->add_seq($seq->{'seq'});
}
return $aln if $aln->num_sequences;
return;} |
sub write_aln
{ my $self = shift;
my @alns = @_;
my $aln;
my $seqs;
my $nodes;
my $seq;
my $node;
my $col;
my $ring;
my $i;
my $char;
foreach $aln (@alns) {
if(!$aln or !$aln->isa('Bio::Align::AlignI')) {
$self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
next;
}
$seqs = [];
foreach $seq ($aln->each_seq()) {
push @{$seqs}, {
'seq' => $seq,
'n_nodes' => 0,
'first' => undef,
'previous' => undef,
};
}
$nodes = [];
for($col = 0; $col < $aln->length; $col++) {
$ring = {
'nodes' => {},
'first' => scalar @{$nodes},
'last' => scalar @{$nodes},
};
for($i = 0; $i < @{$seqs}; $i++) {
$seq = $seqs->[$i];
$char = $seq->{'seq'}->subseq($col + 1, $col + 1);
($char eq $aln->gap_char) and next;
if(!defined($node = $ring->{'nodes'}->{$char})) {
$node = {
'n' => scalar @{$nodes},
'aa' => $char,
'L' => {},
'S' => [],
'A' => [],
};
$ring->{'nodes'}->{$char} = $node;
$ring->{'last'} = $node->{'n'};
push @{$nodes}, $node;
}
push @{$node->{'S'}}, $i;
defined($seq->{'first'}) or ($seq->{'first'} = $node);
$seq->{'n_nodes'}++;
defined($seq->{'previous'}) and ($node->{'L'}->{$seq->{'previous'}->{'n'}} = $seq->{'previous'});
$seq->{'previous'} = $node;
}
if($ring->{'first'} < $ring->{'last'}) {
for($i = $ring->{'first'}; $i < $ring->{'last'}; $i++) {
push @{$nodes->[$i]->{'A'}}, $i + 1;
}
push @{$nodes->[$ring->{'last'}]->{'A'}}, $ring->{'first'};
}
}
$self->_print(
'VERSION=', ($aln->source and ($aln->source !~ /\A\s*\Z/)) ? $aln->source : 'bioperl', "\n",
'NAME=', $aln->id, "\n",
'TITLE=', ($seqs->[0]->{'seq'}->description or $aln->id), "\n",
'LENGTH=', scalar @{$nodes}, "\n",
'SOURCECOUNT=', scalar @{$seqs}, "\n",
);
foreach $seq (@{$seqs}) {
$self->_print(
'SOURCENAME=', $seq->{'seq'}->display_id, "\n",
'SOURCEINFO=',
$seq->{'n_nodes'}, ' ', $seq->{'first'}->{'n'}, ' ', 0, ' ', -1, ' ', ($seq->{'seq'}->description or 'untitled'), "\n",
);
}
foreach $node (@{$nodes}) {
$self->_print($node->{'aa'}, ':');
(keys %{$node->{'L'}} > 0) and $self->_print('L', join('L', sort {$a <=> $b} keys %{$node->{'L'}}));
(@{$node->{'S'}} > 0) and $self->_print('S', join('S', @{$node->{'S'}}));
(@{$node->{'A'}} > 0) and $self->_print('A', join('A', @{$node->{'A'}}));
$self->_print("\n");
}
}
$self->flush if $self->_flush_on_write && defined $self->_fh;
return 1;
}
1;} |