# do not use this object directly it is accessed through the Bio::MapIO system
use Bio::MapIO;
-format : specifies the format of the file format is "fpc",
-file : specifies the name of the .fpc file
-readcor : boolean argument, indicating if .cor is to be read
or not. It looks for the .cor file in the same path
as .fpc file.
0 : doesn't read .cor file
1 : reads the .cor file
[default 0]
-verbose : indicates the process of loading of fpc file
my $mapio = Bio::MapIO->new(-format => "fpc",
-file => "rice.fpc",
-readcor => 0,
-verbose => 0);
my $map = $mapio->next_map();
foreach my $marker ( $map->each_markerid() ) {
# loop through the markers associated with the map
# likewise for contigs, clones, etc.
}
This object contains code for parsing and processing FPC files and creating
Bio::Map::Physical object from it.
For faster access and better optimization, the data is stored internally in
hashes. The corresponding objects are created on request.
We handle reading of the FPC ourselves, since MapIO module of Bioperl adds
too much overhead.
sub _initialize
{ my ($self,@args) = @_;
my $species;
$self->SUPER::_initialize(@args);
($species,$_readcor) = $self->_rearrange([qw(SPECIES READCOR)], @args);
$_readcor = 0 unless (defined($_readcor));} |
sub next_map
{
my ($self) = @_;
my $line;
my ($name,$fpcver,$moddate,$moduser,$contigcnt,$clonecnt,$markerscnt,
$bandcnt,$marker,$seqclone);
my ($corfile,$corindex,$BUFFER);
my @cordata;
my %fpcmarker;
my ($contig, $contigNumber);
my $curClone = 0;
my $curMarker = 0;
my $curContig = 0;
my %_clones;
my %_markers;
my %_contigs;
my $ctgzeropos = 1;
my $map = Bio::Map::Physical->new('-units' => 'CB',
'-type' => 'physical');
my $filename = $self->file();
my $fh = $self->{'_filehandle'};
if (defined($_readcor)) {
$map->core_exists($_readcor);
}
else {
$map->core_exists(0);
}
if ($map->core_exists()) {
$corfile = substr($filename,0,length($filename)-3)."cor";
if (open(CORE,$corfile)) {
while(read(CORE,$BUFFER,2)) {
push(@cordata,unpack('n*', $BUFFER));
}
}
else {
$map->core_exists(0);
}
}
while (defined($line = <$fh>)) {
chomp($line);
if ($line =~ m{^//\s+fpc\s+project\s+(.+)}) { $map->name($1); }
if ($line =~ m{^//\s+([\d.]+)}) {
my $version = $1;
$version =~ /((\d+)\.(\d+))(.*)/;
$map->version($1);
if ($line =~ /User:\s+(.+)/) { $map->modification_user($1); }
}
if ($line =~ m{^//\s+Framework\s+(\w+)\s+(\w+)\s+([-\w]+)\s+(\w+)\s+(\w+)\s+(.+)$})
{
$map->group_type($3) if ($2 eq "Label");
$map->group_abbr($5) if ($4 eq "Abbrev");
}
last unless ($line =~ m{^//});
}
if (!defined($map->group_type()) || !defined($map->group_abbr()) ) {
$map->group_type("Chromosome");
$map->group_abbr("Chr");
}
$_contigs{0}{'range'}{'end'} = 0;
$_contigs{0}{'range'}{'start'} = 0;
while (defined($line = <$fh>)) {
$marker = 0;
$contig = 0;
$seqclone = 0;
$contigNumber = 0;
my ($type,$name);
my (@amatch,@pmatch,@ematch);
my $bandsread = 0;
last if ($line =~ /^Markerdata/);
$line =~ /^(\w+)\s+:\s+"(.+)"/;
($type, $name) = ($1, $2);
if ($name =~ /sd1/) {
$seqclone = 1;
}
$_clones{$name}{'type'} = $type;
$_clones{$name}{'contig'} = 0;
$_contigs{'0'}{'clones'}{$name} = 0;
my $temp;
while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
if ($line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([-\d]+)/) {
$_clones{$name}{'contig'} = $1;
$_contigs{$1}{'clones'}{$name} = 0;
delete($_contigs{'0'}{'clones'}{$name});
$temp = $3;
$contigNumber = $1;
$line = <$fh>;
$line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([\d]+)/;
$_clones{$name}{'range'}{'start'} = $temp;
$_contigs{$contigNumber}{'range'}{'start'} = $temp
if (!exists($_contigs{$contigNumber}{'range'}{'start'})
|| $_contigs{$contigNumber}{'range'}{'start'}
> $temp );
$_clones{$name}{'range'}{'end'} = $3;
$_contigs{$contigNumber}{'range'}{'end'} = $3
if (!exists($_contigs{$contigNumber}{'range'}{'end'})
|| $_contigs{$contigNumber}{'range'}{'end'} < $3 );
}
elsif ($line =~ /^([a-zA-Z]+)_match_to_\w+\s+"(.+)"/) {
my $matchtype = "match" . lc(substr($1, 0, 1));
$_clones{$name}{$matchtype}{$2} = 0;
}
elsif ($line =~ /^Positive_(\w+)\s+"(.+)"/) {
$_clones{$name}{'markers'}{$2} = 0;
$_markers{$2}{'clones'}{$name} = 0;
$_markers{$2}{'type'} = $1;
$_markers{$2}{'contigs'}{$contigNumber} = 0;
$_contigs{$contigNumber}{'markers'}{$2} = 0;
}
elsif ($line =~ /^Bands\s+(\d+)\s+(\d+)/ && !$bandsread) {
my $i = 0;
my @numbands;
$bandsread = 1;
if ($map->core_exists()) {
while($i<$2){
push(@numbands,$cordata[($1-1)+$i]);
$i++;
}
$_clones{$name}{'bands'} =\@ numbands;
}
else {
push(@numbands,$1,$2);
$_clones{$name}{'bands'} =\@ numbands;
}
if (exists($_contigs{0}{'clones'}{$name})) {
$_clones{$name}{'range'}{'start'} = $ctgzeropos;
$_clones{$name}{'range'}{'end'} = $ctgzeropos + $2;
$_contigs{0}{'range'}{'end'} = $ctgzeropos + $2;
$ctgzeropos += $2;
}
}
elsif ($line =~ /^Gel_number\s+(.+)/) {
$_clones{$name}{'gel'} = $1;
}
elsif ($line =~ /^Remark\s+"(.+)"/) {
$_clones{$name}{'remark'} .= $1;
$_clones{$name}{'remark'} .= "\n";
if($seqclone == 1 ) {
if( $1 =~ /\,\s+Chr(\d+)\s+/){
$_clones{$name}{'group'} = $1;
}
}
}
elsif ($line =~ /^Fp_number\s+"(.+)"/) {
$_clones{$name}{'fp_number'} = $1;
}
elsif ($line =~ /^Shotgun\s+(\w+)\s+(\w+)/) {
$_clones{$name}{'sequence_type'} = $1;
$_clones{$name}{'sequence_status'} = $2;
}
elsif ($line =~ /^Fpc_remark\s+"(.+)"/) {
$_clones{$name}{'fpc_remark'} .= $1;
$_clones{$name}{'fpc_remark'} .= "\n";
}
}
$curClone++;
print "Adding clone $curClone...\n\r"
if ($self->verbose() && $curClone % 1000 == 0);
}
$map->_setCloneRef(\%_clones);
$line = <$fh>;
while (defined($line = <$fh>) && $line !~ /Contigdata/) {
my ($type,$name);
last if ($line !~ /^Marker_(\w+)\s+:\s+"(.+)"/);
($type, $name) = ($1, $2);
$_markers{$name}{'type'} = $type;
$_markers{$name}{'group'} = 0;
$_markers{$name}{'global'} = 0;
$_markers{$name}{'anchor'} = 0;
while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
if ($line =~ /^Global_position\s+([\d.]+)\s*(Frame)?/) {
my $position = $1 - floor($1/1000)*1000; $position = sprintf("%.2f",$position);
$_markers{$name}{'global'} = $position;
$_markers{$name}{'group'} = floor($1/1000); $_markers{$name}{'anchor'} = 1;
if(defined($2)) {
$_markers{$name}{'framework'} = 1;
}
else {
$_markers{$name}{'framework'} = 0;
}
}
elsif ($line =~ /^Anchor_bin\s+"([\w\d.]+)"/) {
my $grpmatch = $1;
my $grptype = $map->group_type();
$grpmatch =~ /(\d+|\w)(.*)/;
my ($group,$subgroup);
$group = $1;
$subgroup = $2;
$subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
$_markers{$name}{'group'} = $group;
$_markers{$name}{'subgroup'} = $subgroup;
}
elsif ($line =~ /^Anchor_pos\s+([\d.]+)\s+(F|P)?/){
$_markers{$name}{'global'} = $1;
$_markers{$name}{'anchor'} = 1;
if ($2 eq 'F') {
$_markers{$name}{'framework'} = 1;
}
else {
$_markers{$name}{'framework'} = 0;
}
}
elsif ($line =~ /^anchor$/) {
$_markers{$name}{'anchor'} = 1;
}
elsif ($line =~ /^Remark\s+"(.+)"/) {
$_markers{$name}{'remark'} .= $1;
$_markers{$name}{'remark'} .= "\n";
}
}
$curMarker++;
print "Adding Marker $curMarker...\n"
if ($self->verbose() && $curMarker % 1000 == 0);
}
$map->_setMarkerRef(\%_markers);
my $ctgname;
my $grpabbr = $map->group_abbr();
my $chr_remark;
$_contigs{0}{'group'} = 0;
while (defined($line = <$fh>)) {
if ($line =~ /^Ctg(\d+)/) {
$ctgname = $1;
$_contigs{$ctgname}{'group'} = 0;
$_contigs{$ctgname}{'anchor'} = 0;
$_contigs{$ctgname}{'position'} = 0;
if ($line =~ /#\w*(.*)\w*$/) {
$_contigs{$ctgname}{'remark'} = $1;
if ($line =~ /#\s+Chr(\d+)\s+/) {
$_contigs{$ctgname}{'group'} = $1;
$_contigs{$ctgname}{'anchor'} = 1;
}
}
}
elsif ($line =~ /^Chr_remark\s+"(-|\+|Chr(\d+))\s+(.+)"$/) {
$_contigs{$ctgname}{'anchor'} = 1;
$_contigs{$ctgname}{'chr_remark'} = $3 if(defined($3));
if (defined($2)) {
$_contigs{$ctgname}{'group'} = $2;
}
else {
$_contigs{$ctgname}{'group'} = "?";
}
}
elsif ($line =~ /^User_remark\s+"(.+)"/) {
$_contigs{$ctgname}{'usr_remark'} = $1;
}
elsif ($line =~ /^Trace_remark\s+"(.+)"/) {
$_contigs{$ctgname}{'trace_remark'} = $1;
}
elsif ($grpabbr && $line =~ /^Chr_remark\s+"(\W|$grpabbr((\d+)|(\w+)|([.\w\d]+)))\s*(\{(.*)\}|\[(.*)\])?"\s+(Pos\s+((\d.)+|NaN))(NOEDIT)?/)
{
my $grpmatch = $2;
my $pos = $10;
if ($pos eq "NaN") {
$pos = 0;
print "Warning: Nan encountered for Contig position\n ";
}
$_contigs{$ctgname}{'chr_remark'} = $6;
$_contigs{$ctgname}{'position'} = $pos;
$_contigs{$ctgname}{'subgroup'} = 0;
if (defined($grpmatch)) {
$_contigs{$ctgname}{'anchor'} = 1;
if ($grpmatch =~ /((\d+)((\D\d.\d+)|(.\d+)))|((\w+)(\.\d+))/) {
my ($group,$subgroup);
$group = $2 if($grpabbr eq "Chr");
$subgroup = $3 if($grpabbr eq "Chr");
$group = $7 if($grpabbr eq "Lg");
$subgroup = $8 if($grpabbr eq "Lg");
$subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
$_contigs{$ctgname}{'group'} = $group;
$_contigs{$ctgname}{'subgroup'} = $subgroup;
}
else {
$_contigs{$ctgname}{'group'} = $grpmatch;
}
}
else {
$_contigs{$ctgname}{'anchor'} = 1;
$_contigs{$ctgname}{'group'} = "?";
}
}
$curContig++;
print "Adding Contig $curContig...\n"
if ($self->verbose() && $curContig % 100 == 0);
}
$map->_setContigRef(\%_contigs);
$map->_calc_markerposition();
$map->_calc_contigposition() if ($map->version() < 7.0);
$map->_calc_contiggroup() if ($map->version() == 4.6);
return $map;} |
The project was done in Arizona Genomics Computational Laboratory
(AGCoL) at University of Arizona.
This work was funded by USDA-IFAFS grant #11180 titled "Web Resources
for the Computation and Display of Physical Mapping Data".
For more information on this project, please refer:
http://www.genome.arizona.edu
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _