#!/usr/bin/perl -w # it runs ace2Fasta.perl with the most recent version of ace file. # then it will harvest the contigs. # Copyright 2006, Naoki Takebayashi # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA use File::Find; use File::Basename; use Getopt::Std; getopts('hr') || die $usage; die $usage if (defined($opt_h)); die "$usage\nERROR: Please specify at least one source directory\n" unless (@ARGV); my @fileName =(); my @sourceDirName; $binA2F = "ace2Fasta.perl"; # the name of executable. foreach my $dir (@ARGV) { $dir =~ s@/$@@; # Strip any trailing slash if (-d $dir) { push @sourceDirName, $dir; RunAce2Fasta($dir); } else { warn "Don't know how to handle argument '$dir', ignoring\n"; next; } } # extract the plain files and symlinks push @fileName, ListRegFilesRecursive(@sourceDirName); # get the names of *.contigs file, but excluding fasta.screen.contigs my @contigFiles = grep { /edit_dir\/[^\/]+\.contigs$/ } @fileName; @contigFiles = grep { ! /\.fasta\.screen\.contigs$/ } @contigFiles; # exclude miniassembly ace files. @contigFiles = grep { ! /^mini\.\d+\.\d+\.fasta\.screen\.ace\.\d+$/ } @contigFiles; @contigFiles = RemoveNewAndOld(@contigFiles); # We got the list of files, go through each file my %name = (); foreach my $file (@contigFiles) { if ( -z $file) { warn "WARN: the file $file is empty. Ignoring...\n"; next; } my $bn = basename($file); $bn =~ s/\.contigs$//; my $sampleID = $bn; if (defined $name{$bn}) { $sampleID = $sampleID . "." . $name{$bn}; warn "WARN: Duplicated sample names found. Using sample name " . "\"$sampleID\" for contigs in $file.\n"; $name{$bn}++; } else { $name{$bn}++; } # now modify the fasta sequence name and print out open (IN, "<$file") || die "ERROR: can't open $file\n"; while () { chomp; s/^\s+//; s/\s+$//; s/^>\s*Contig1\s*$/>$sampleID/; s/^>\s*Contig(\d+)\s*$/>${sampleID}_c$1/; print; print "\n"; } close (IN); } exit (0); # take a list of directories and returns the names of plain files and sym links sub ListRegFilesRecursive { my @names =(); find sub {push @names, $File::Find::name if (-f $_ || -l $_) }, @_; return @names; } # When phredPhrap got run multiple times, *New.contigs and *Old.contigs # get created. Remove these if correponding *.contigs exists. sub RemoveNewAndOld { my @result = (); # make a filename database my %seen = (); foreach my $key (@_) { $seen{$key} = 1; } foreach my $file (@_) { if ( $file =~ /^(.+)New.contigs$/ || $file =~ /^(.+)Old.contigs/) { next if ($seen{"$1" . ".contigs"}); #ignore these } push @result, $file; } return @result; } # take a directory name as an argument, and update *.contigs file # by running ace2Fasta.perl. sub RunAce2Fasta { my $dirName = shift; my $aceFile = FindAceFile("$dirName/edit_dir"); # get the recent version of ace if($aceFile eq "") { # no acefile warn "INFO: no ace file in $dirName, $binA2F was not run\n"; return; } system("cd $dirName/edit_dir; $binA2F $aceFile > /dev/null 2>&1"); return; } # return the most recent version of ace file in the directory. sub FindAceFile { my $dir = shift; opendir (DIR, $dir) or die "Can't opendir $dir:$!"; my $maxVersion = 1; my $name = ""; while (defined($file = readdir(DIR))) { # ignore miniaseembly ace files next if ($file =~ /^mini\.\d+\.\d+\.fasta\.screen\.ace\.\d+$/); if ($file =~ /^(.+\.fasta\.screen\.ace)\.(\d+)$/) { if ($name eq "") { $name = $1; $maxVersion = $2; } else { if ($name ne $1) { die "ERROR: more than 2 ace files with different name\n"; } if ($maxVersion < $2) { $maxVersion = $2; } } } } if ($name eq "") { return (""); } else { return ("$name.$maxVersion"); } }