#!/usr/bin/perl -w my $usage = "\nUsage: $0 [-h] [-r RemoveColumnNumber] inputfile\n" . " Input File: tab delimited text file\n" . "1st column is individual name (name of directory)\n" . "Each column represent comma-separated cloneIDs for each allele\n"; my $consedChromatDir="chromat_dir"; my $alleleNameForRemoval="Remove"; use File::Basename; use File::Copy; use Getopt::Std; getopts('hr:') || die $usage; die $usage if (defined($opt_h)); die "$usage\nERROR: Please specify one input file describing the " . "cloneID -> alleleID\n" unless (@ARGV != 1); # purse the inputfile, and get conversion table. my @conv = ReadInConfig($ARGV[0]); my @updatedDir = (); # need to run phredPhrap for these directories # go through each individual conversion foreach my $entry (@conv) { my @cloneIDs = split /\t/, $entry; my $dirName = shift @cloneIDs; next if (@cloneIDs == 0); # test if the directory exists unless ( -d $dirName) { warn "WARN: directory: $dirName doesn't exists, ignoring...\n"; next; } # get the list of ab1 files inside my @ab1Files = GetAB1FileNames($dirName); next if (@ab1Files == 0); # assign allele destination for each filename. my @alleleAssignment = AssignAlleles(\@cloneIDs, \@ab1Files); # need error handling # prepare the destination my @allAlleles = ExtractUnique(@alleleAssignment); my $updateFlag = 0; foreach my $i (@allAlleles) { if ($i != 0 && $i != 1) { SetUpDir($dirName, $i); $updateFlag = 1; push (@updateDir, $dirName . "-a" . $i); # need to be updated } } if ($updateFlag == 1) { push (@updateDir, $dirName); # need to update the original location } # actually move the files for my $i (0..$#ab1Files) { my $dest = $alleleAssignment[$i]; my $fn = $ab1Files[$i]; if ($dest == 0) { warn "INFO: $fn didn't match with any clone name listed in " . "the input file. It will not be moved\n"; next; } elsif ($dest == 1) { next; } else { # now move the file my $newFN = $dirName . "-a" . $dest . "/$consedChromatDir/" . basename($fn); if (-x $newFN) { warn "INFO: $newFN is replaced with the newer file\n"; unlink ($newFN) || die "ERROR: Can't delete $newFN\n"; } move($fn, $newFN) || die "ERROR: can't move $fn -> $newFN\n"; } } } #if (@updateDir > 0) { # RunPhredPhrap (@updateDir); #} exit(0); sub ReadInConfig { my $file = shift; open(IN, "<$file") || die "ERROR: Can't open $file\n"; my @result = (); while() { chomp; my @line = split /\t/; for my $i (0..$#line) { # $line[$i] =~ s/^\s+//; # $line[$i] =~ s/\s+$//; $line[$i] =~ s/\s+//g; } my $entry = join ("\t", @line); push @result, $entry; } return @result; } sub GetAB1FileNames { my $dir = shift; $dir = $dir . "/" . $consedChromatDir; unless ( -d $dir) { warn "WARN: directory: $dir doesn't exists, ignoring...\n"; return (); } # read in the file names opendir (DIR, $dir) || die "ERROR: can't open $dir\n"; my $name; my @files = (); while (defined($name = readdir(DIR))) { push @files, $name ; } closedir (DIR); # make sure all of them have the ab1 files. my $numAB1files = scalar(@files); @files = SelectAB1Files (@files); $numAB1files = $numAB1files - @files; if ($numAB1files != 0) { warn "WARN: In $dir, there are $numAB1files files which are not AB1\n"; } return @files; } # only select ab1 files sub SelectAB1Files { my @result = (); foreach $file (@_) { my $id = GetIDFromFilename($file); if ($id eq "") { warn "INFO: $file does not follow the naming convention: " . "templateID.type.ab1. Ignoring ..."; next; } push @result, $file; } return @result; } sub GetIDFromFilename { my $name = shift; my $bn = basename $name; # "+?" is stingy match if ($bn =~ /^(.+?)\..*\.ab1$/) { return $1; } else { return ""; } } sub AssignAlleles { my ($cloneIDArrRef, $fileArrRef) = @_; # First, purse the cloneID -> alleleID array my %alleleHash = (); my $alleleCnt = 1; foreach my $allele (@$cloneIDArrRef) { my @clones = split /,/, $allele; foreach my $cID (@clones) { unless ($alleleHash{$cID}) { $alleleHash{$cID} = $alleleCnt; } else { die "ERROR: check the input file. $alleleHash{$cID} is " . "designeated to several alleles\n"; } } $alleleCnt ++; } # Go through each filename, and assign them to the allele destination my @result = (); foreach my $file (@$fileArrRef) { # find the potential allele destination for this file my @alleleID = (); foreach my $cID (keys(%alleleHash)) { if ($file =~ /$cID/) { push @alleleID, $cID; } } # make sure that there is no ambiguity in allele destination if (@alleleID > 1) { die "ERROR: I'm not sure how to handle the filename $file. " . "It matches with several alleles: ", join(",", @alleleID), "\n"; } elsif (@alleleID == 0) { push @result, 0; } else { push @result, $alleleID[0]; } } my $remCol = -1; $remCol = $opt_r if (defined($opt_r)); for my $i (0..$#result) { if ($result[$i] == $remCol) { $result[$i] = $alleleNameForRemoval; } } return @result; } # take a list as the argument and extract the unique elements. # The order of elements will not be preserved. sub ExtractUnique { my %seen=(); my @unique = (); foreach my $item (@_) { push (@unique, $item) unless $seen{$item}++; } return @unique; } sub MemberQ { my ($x, $arrRef) = @_; foreach my $item (@$arrRef) { if ($x eq $item) { return 1; } } return 0; } sub CntOccurrence { my ($x, $arrRef) = @_; my $cnt = 0; foreach my $item (@$arrRef) { if ($x eq $item) { $cnt++; } } return $cnt; } sub SetUpDir { my ($dirName, $alleleID) = @_; my $name = $dirName . "-a" . $alleleID; if (-x $name) { if ( -d "$name/chromat_dir" && -d "$name/edit_dir" && -d "$name/phd_dir" ) { return; # already directory exists } else { RenameFile($name, "old"); } } MakeConsedDirStructure($name); return; } sub MakeConsedDirStructure { my $name = shift; system ("mkdir -p $name/chromat_dir"); system ("mkdir -p $name/edit_dir"); system ("mkdir -p $name/phd_dir"); } sub RenameFile { my ($file, $toSuffix) = @_; my $newName = $file . "." . $toSuffix; if (-x $newName) { warn "WARN: removed $newName\n"; system("/bin/rm -rf $newName"); } move($file, $newName) || die "ERROR: move $file -> $newName failed\n"; return }