#!/usr/bin/perl my $usage="Usage: $0 [-hmt] [codeml_output]\n" . " -m: the codeml_output contains multiple genes\n" . " -t: eliminate the termination codons\n" . "Extract the codon usage counts table from codeml_output.\n" . "Order of bases: T C A G\n"; use Getopt::Std; getopts('hmt') || die "$usage\n"; die "$usage\n" if (defined($opt_h)); $tableStart = (defined($opt_m)) ? 'Codon usage in genes' : 'Sums of codon usage counts'; @doc = <>; my @matchingIndex = IndexOfMatchingPattern (\@doc, $tableStart); if (@matchingIndex != 1) { die "ERROR: document contains " . scalar(@matchingIndex) . " lines " . "which match the pattern. The pattern we are looking for is:\n". "$tableStart\nSpecify another unique signal by modifying a " . "variable \$tableStart\n"; } my $tbl1st = @matchingIndex[0]; die "ERROR: line " . $tbl1st+3 . "is supposed to contains the very 1st line " . "of data. But it doesn't follow the codon Tbl format\n" unless ($doc[$tbl1st+1] =~ /^-+\n$/ && $doc[$tbl1st+6] =~ /^-+\n$/ && $doc[$tbl1st+11] =~ /^-+\n$/ && $doc[$tbl1st+16] =~ /^-+\n$/ && $doc[$tbl1st+21] =~ /^-+\n$/); $tbl1st += 2; # now pointing to the 1st data my $numCol = -1; my @codonTbl = (); for my $i (0..3, 5..8, 10..13, 15..18) { my $dat = $doc[$tbl1st + $i]; $dat =~ s/[a-zA-Z\*\|]//g; $dat =~ s/^\s+//; $dat =~ s/\s+$//; my @line = split /\s+/, $dat; if ($numCol == -1) { $numCol = @line; } else { die "ERROR: CodonTable doesn't have same number of columns\n" if ($numCol != @line); } push @codonTbl, [ @line ]; } my $numGenes = int($numCol / 4); my @result =(); for my $gene (0..($numGenes-1)) { my @cnt = (); my $col; for ($col = $gene; $col < $numCol; $col += $numGenes) { for my $row (0..15) { push @cnt, $codonTbl[$row][$col]; } } my @cntReordered = (); for my $i ( 0..3 , 16..19, 32..35, 48..51, 4..7 , 20..23, 36..39, 52..55, 8..11, 24..27, 40..43, 56..59, 12..15, 28..31, 44..47, 60..63) { push @cntReordered, $cnt[$i]; } if (defined($opt_t)) { # remove termination codons splice(@cntReordered, 14,1); splice(@cntReordered, 10,2); } my $tmp = join ",", @cntReordered; push @result, $tmp; } print join "\n", @result; print "\n"; exit(0); sub IndexOfMatchingPattern { my ($arrayRef, $pattern) = @_; my @indexArray = (); for my $i (0..(scalar(@$arrayRef)-1)) { my $line = $$arrayRef[$i]; if ($line =~ /$pattern/) { push @indexArray, $i; } } return @indexArray; }