#!perl -w use strict; # Searching for motifs, Counting ACGT and errors # documentation written in pod, perldoc perlpod =head1 NAME count_bench.pl =head1 DESCRIPTION Read files and return a count of dna values. =head1 SYNOPSIS count.pl <file> ... =cut use Benchmark qw/timethese cmpthese/; my @files = @ARGV; # load tests my %test = (); foreach my $file ( @files ) { $test{$file." - prefix"} = get_count($file); $test{$file." - postfix"} = get_count2($file); } my $result = timethese(-5, \%test ); cmpthese($result); exit(0); # bench counter -- does not display sub get_count { my $filename = shift; return sub { my $sequence = slurp($filename); $$sequence =~ s/\s//g; count($sequence); }; } # count functions sub count { my ($dna) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){$a++} while($$dna =~ /c/ig){$c++} while($$dna =~ /g/ig){$g++} while($$dna =~ /t/ig){$t++} while($$dna =~ /[^acgt\s]/ig){$e++} return "A=$a C=$c G=$g T=$t errors=$e\n"; } # bench counter -- does not display sub get_count2 { my $filename = shift; return sub { my $sequence = slurp($filename); $$sequence =~ s/\s//g; count2($sequence); }; } # count functions sub count2 { my ($dna) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){++$a} while($$dna =~ /c/ig){++$c} while($$dna =~ /g/ig){++$g} while($$dna =~ /t/ig){++$t} while($$dna =~ /[^acgt\s]/ig){++$e} return "A=$a C=$c G=$g T=$t errors=$e\n"; } # \$data = slurp($filename); # slurp in all data into a single variable # return a reference to avoid copying data # Recommend: File::Slurp library for this # This function was benchmarked with reading an array and reading lines # On a slow PC, it quickly out paced arrays, and did better than lines. sub slurp { my ($filename) = @_; my $inf; local $/; open($inf, "< $filename") or die("Unable to open $filename: $!"); my $buf = <$inf>; close $inf; return \$buf }