#!perl -w
use strict;
# Searching for motifs, Counting ACGT and errors
# documentation written in pod, perldoc perlpod
=head1 NAME
count_bench.pl
=head1 DESCRIPTION
Read files and return a count of dna values.
=head1 SYNOPSIS
count.pl ...
=cut
use Benchmark qw/timethese cmpthese/;
my @files = @ARGV;
# load tests
my %test = ();
foreach my $file ( @files ) {
$test{$file." - prefix"} = get_count($file);
$test{$file." - postfix"} = get_count2($file);
}
my $result = timethese(-5, \%test );
cmpthese($result);
exit(0);
# bench counter -- does not display
sub get_count {
my $filename = shift;
return sub {
my $sequence = slurp($filename);
$$sequence =~ s/\s//g;
count($sequence);
};
}
# count functions
sub count {
my ($dna) = @_;
# Initialize the counts.
# Notice that we can use scalar variables to hold numbers.
my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0);
# Use a regular expression "trick", and five while loops,
# to find the counts of the four bases plus errors
while($$dna =~ /a/ig){$a++}
while($$dna =~ /c/ig){$c++}
while($$dna =~ /g/ig){$g++}
while($$dna =~ /t/ig){$t++}
while($$dna =~ /[^acgt\s]/ig){$e++}
return "A=$a C=$c G=$g T=$t errors=$e\n";
}
# bench counter -- does not display
sub get_count2 {
my $filename = shift;
return sub {
my $sequence = slurp($filename);
$$sequence =~ s/\s//g;
count2($sequence);
};
}
# count functions
sub count2 {
my ($dna) = @_;
# Initialize the counts.
# Notice that we can use scalar variables to hold numbers.
my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0);
# Use a regular expression "trick", and five while loops,
# to find the counts of the four bases plus errors
while($$dna =~ /a/ig){++$a}
while($$dna =~ /c/ig){++$c}
while($$dna =~ /g/ig){++$g}
while($$dna =~ /t/ig){++$t}
while($$dna =~ /[^acgt\s]/ig){++$e}
return "A=$a C=$c G=$g T=$t errors=$e\n";
}
# \$data = slurp($filename);
# slurp in all data into a single variable
# return a reference to avoid copying data
# Recommend: File::Slurp library for this
# This function was benchmarked with reading an array and reading lines
# On a slow PC, it quickly out paced arrays, and did better than lines.
sub slurp {
my ($filename) = @_;
my $inf;
local $/;
open($inf, "< $filename") or die("Unable to open $filename: $!");
my $buf = <$inf>;
close $inf;
return \$buf
}