#!perl -w
use strict;
# Searching for motifs, Counting ACGT and errors
# documentation written in pod, perldoc perlpod
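=head1 NAME

count.pl - count the nucleotides and invalid characters in DNA sequence files

=head1 DESCRIPTION

Reads each file named on the command line and prints, per file, the number
of A, C, G and T bases (case-insensitive) plus the number of characters that
are neither a base nor whitespace ("errors").

=head1 SYNOPSIS

    count.pl FILE [FILE ...]

=cut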
# Process each file named on the command line, in order, printing one
# "<filename>: <counts>" line per file.
# The shifted value is tested with defined() rather than for truth, so a
# file literally named "0" (or an empty argument) does not silently end
# the loop and skip the remaining files.
while (defined(my $filename = shift @ARGV)) {
    # slurp() returns a reference to the file contents.
    my $sequence = slurp($filename);
    # Remove whitespace
    $$sequence =~ s/\s//g;
    print "$filename: ", count($sequence);
}
# count nucleotides of DNA and any errors
#
#   $report = count(\$dna);
#
# Takes a reference to a string of sequence data and returns a single
# newline-terminated report of the form
#   "A=<n> C=<n> G=<n> T=<n> errors=<n>\n"
# Bases are counted case-insensitively.  Any character that is neither one
# of the four bases nor whitespace is counted as an error.  The referenced
# string is not modified.
sub count {
    my ($dna) = @_;

    # Count-only tr/// (empty replacement list, no /d) returns how many
    # characters matched and leaves the string untouched.  The counters
    # deliberately avoid the names $a and $b, which are special to sort().
    my $count_a = ($$dna =~ tr/aA//);
    my $count_c = ($$dna =~ tr/cC//);
    my $count_g = ($$dna =~ tr/gG//);
    my $count_t = ($$dna =~ tr/tT//);

    # Assigning the list of all matches to an empty list, and that to a
    # scalar, yields the number of matches.
    my $errors = () = $$dna =~ /[^acgt\s]/ig;

    return "A=$count_a C=$count_c G=$count_g T=$count_t errors=$errors\n";
}
# \$data = slurp($filename);
# Slurp all data from the named file into a single variable and
# return a reference to it, to avoid copying the data.
# Dies with "Unable to open <filename>: <reason>" if the file cannot be opened.
# Recommend: File::Slurp library for this
# This function was benchmarked against reading into an array and reading
# line by line.  On a slow PC it quickly outpaced arrays, and did better
# than lines.
sub slurp {
    my ($filename) = @_;

    # Three-argument open takes the filename literally: two-argument open
    # would strip leading/trailing whitespace from the name and treat
    # names such as "-" specially.
    open(my $inf, '<', $filename) or die("Unable to open $filename: $!");

    # With the input record separator undefined, a single read returns the
    # whole file; local restores the previous value when the sub exits.
    local $/;
    my $buf = <$inf>;
    close $inf;

    return \$buf;
}