#!perl -w use strict; # Searching for motifs, Counting ACGT and errors # documentation written in pod, perldoc perlpod =head1 NAME count.pl =head1 DESCRIPTION Read files and return a count of dna values. =head1 SYNOPSIS count.pl <file> ... =cut while (my $filename = shift @ARGV) { my $sequence = slurp($filename); # Remove whitespace $$sequence =~ s/\s//g; print "$filename: ", count($sequence); } # count nucleotides of dna and any errors sub count { my ($dna) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){++$a} while($$dna =~ /c/ig){++$c} while($$dna =~ /g/ig){++$g} while($$dna =~ /t/ig){++$t} while($$dna =~ /[^acgt\s]/ig){++$e} return "A=$a C=$c G=$g T=$t errors=$e\n"; } # \$data = slurp($filename); # slurp in all data into a single variable # return a reference to avoid copying data # Recommend: File::Slurp library for this # This function was benchmarked with reading an array and reading lines # On a slow PC, it quickly out paced arrays, and did better than lines. sub slurp { my ($filename) = @_; my $inf; local $/; open($inf, "< $filename") or die("Unable to open $filename: $!"); my $buf = <$inf>; close $inf; return \$buf }