#!perl -w
use strict;
# documentation written in pod, perldoc perlpod
=head1 NAME
SeqFun.pm
=head1 DESCRIPTION
Library of sequence functions
=head1 SYNOPSIS
use SeqFun qw/:all/;
use SeqFun qw/slurp count show_eg/;
=head1 USAGE
print show_eg(`podselect -s "EXAMPLES" $0`);
my $sequence = slurp($filename);
print print_dna($sequence);
print count($filename, $sequence);
=head1 NOTES
Working with sequences shows the need to work with nucleotides
and proteins, and to be able to manipulate back and forth with
various utilities.
=head1 AUTHOR
Created by David Scott to tease out a super simple library.
=cut
package SeqFun; # default namespace
BEGIN {
use Exporter();
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
$VERSION = "0.001"; # make a version for changes
$VERSION = eval $VERSION;
@ISA = qw/Exporter/; # inherit from Exporter
@EXPORT = (); # don't export without asking
# create Export tags for convenience
%EXPORT_TAGS = (
all => [ qw/ count print_dna motif show_eg slurp / ],
);
@EXPORT_OK = qw/ count print_dna motif show_eg slurp /;
}
=item my $results = count($filename, \$sequence);
return a string of dna counts including errors, and the filename
=cut
sub count {
my ($filename, $dna) = @_;
# Initialize the counts.
# Notice that we can use scalar variables to hold numbers.
my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0);
# Use a regular expression "trick", and five while loops,
# to find the counts of the four bases plus errors
while($$dna =~ /a/ig){$a++}
while($$dna =~ /c/ig){$c++}
while($$dna =~ /g/ig){$g++}
while($$dna =~ /t/ig){$t++}
while($$dna =~ /[^acgt\s]/ig){$e++}
return "$filename:\tA=$a C=$c G=$g T=$t errors=$e\n";
}
=item my $text = print_dna($dna);
return text of dna.
=cut
sub print_dna {
my $dna = shift;
return $dna . "\n";
}
=item my $text = motif($re, \$sequence, $filename);
Return the text: "$re [found|missing]: $filename\n"
after looking through the $$sequence.
=cut
sub motif {
my ($motif, $sequence, $filename ) = @_;
if ( $$sequence =~ /$motif/i ) {
return "$motif found: $filename\n";
} else {
return "$motif missing: $filename\n"
}
}
=item my $text = show_eg($text);
Return text of examples without a header
and last blank line.
=cut
sub show_eg {
my @lines = @_;
# remove 1st and last lines
pop @lines;
shift @lines;
return @lines;
}
=item \$data = slurp($filename);
slurp in all data into a single variable, return a reference
to avoid copying data.
Recommend: File::Slurp library for this function. This slurp()
was benchmarked with reading an array and reading lines on a slow PC,
it quickly out paced arrays, and did better than lines.
=cut
sub slurp {
my ($filename) = @_;
my $inf;
my $fileTerminator = $/;
undef $/;
open($inf, "< $filename") or die("Unable to open $filename: $!");
my $buf = <$inf>;
close $inf;
$/ = $fileTerminator;
return \$buf
}
1; # successful -- library loaded