SeqFun.pm

#!perl -w use strict; # documentation written in pod, perldoc perlpod =head1 NAME SeqFun.pm =head1 DESCRIPTION Library of sequence functions =head1 SYNOPSIS use SeqFun qw/:all/; use SeqFun qw/slurp count show_eg/; =head1 USAGE print show_eg(`podselect -s "EXAMPLES" $0`); my $sequence = slurp($filename); print print_dna($sequence); print count($filename, $sequence); =head1 NOTES Working with sequences shows the need to work with nucleotides and proteins, and to be able to manipulate back and forth with various utilities. =head1 AUTHOR Created by David Scott to tease out a super simple library. =cut package SeqFun; # default namespace BEGIN { use Exporter(); our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); $VERSION = "0.001"; # make a version for changes $VERSION = eval $VERSION; @ISA = qw/Exporter/; # inherit from Exporter @EXPORT = (); # don't export without asking # create Export tags for convenience %EXPORT_TAGS = ( all => [ qw/ count print_dna motif show_eg slurp / ], ); @EXPORT_OK = qw/ count print_dna motif show_eg slurp /; } =item my $results = count($filename, \$sequence); return a string of dna counts including errors, and the filename =cut sub count { my ($filename, $dna) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){$a++} while($$dna =~ /c/ig){$c++} while($$dna =~ /g/ig){$g++} while($$dna =~ /t/ig){$t++} while($$dna =~ /[^acgt\s]/ig){$e++} return "$filename:\tA=$a C=$c G=$g T=$t errors=$e\n"; } =item my $text = print_dna($dna); return text of dna. =cut sub print_dna { my $dna = shift; return $dna . "\n"; } =item my $text = motif($re, \$sequence, $filename); Return the text: "$re [found|missing]: $filename\n" after looking through the $$sequence. =cut sub motif { my ($motif, $sequence, $filename ) = @_; if ( $$sequence =~ /$motif/i ) { return "$motif found: $filename\n"; } else { return "$motif missing: $filename\n" } } =item my $text = show_eg($text); Return text of examples without a header and last blank line. =cut sub show_eg { my @lines = @_; # remove 1st and last lines pop @lines; shift @lines; return @lines; } =item \$data = slurp($filename); slurp in all data into a single variable, return a reference to avoid copying data. Recommend: File::Slurp library for this function. This slurp() was benchmarked with reading an array and reading lines on a slow PC, it quickly out paced arrays, and did better than lines. =cut sub slurp { my ($filename) = @_; my $inf; my $fileTerminator = $/; undef $/; open($inf, "< $filename") or die("Unable to open $filename: $!"); my $buf = <$inf>; close $inf; $/ = $fileTerminator; return \$buf } 1; # successful -- library loaded