#!perl -w
use strict;
# documentation written in pod, perldoc perlpod
=head1 NAME
SeqFun.pm
=head1 DESCRIPTION
Library of sequence functions from Chapters 4 and 5.
=head1 SYNOPSIS
use SeqFun qw/:all/;
use SeqFun qw/slurp count show_eg/;
use SeqFun qw/:dna/;
=head1 USAGE
use English; # get $PROGRAM_NAME, a special variable
show_eg(`podselect -s "EXAMPLES" $PROGRAM_NAME`);
my $sequence = slurp($filename);
print count($filename, $sequence);
my $dna = concatenate("AGCT","CATGCCTAAAATA");
my $rna = dna2rna($dna);
my $seq = revcom($dna)
=head1 NOTES
Working with sequences shows the need to work with nucleotides
and proteins, and to be able to manipulate back and forth with
various utilities.
=head1 AUTHOR
Written by David Scott to tease out a super simple library.
=cut
# Chapters 4,5 in functions
package SeqFun; # default namespace
BEGIN {
use Exporter();
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
$VERSION = "0.001"; # make a version for changes
$VERSION = eval $VERSION;
@ISA = qw/Exporter/; # inherit from Exporter
@EXPORT = (); # don't export without asking
# create Export tags for convenience
%EXPORT_TAGS = (
all => [ qw/ concatenate count dna2rna print_dna motif
read_file revcom show_eg slurp / ],
dna => [ qw/ concatenate dna2rna print_dna
revcom slurp / ],
);
@EXPORT_OK = qw/ concatenate count dna2rna print_dna motif read_file
revcom show_eg slurp /;
}
=item my $seq = concatenate($dna1, $dna2, ... );
Return a sequence which concatenates any dna sequences
passed in.
=cut
# Concatenate the DNA fragments Using "string interpolation"
sub concatenate {
my (@dna) = @_;
my $result = shift @dna;
foreach (@dna) {
$result .= $_;
}
return $result;
}
=item my $results = count($filename, \$sequence);
return a string of dna counts including errors, and the filename
=cut
sub count {
my ($filename, $dna) = @_;
# Initialize the counts.
# Notice that we can use scalar variables to hold numbers.
my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0);
# Use a regular expression "trick", and five while loops,
# to find the counts of the four bases plus errors
while($$dna =~ /a/ig){$a++}
while($$dna =~ /c/ig){$c++}
while($$dna =~ /g/ig){$g++}
while($$dna =~ /t/ig){$t++}
while($$dna =~ /[^acgt\s]/ig){$e++}
return "$filename:\tA=$a C=$c G=$g T=$t errors=$e\n";
}
=item my $rna = dna2rna($dna);
Transcribe the DNA to RNA by substituting all T's with U's.
=cut
sub dna2rna {
my $rna = shift;
$rna =~ s/T/U/g;
return $rna;
}
=item my $text = print_dna($dna);
return text of dna.
=cut
sub print_dna {
my $dna = shift;
return $dna . "\n";
}
=item my $text = motif($re, \$sequence, $filename);
Return the text: "$re [found|missing]: $filename\n"
after looking through the $$sequence.
=cut
sub motif {
my ($motif, $sequence, $filename ) = @_;
if ( $$sequence =~ /$motif/i ) {
return "$motif found: $filename\n";
} else {
return "$motif missing: $filename\n"
}
}
=item my @lines = read_file($filename)
Return an array of lines in the $filename.
=cut
sub read_file {
my ($filename) = @_;
# First we have to "open" the file
open(FILE, $filename);
# Read the data from the file, and store it
# into the array variable @lines
my @lines = ;
close FILE;
return @lines;
}
=item my $seq = revcom($dna)
Return the reverse complement of the dna $sequence
=cut
sub revcom {
my $dna = shift;
my $revcom = reverse $dna;
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return $revcom;
}
=item my $text = show_eg($text);
Exit with text of examples without a header
and remove last blank line.
=cut
sub show_eg {
my @lines = @_;
# remove 1st and last lines
pop @lines;
shift @lines;
print @lines;
exit (0);
}
=item \$data = slurp($filename);
slurp in all data into a single variable, return a reference
to avoid copying data.
Recommend: File::Slurp library for this function. This slurp()
was benchmarked with reading an array and reading lines on a slow PC,
it quickly out paced arrays, and did better than lines.
=cut
sub slurp {
my ($filename) = @_;
my $inf;
my $fileTerminator = $/;
undef $/;
open($inf, "< $filename") or die("Unable to open $filename: $!");
my $buf = <$inf>;
close $inf;
$/ = $fileTerminator;
return \$buf
}
1; # successful -- library loaded