SeqFun.pm

#!perl -w use strict; # documentation written in pod, perldoc perlpod =head1 NAME SeqFun.pm =head1 DESCRIPTION Library of sequence functions from Chapters 4 and 5. =head1 SYNOPSIS use SeqFun qw/:all/; use SeqFun qw/slurp count show_eg/; use SeqFun qw/:dna/; =head1 USAGE use English; # get $PROGRAM_NAME, a special variable show_eg(`podselect -s "EXAMPLES" $PROGRAM_NAME`); my $sequence = slurp($filename); print count($filename, $sequence); my $dna = concatenate("AGCT","CATGCCTAAAATA"); my $rna = dna2rna($dna); my $seq = revcom($dna) =head1 NOTES Working with sequences shows the need to work with nucleotides and proteins, and to be able to manipulate back and forth with various utilities. =head1 AUTHOR Written by David Scott to tease out a super simple library. =cut # Chapters 4,5 in functions package SeqFun; # default namespace BEGIN { use Exporter(); our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); $VERSION = "0.001"; # make a version for changes $VERSION = eval $VERSION; @ISA = qw/Exporter/; # inherit from Exporter @EXPORT = (); # don't export without asking # create Export tags for convenience %EXPORT_TAGS = ( all => [ qw/ concatenate count dna2rna print_dna motif read_file revcom show_eg slurp / ], dna => [ qw/ concatenate dna2rna print_dna revcom slurp / ], ); @EXPORT_OK = qw/ concatenate count dna2rna print_dna motif read_file revcom show_eg slurp /; } =item my $seq = concatenate($dna1, $dna2, ... ); Return a sequence which concatenates any dna sequences passed in. =cut # Concatenate the DNA fragments Using "string interpolation" sub concatenate { my (@dna) = @_; my $result = shift @dna; foreach (@dna) { $result .= $_; } return $result; } =item my $results = count($filename, \$sequence); return a string of dna counts including errors, and the filename =cut sub count { my ($filename, $dna) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){$a++} while($$dna =~ /c/ig){$c++} while($$dna =~ /g/ig){$g++} while($$dna =~ /t/ig){$t++} while($$dna =~ /[^acgt\s]/ig){$e++} return "$filename:\tA=$a C=$c G=$g T=$t errors=$e\n"; } =item my $rna = dna2rna($dna); Transcribe the DNA to RNA by substituting all T's with U's. =cut sub dna2rna { my $rna = shift; $rna =~ s/T/U/g; return $rna; } =item my $text = print_dna($dna); return text of dna. =cut sub print_dna { my $dna = shift; return $dna . "\n"; } =item my $text = motif($re, \$sequence, $filename); Return the text: "$re [found|missing]: $filename\n" after looking through the $$sequence. =cut sub motif { my ($motif, $sequence, $filename ) = @_; if ( $$sequence =~ /$motif/i ) { return "$motif found: $filename\n"; } else { return "$motif missing: $filename\n" } } =item my @lines = read_file($filename) Return an array of lines in the $filename. =cut sub read_file { my ($filename) = @_; # First we have to "open" the file open(FILE, $filename); # Read the data from the file, and store it # into the array variable @lines my @lines = <FILE>; close FILE; return @lines; } =item my $seq = revcom($dna) Return the reverse complement of the dna $sequence =cut sub revcom { my $dna = shift; my $revcom = reverse $dna; $revcom =~ tr/ACGTacgt/TGCAtgca/; return $revcom; } =item my $text = show_eg($text); Exit with text of examples without a header and remove last blank line. =cut sub show_eg { my @lines = @_; # remove 1st and last lines pop @lines; shift @lines; print @lines; exit (0); } =item \$data = slurp($filename); slurp in all data into a single variable, return a reference to avoid copying data. Recommend: File::Slurp library for this function. This slurp() was benchmarked with reading an array and reading lines on a slow PC, it quickly out paced arrays, and did better than lines. =cut sub slurp { my ($filename) = @_; my $inf; my $fileTerminator = $/; undef $/; open($inf, "< $filename") or die("Unable to open $filename: $!"); my $buf = <$inf>; close $inf; $/ = $fileTerminator; return \$buf } 1; # successful -- library loaded