find_count.pl

#!perl -w use strict; =head1 NAME find_count.pl =head1 DESCRIPTION Simple example of motif searches and counting nucleotides in dna. =head1 SYNOPSIS find_count.pl [man|help] find_count.pl [counts] [motif=<regular_expresion>] ... [filename=<filename>] ... =head1 EXAMPLES perl find_count.pl --filename="smallfile.txt" --counts perl find_count.pl --f='NM_021964fragment.pep' --mo='A[DS]V' --mo='KND*E{2,}' perl find_count.pl --f='NM_021964fragment.pep' --mo='EE.*EE','A{7,14}','(CAT){3,}' =head1 OPTIONS All options must have a single or double hyphen in front of the name. Only the minimum name that is unique is required. =over 4 =item --counts Count dna nucleotides. =item --eg Show the EXAMPLES section from documentation. =item --filename Read in a filename or a list of filenames separated by commas or specified from the command line. --filename=one --filename=two --filename="three,four,five" =item --help or ? Return synopsis and usage information on options. =item --man Return the complete documentation. =item --motif Specify motifs to look for within the sequence. This may be several --motif entries on the command line. --motif='A[DS]V' --motif='KND*E{2,}' --motif='EE.*EE' --motif='A{7,14}' --motif='(CAT){3,}' =item --test Run all program tests. =item --verbose Show original filenames before executing any functions. Show what is missing during sequence searching. =back =head1 NOTES It doesn't make sense to search protein sequences and count dna nucleotides. =head1 TODO Things to do might include: Using tr to speed up a count. Making count smart enough to do amino acid counts. Calculating percentages in the count. Making a function to do the input list and de-quote code a function. Create cool motifs to search for as examples Consider renaming: motif_count.pl =head1 AUTHOR Functions from Beginning Perl for Bioinformatics by James Tisdall, extended by David Scott to tease out a super simple library. =cut package main; # default namespace # Perl library entries use SelfLoader; # Autoload for test routines use Pod::Usage; # documentation reader library, pod2usage() use Getopt::Long; # option reader library, GetOptions() # User library entries use SeqFun qw/:all/; pod2usage(0) if $#ARGV == -1; # no arguments if @ARGV has no entries # general options my $counts = ""; # count nucleotides, false by default my $help = ""; # no help by default my $man = ""; # no man page by default my $quiet = ""; # opposite of verbose my $showeg = ""; # no examples by default my $test = ""; # test routine my $verbose = ""; # false by default # application options my @motif = (); # empty list of motifs my @filename = (); # empty list of filenames GetOptions( "verbose!" => \$verbose, quiet => sub { $verbose = 0; }, 'help|?' => \$help, man => \$man, eg => \$showeg, test => \$test, "motif:s" => \@motif, "filename:s" => \@filename, counts => \$counts, ); pod2usage(1) if $help; pod2usage(-message => 'Spurious argument(s) left: ' . "@ARGV", -verbose => 1) if $#ARGV != -1; pod2usage(-exitstatus => 0, -verbose => 2) if $man; show_eg(`podselect -s "EXAMPLES" $0`) if $showeg; test() if $test; # Clear out quoting from the command line my @clearance = (); foreach my $motif (@motif) { $motif =~ s/\"//g; $motif =~ s/\'//g; push @clearance, $motif; } @motif = @clearance; # Enable comma separated filenames @filename = split(/,/,join(',',@filename)); # Clear out filename quoting from the command line my @clearname = (); foreach my $filename (@filename) { $filename =~ s/\"$//; $filename =~ s/^\"//; $filename =~ s/\'$//; $filename =~ s/^\'//; push @clearname, $filename; } @filename = @clearname; # The DNA # Show the filenames if verbose if ( $verbose ) { print "Filenames:\n"; foreach my $filename (@filename) { print "$filename\n"; } } while (my $filename = shift @filename) { my $sequence = slurp($filename); # Remove whitespace $$sequence =~ s/\s//g; print count($filename, $sequence) if $counts; my @motifs = @motif; while (my $motif = shift @motifs) { print motif($motif, $sequence, $filename); } } # Successful exit exit(0); __DATA__ # testing for program # could run examples from docs if they were generic enough sub test { system("perl t/seqfun.t"); }