find_count.pl

#!perl -w use strict; # documentation written in pod, perldoc perlpod =head1 NAME find_count.pl =head1 DESCRIPTION Simple example of motif searches and counting nucleotides in dna. =head1 EXAMPLES perl find_count.pl --filename="pccx1.dna" --counts perl find_count.pl --f='NM_021964fragment.pep' --mo='A[DS]V' --mo='KND*E{2,}' perl find_count.pl --f='NM_021964fragment.pep' --mo='EE.*EE' --mo='A{7,14}' --mo='(CAT){3,}' =head1 SYNOPSIS find_count.pl [man|help] find_count.pl [counts] [motif=<regular_expresion>] ... [filename=<filename>] ... =head1 OPTIONS All options must have a single or double hyphen in front of the name. Only the minimum name that is unique is required. =over 4 =item --counts Count dna nucleotides. =item --eg Show the EXAMPLES section from documentation. =item --filename Read in a filename or a list of filenames separated by commas or specified from the command line. --filename=one --filename=two --filename="three,four,five" =item --help or ? Return synopsis and usage information on options. =item --man Return the complete documentation. =item --motif Specify motifs to look for within the sequence. This may be several --motif entries on the command line. --motif='A[DS]V' --motif='KND*E{2,}' --motif='EE.*EE' --motif='A{7,14}' --motif='(CAT){3,}' =item --test Run all program tests. =item --verbose Show original filenames before executing any functions. Show what is missing during sequence searching. =back =head1 NOTES It doesn't make sense to search protein sequences and count dna nucleotides. =head1 TODO Things to do might include: Using tr to speed up a count. Making count smart enough to do amino acid counts. Calculating percentages in the count. Making a function to do the input list and de-quote code a function. Create cool motifs to search for as examples Consider renaming: motif_count.pl Consider a test of the program from EXAMPLES. - write data first time, next time check results from run =head1 AUTHOR Written by David Scott to tease out a super simple library. =cut package main; # default namespace # Perl library entries use English; # Use longer names for readability # $PROGRAM_NAME: use SelfLoader; # Autoload for test routines use Pod::Usage; # documentation reader library, pod2usage() use Getopt::Long; # option reader library, GetOptions() # User library entries use SeqFun qw/show_eg/; use Seq; pod2usage(0) if $#ARGV == -1; # no arguments if @ARGV has no entries # general options my $counts = ""; # count nucleotides, false by default my $help = ""; # no help by default my $man = ""; # no man page by default my $quiet = ""; # opposite of verbose my $showeg = ""; # no examples by default my $test = ""; # test routine my $verbose = ""; # false by default # application options my @motif = (); # empty list of motifs my @filename = (); # empty list of filenames GetOptions( "verbose!" => \$verbose, quiet => sub { $verbose = 0; }, 'help|?' => \$help, man => \$man, eg => \$showeg, test => \$test, "motif:s" => \@motif, "filename:s" => \@filename, counts => \$counts, ); pod2usage(1) if $help; pod2usage(-message => 'Spurious argument(s) left: ' . "@ARGV", -verbose => 1) if $#ARGV != -1; pod2usage(-exitstatus => 0, -verbose => 2) if $man; if ( $showeg ) { print show_eg(`podselect -s "EXAMPLES" $PROGRAM_NAME`); exit(0); } test() if $test; # Clear out quoting from the command line my @clearance = (); foreach my $motif (@motif) { $motif =~ s/\"//g; $motif =~ s/\'//g; push @clearance, $motif; } @motif = (); @motif = @clearance; # Enable comma separated filenames @filename = split(/,/,join(',',@filename)); # Clear out filename quoting from the command line my @clearname = (); foreach my $filename (@filename) { $filename =~ s/\"$//; $filename =~ s/^\"//; $filename =~ s/\'$//; $filename =~ s/^\'//; push @clearname, $filename; } @filename = (); @filename = @clearname; # Show the filenames if verbose if ( $verbose ) { print "Filenames:\n"; foreach my $filename (@filename) { print "$filename\n"; } } while (my $filename = shift @filename) { my $seq = Seq->read($filename); # Remove whitespace $seq->{seq} =~ s/\s//g; print $seq->dna->count() if $counts; my @motifs = @motif; while (my $motif = shift @motifs) { print $seq->motif($motif); } } # Successful exit exit(0); __DATA__ # testing for program # could run examples from docs if they were generic enough sub test { use File::Compare; system("perl t/seqfun.t"); system("perl t/seq.t"); my @tests = show_eg(`podselect -s "EXAMPLES" $PROGRAM_NAME`); # Look for a test directory exit(0) unless -d "t"; my $name = $PROGRAM_NAME; $name =~ s/\./_/g; my $count = 0; my $out_file = ""; my $chk_file = ""; foreach my $test (@tests) { # skip blank lines next if $test =~ /^\s*$/; # remove white space $test =~ s/^\s*//; $test =~ s/\s*$//; ++$count; $out_file = "t/out_$name.$count"; $chk_file = "t/chk_$name.$count"; if ( -f $out_file ) { print "Generating test comparison: $chk_file\n" if $verbose; system("$test > $chk_file"); if ( compare( $out_file, $chk_file ) ) { print "not ok $count test: $test\n"; } else { print "ok $count test: $test\n"; } } else { print "Generating test output: $out_file\n" if $verbose; system("$test > $out_file"); print "ok $count Generating output only\n"; } } exit(0); }