#!perl -w
use strict;
# documentation written in pod, perldoc perlpod
=head1 NAME
find_count.pl
=head1 DESCRIPTION
Simple example of motif searches and counting nucleotides
in dna.
=head1 EXAMPLES
perl find_count.pl --filename="pccx1.dna" --counts
perl find_count.pl --f='NM_021964fragment.pep' --mo='A[DS]V' --mo='KND*E{2,}'
perl find_count.pl --f='NM_021964fragment.pep' --mo='EE.*EE' --mo='A{7,14}' --mo='(CAT){3,}'
=head1 SYNOPSIS
find_count.pl [man|help]
find_count.pl [counts] [motif=] ... [filename=] ...
=head1 OPTIONS
All options must have a single or double hyphen in front of the name. Only
the minimum name that is unique is required.
=over 4
=item --counts
Count dna nucleotides.
=item --eg
Show the EXAMPLES section from documentation.
=item --filename
Read in a filename or a list of filenames separated by commas
or specified from the command line.
--filename=one --filename=two --filename="three,four,five"
=item --help or ?
Return synopsis and usage information on options.
=item --man
Return the complete documentation.
=item --motif
Specify motifs to look for within the sequence. This may be
several --motif entries on the command line.
--motif='A[DS]V' --motif='KND*E{2,}'
--motif='EE.*EE' --motif='A{7,14}' --motif='(CAT){3,}'
=item --test
Run all program tests.
=item --verbose
Show original filenames before executing any functions.
Show what is missing during sequence searching.
=back
=head1 NOTES
It doesn't make sense to search protein sequences and
count dna nucleotides.
=head1 TODO
Things to do might include:
Using tr to speed up a count.
Making count smart enough to do amino acid counts.
Calculating percentages in the count.
Making a function to do the input list and de-quote code a function.
Create cool motifs to search for as examples
Consider renaming: motif_count.pl
Consider a test of the program from EXAMPLES.
- write data first time, next time check results from run
=head1 AUTHOR
Written by David Scott to tease out a super simple library.
=cut
package main; # default namespace
# Perl library entries
use English; # Use longer names for readability
# $PROGRAM_NAME:
use SelfLoader; # Autoload for test routines
use Pod::Usage; # documentation reader library, pod2usage()
use Getopt::Long; # option reader library, GetOptions()
# User library entries
use SeqFun qw/show_eg/;
use Seq;
pod2usage(0) if $#ARGV == -1; # no arguments if @ARGV has no entries
# general options
my $counts = ""; # count nucleotides, false by default
my $help = ""; # no help by default
my $man = ""; # no man page by default
my $quiet = ""; # opposite of verbose
my $showeg = ""; # no examples by default
my $test = ""; # test routine
my $verbose = ""; # false by default
# application options
my @motif = (); # empty list of motifs
my @filename = (); # empty list of filenames
GetOptions( "verbose!" => \$verbose, quiet => sub { $verbose = 0; },
'help|?' => \$help, man => \$man, eg => \$showeg,
test => \$test,
"motif:s" => \@motif, "filename:s" => \@filename,
counts => \$counts,
);
pod2usage(1) if $help;
pod2usage(-message => 'Spurious argument(s) left: ' . "@ARGV",
-verbose => 1) if $#ARGV != -1;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
if ( $showeg ) {
print show_eg(`podselect -s "EXAMPLES" $PROGRAM_NAME`);
exit(0);
}
test() if $test;
# Clear out quoting from the command line
my @clearance = ();
foreach my $motif (@motif) {
$motif =~ s/\"//g;
$motif =~ s/\'//g;
push @clearance, $motif;
}
@motif = ();
@motif = @clearance;
# Enable comma separated filenames
@filename = split(/,/,join(',',@filename));
# Clear out filename quoting from the command line
my @clearname = ();
foreach my $filename (@filename) {
$filename =~ s/\"$//;
$filename =~ s/^\"//;
$filename =~ s/\'$//;
$filename =~ s/^\'//;
push @clearname, $filename;
}
@filename = ();
@filename = @clearname;
# Show the filenames if verbose
if ( $verbose ) {
print "Filenames:\n";
foreach my $filename (@filename) {
print "$filename\n";
}
}
while (my $filename = shift @filename) {
my $seq = Seq->read($filename);
# Remove whitespace
$seq->{seq} =~ s/\s//g;
print $seq->dna->count() if $counts;
my @motifs = @motif;
while (my $motif = shift @motifs) {
print $seq->motif($motif);
}
}
# Successful exit
exit(0);
__DATA__
# testing for program
# could run examples from docs if they were generic enough
sub test {
use File::Compare;
system("perl t/seqfun.t");
system("perl t/seq.t");
my @tests = show_eg(`podselect -s "EXAMPLES" $PROGRAM_NAME`);
# Look for a test directory
exit(0) unless -d "t";
my $name = $PROGRAM_NAME;
$name =~ s/\./_/g;
my $count = 0;
my $out_file = "";
my $chk_file = "";
foreach my $test (@tests) {
# skip blank lines
next if $test =~ /^\s*$/;
# remove white space
$test =~ s/^\s*//;
$test =~ s/\s*$//;
++$count;
$out_file = "t/out_$name.$count";
$chk_file = "t/chk_$name.$count";
if ( -f $out_file ) {
print "Generating test comparison: $chk_file\n" if $verbose;
system("$test > $chk_file");
if ( compare( $out_file, $chk_file ) ) {
print "not ok $count test: $test\n";
} else {
print "ok $count test: $test\n";
}
} else {
print "Generating test output: $out_file\n" if $verbose;
system("$test > $out_file");
print "ok $count Generating output only\n";
}
}
exit(0);
}