#!perl -w
use strict;
=head1 NAME
find_count.pl
=head1 DESCRIPTION
Simple example of motif searches and counting nucleotides
in dna.
=head1 SYNOPSIS
find_count.pl [man|help]
find_count.pl [counts] [motif=] ... [filename=] ...
=head1 EXAMPLES
perl find_count.pl --filename="smallfile.txt" --counts
perl find_count.pl --f='NM_021964fragment.pep' --mo='A[DS]V' --mo='KND*E{2,}'
perl find_count.pl --f='NM_021964fragment.pep' --mo='EE.*EE','A{7,14}','(CAT){3,}'
=head1 OPTIONS
All options must have a single or double hyphen in front of the name. Only
the minimum name that is unique is required.
=over 4
=item --counts
Count dna nucleotides.
=item --eg
Show the EXAMPLES section from documentation.
=item --filename
Read in a filename or a list of filenames separated by commas
or specified from the command line.
--filename=one --filename=two --filename="three,four,five"
=item --help or ?
Return synopsis and usage information on options.
=item --man
Return the complete documentation.
=item --motif
Specify motifs to look for within the sequence. This may be
several --motif entries on the command line.
--motif='A[DS]V' --motif='KND*E{2,}' --motif='EE.*EE'
--motif='A{7,14}' --motif='(CAT){3,}'
=item --test
Run all program tests.
=item --verbose
Show original filenames before executing any functions.
Show what is missing during sequence searching.
=back
=head1 NOTES
It doesn't make sense to search protein sequences and
count dna nucleotides.
=head1 TODO
Things to do might include:
Using tr to speed up a count.
Making count smart enough to do amino acid counts.
Calculating percentages in the count.
Making a function to do the input list and de-quote code a function.
Create cool motifs to search for as examples
Consider renaming: motif_count.pl
=head1 AUTHOR
Functions from Beginning Perl for Bioinformatics by James Tisdall,
extended by David Scott to tease out a super simple library.
=cut
package main; # default namespace
# Perl library entries
use SelfLoader; # Autoload for test routines
use Pod::Usage; # documentation reader library, pod2usage()
use Getopt::Long; # option reader library, GetOptions()
# User library entries
use SeqFun qw/:all/;
pod2usage(0) if $#ARGV == -1; # no arguments if @ARGV has no entries
# general options
my $counts = ""; # count nucleotides, false by default
my $help = ""; # no help by default
my $man = ""; # no man page by default
my $quiet = ""; # opposite of verbose
my $showeg = ""; # no examples by default
my $test = ""; # test routine
my $verbose = ""; # false by default
# application options
my @motif = (); # empty list of motifs
my @filename = (); # empty list of filenames
GetOptions( "verbose!" => \$verbose, quiet => sub { $verbose = 0; },
'help|?' => \$help, man => \$man, eg => \$showeg,
test => \$test,
"motif:s" => \@motif, "filename:s" => \@filename,
counts => \$counts,
);
pod2usage(1) if $help;
pod2usage(-message => 'Spurious argument(s) left: ' . "@ARGV",
-verbose => 1) if $#ARGV != -1;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
show_eg(`podselect -s "EXAMPLES" $0`) if $showeg;
test() if $test;
# Clear out quoting from the command line
my @clearance = ();
foreach my $motif (@motif) {
$motif =~ s/\"//g;
$motif =~ s/\'//g;
push @clearance, $motif;
}
@motif = @clearance;
# Enable comma separated filenames
@filename = split(/,/,join(',',@filename));
# Clear out filename quoting from the command line
my @clearname = ();
foreach my $filename (@filename) {
$filename =~ s/\"$//;
$filename =~ s/^\"//;
$filename =~ s/\'$//;
$filename =~ s/^\'//;
push @clearname, $filename;
}
@filename = @clearname;
# The DNA
# Show the filenames if verbose
if ( $verbose ) {
print "Filenames:\n";
foreach my $filename (@filename) {
print "$filename\n";
}
}
while (my $filename = shift @filename) {
my $sequence = slurp($filename);
# Remove whitespace
$$sequence =~ s/\s//g;
print count($filename, $sequence) if $counts;
my @motifs = @motif;
while (my $motif = shift @motifs) {
print motif($motif, $sequence, $filename);
}
}
# Successful exit
exit(0);
__DATA__
# testing for program
# could run examples from docs if they were generic enough
sub test {
system("perl t/seqfun.t");
}