#!perl -w
use strict;
# documentation written in pod, perldoc perlpod
=head1 NAME
seqfun.pl
=head1 DESCRIPTION
Simple example of building functions for sequences.
=head1 SYNOPSIS
seqfun.pl [man|help]
seqfun.pl [all|cat|dna2rna] [sequence=] ... [filename=] ...
=head1 OPTIONS
All options must have a single or double hyphen in front of the name. Only
the minimum name that is unique is required.
=over 4
=item --all
Execute all sequence functions.
=item --cat
Concatenate each sequences into one big sequence.
=item --dna2rna
Transcribe each sequences from dna to rna.
=item --eg
Show the EXAMPLES section from documentation.
=item --filename
Read in a filename or a list of filenames separated by commas
or specified from the command line.
--filename=one --filename=two --filename="three,four,five"
Each file will add each line to the list of sequences.
=item --help or ?
Show a help page
=item --man
Show a man page or complete reference.
=item --reversecomplement
Changes each sequence to the reverse complement.
=item --sequence
Read in a sequence or list of sequences separated by commas
or specified from the command line.
=item --test
Test all functions.
=item --verbose
Show original sequences before executing any functions.
=back
=head1 EXAMPLES
perl seqfun.pl --cat --seq='ACGGTTATAGC','ATAGTTAGTA';
perl seqfun.pl --dna2rna --seq='ACGGGAGGACGGGAAAATTACTACGGCATTAGC'
perl seqfun.pl --all --seq='ACGGTTATAGC','ATAGTTAGTA';
perl seqfun.pl --v --all filename='pccx1.dna','znf148.dna','mmp3.dna'
=head1 NOTES
Working with sequences shows the need to work with nucleotides
and proteins, and to be able to manipulate back and forth with
various utilities.
=head1 AUTHOR
A tested, documented program created by David Scott
=cut
package main; # default namespace
use SelfLoader; # Autoload for test routines
use Pod::Usage; # documentation reader library, pod2usage()
use Getopt::Long; # option reader library, GetOptions()
pod2usage(0) if $#ARGV == -1; # no arguments if @ARGV has no entries
# general options
my $all = ""; # run everything, false by default
my $help = ""; # no help by default
my $man = ""; # no man page by default
my $showeg = ""; # no examples by default
my $test = ""; # test routine
my $verbose = ""; # false by default
# application options
my @sequence = (); # empty list of sequences
my @filename = (); # empty list of filenames
my $cat = ""; # false, concatenate
my $dna2rna = ""; # false, transcribe dna to rna
my $rc = ""; # false, reverse complement
GetOptions( "verbose!" => \$verbose, quiet => sub { $verbose = 0; },
all => \$all, 'help|?' => \$help, man => \$man, eg => \$showeg,
test => \$test,
"sequence:s" => \@sequence, "filename:s" => \@filename,
cat => \$cat, dna2rna => \$dna2rna, reversecomplement => \$rc,
);
pod2usage(1) if $help;
pod2usage(-message => 'Spurious argument(s) left: ' . "@ARGV",
-verbose => 1) if $#ARGV != -1;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
show_eg(`podselect -s "EXAMPLES" seqfun.pl`) if $showeg;
test() if $test;
# Enable comma separated sequences
@sequence = split(/,/,join(',',@sequence));
# Clear out sequence quoting from the command line
my @clearance = ();
foreach my $seq (@sequence) {
$seq =~ s/\"//g;
$seq =~ s/\'//g;
push @clearance, $seq;
}
@sequence = ();
@sequence = @clearance;
# Enable comma separated filenames
@filename = split(/,/,join(',',@filename));
# Clear out filename quoting from the command line
my @clearname = ();
foreach my $filename (@filename) {
$filename =~ s/\"$//;
$filename =~ s/^\"//;
$filename =~ s/\'$//;
$filename =~ s/^\'//;
push @clearname, $filename;
}
@filename = ();
@filename = @clearname;
# Add any filename sequences to the list of sequences
foreach my $filename (@filename) {
push @sequence, read_file($filename);
@clearance = ();
@clearance = @sequence;
}
# The DNA
# Show the original sequences if verbose
if ( $verbose ) {
print "Sequences:\n";
foreach my $seq (@sequence) {
print_dna($seq);
print "\n";
}
}
# concatenate all sequences from @clearance
my $bigseq = "";
if ( $cat or $all ) {
$bigseq = shift @clearance;
foreach my $seq (@clearance) {
$bigseq = concatenate($bigseq,$seq);
}
print "Result of concatenating sequences:\n\n";
print_dna($bigseq);
}
# dna transcribed to rna
if ( $dna2rna or $all ) {
print "\n";
print "Transcribing DNA to RNA:\n\n";
foreach my $seq (@sequence) {
print_dna(dna2rna($seq));
}
print_dna(dna2rna($bigseq)) if $bigseq;
}
# reverse complement of sequence(s)
if ( $rc or $all ) {
print "\n";
print "Reverse Complement \n\n";
foreach my $seq (@sequence) {
print_dna(revcom($seq));
}
print_dna(revcom($bigseq)) if $bigseq;
}
# Successful Exit
exit 0;
# Example 4 in functions
# Concatenate the DNA fragments Using "string interpolation"
sub concatenate {
my ($dna1, $dna2) = @_;
my $dna3 = $dna1 . $dna2;
return $dna3;
}
# sub to print the DNA onto the screen
sub print_dna {
my $dna = shift;
print $dna . "\n";
}
# my @lines = read_file($filename)
sub read_file {
my ($filename) = @_;
# First we have to "open" the file
open(FILE, $filename);
# Read the data from the file, and store it
# into the array variable @lines
my @lines = ;
close FILE;
return @lines;
}
# Calculate the reverse complement
sub revcom {
my $dna = shift;
# Make a new copy of the DNA (see why we saved the original?)
my $revcom = reverse $dna;
# See the text for a discussion of tr///
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return $revcom;
}
# Show only the examples
sub show_eg {
my @lines = @_;
# remove 1st and last lines
pop @lines;
shift @lines;
print @lines;
exit (0);
}
# Transcribe the DNA to RNA by substituting all T's with U's.
sub dna2rna {
my $rna = shift;
$rna =~ s/T/U/g;
return $rna;
}
__DATA__
# test functions
# Tests have to be SelfLoaded because the test sets up its run
# during the loading of the library: Test::More
# Test the stable part of your program, the functions that reveal
# the science. A program is often the greatest point of changes.
# Not all functions should be tested ... many are seen immediately.
#
sub test {
use Test::More tests => 3;
my $dna1 = "AGCT";
my $dna2 = "GATC";
is(concatenate($dna1,$dna2),"AGCTGATC",'Test concatenate()');
is(revcom($dna1),"AGCT",'Test revcom()');
is(dna2rna($dna1),"AGCU",'Test dna2rna()');
# successful exit
exit (0);
}