seqfun.pl

#!perl -w use strict; # documentation written in pod, perldoc perlpod =head1 NAME seqfun.pl =head1 DESCRIPTION Simple example of building functions for sequences. =head1 SYNOPSIS seqfun.pl [man|help] seqfun.pl [all|cat|dna2rna] [sequence=<sequence>] ... [filename=<filename>] ... =head1 OPTIONS All options must have a single or double hyphen in front of the name. Only the minimum name that is unique is required. =over 4 =item --all Execute all sequence functions. =item --cat Concatenate each sequences into one big sequence. =item --dna2rna Transcribe each sequences from dna to rna. =item --eg Show the EXAMPLES section from documentation. =item --filename Read in a filename or a list of filenames separated by commas or specified from the command line. --filename=one --filename=two --filename="three,four,five" Each file will add each line to the list of sequences. =item --help or ? Show a help page =item --man Show a man page or complete reference. =item --reversecomplement Changes each sequence to the reverse complement. =item --sequence Read in a sequence or list of sequences separated by commas or specified from the command line. =item --test Test all functions. =item --verbose Show original sequences before executing any functions. =back =head1 EXAMPLES perl seqfun.pl --cat --seq='ACGGTTATAGC','ATAGTTAGTA'; perl seqfun.pl --dna2rna --seq='ACGGGAGGACGGGAAAATTACTACGGCATTAGC' perl seqfun.pl --all --seq='ACGGTTATAGC','ATAGTTAGTA'; perl seqfun.pl --v --all filename='pccx1.dna','znf148.dna','mmp3.dna' =head1 NOTES Working with sequences shows the need to work with nucleotides and proteins, and to be able to manipulate back and forth with various utilities. =head1 AUTHOR A tested, documented program created by David Scott =cut package main; # default namespace use SelfLoader; # Autoload for test routines use Pod::Usage; # documentation reader library, pod2usage() use Getopt::Long; # option reader library, GetOptions() pod2usage(0) if $#ARGV == -1; # no arguments if @ARGV has no entries # general options my $all = ""; # run everything, false by default my $help = ""; # no help by default my $man = ""; # no man page by default my $showeg = ""; # no examples by default my $test = ""; # test routine my $verbose = ""; # false by default # application options my @sequence = (); # empty list of sequences my @filename = (); # empty list of filenames my $cat = ""; # false, concatenate my $dna2rna = ""; # false, transcribe dna to rna my $rc = ""; # false, reverse complement GetOptions( "verbose!" => \$verbose, quiet => sub { $verbose = 0; }, all => \$all, 'help|?' => \$help, man => \$man, eg => \$showeg, test => \$test, "sequence:s" => \@sequence, "filename:s" => \@filename, cat => \$cat, dna2rna => \$dna2rna, reversecomplement => \$rc, ); pod2usage(1) if $help; pod2usage(-message => 'Spurious argument(s) left: ' . "@ARGV", -verbose => 1) if $#ARGV != -1; pod2usage(-exitstatus => 0, -verbose => 2) if $man; show_eg(`podselect -s "EXAMPLES" seqfun.pl`) if $showeg; test() if $test; # Enable comma separated sequences @sequence = split(/,/,join(',',@sequence)); # Clear out sequence quoting from the command line my @clearance = (); foreach my $seq (@sequence) { $seq =~ s/\"//g; $seq =~ s/\'//g; push @clearance, $seq; } @sequence = (); @sequence = @clearance; # Enable comma separated filenames @filename = split(/,/,join(',',@filename)); # Clear out filename quoting from the command line my @clearname = (); foreach my $filename (@filename) { $filename =~ s/\"$//; $filename =~ s/^\"//; $filename =~ s/\'$//; $filename =~ s/^\'//; push @clearname, $filename; } @filename = (); @filename = @clearname; # Add any filename sequences to the list of sequences foreach my $filename (@filename) { push @sequence, read_file($filename); @clearance = (); @clearance = @sequence; } # The DNA # Show the original sequences if verbose if ( $verbose ) { print "Sequences:\n"; foreach my $seq (@sequence) { print_dna($seq); print "\n"; } } # concatenate all sequences from @clearance my $bigseq = ""; if ( $cat or $all ) { $bigseq = shift @clearance; foreach my $seq (@clearance) { $bigseq = concatenate($bigseq,$seq); } print "Result of concatenating sequences:\n\n"; print_dna($bigseq); } # dna transcribed to rna if ( $dna2rna or $all ) { print "\n"; print "Transcribing DNA to RNA:\n\n"; foreach my $seq (@sequence) { print_dna(dna2rna($seq)); } print_dna(dna2rna($bigseq)) if $bigseq; } # reverse complement of sequence(s) if ( $rc or $all ) { print "\n"; print "Reverse Complement \n\n"; foreach my $seq (@sequence) { print_dna(revcom($seq)); } print_dna(revcom($bigseq)) if $bigseq; } # Successful Exit exit 0; # Example 4 in functions # Concatenate the DNA fragments Using "string interpolation" sub concatenate { my ($dna1, $dna2) = @_; my $dna3 = $dna1 . $dna2; return $dna3; } # sub to print the DNA onto the screen sub print_dna { my $dna = shift; print $dna . "\n"; } # my @lines = read_file($filename) sub read_file { my ($filename) = @_; # First we have to "open" the file open(FILE, $filename); # Read the data from the file, and store it # into the array variable @lines my @lines = <FILE>; close FILE; return @lines; } # Calculate the reverse complement sub revcom { my $dna = shift; # Make a new copy of the DNA (see why we saved the original?) my $revcom = reverse $dna; # See the text for a discussion of tr/// $revcom =~ tr/ACGTacgt/TGCAtgca/; return $revcom; } # Show only the examples sub show_eg { my @lines = @_; # remove 1st and last lines pop @lines; shift @lines; print @lines; exit (0); } # Transcribe the DNA to RNA by substituting all T's with U's. sub dna2rna { my $rna = shift; $rna =~ s/T/U/g; return $rna; } __DATA__ # test functions # Tests have to be SelfLoaded because the test sets up its run # during the loading of the library: Test::More # Test the stable part of your program, the functions that reveal # the science. A program is often the greatest point of changes. # Not all functions should be tested ... many are seen immediately. # sub test { use Test::More tests => 3; my $dna1 = "AGCT"; my $dna2 = "GATC"; is(concatenate($dna1,$dna2),"AGCTGATC",'Test concatenate()'); is(revcom($dna1),"AGCT",'Test revcom()'); is(dna2rna($dna1),"AGCU",'Test dna2rna()'); # successful exit exit (0); }