Seq.pm

#!perl -w use strict; # documentation written in pod, perldoc perlpod =head1 NAME Sequence Objects at Seq.pm =head1 DESCRIPTION Object Oriented Library of Seq, RNA, and DNA. =head1 USAGE use Seq; my $seq = new Seq("CATTAGGCTAATAATAAAAA"); my $dna = $seq->concatenate("CATGCCTAAAATA"); my $rna = $seq->dns2rna; my $seq = $seq->revcom; my $seq = $seq->motif($motif); =head1 NOTES Working with sequences shows the need to work with nucleotides and proteins, and to be able to manipulate back and forth with various utilities. =head1 TODO o create a count of protein sequences o create a count of RNA sequences o create a sequence detector ... is it DNA, RNA, or protein o add error detection for dna, rna, protein o change all die functions to Carp::croak ... slurp( ) o add more functions ... make a list =head1 AUTHOR Created by David Scott to tease out a super simple library. =head1 METHODS Seq Method documentation follows: =over 4 =cut package Seq; use Carp; use GetterSetter 0.001; our @ISA = qw/GetterSetter/; use SeqFun qw/slurp/; # constructors =item my $seq = Seq->new("CATG"); =item my $seq = new Seq("CATG"); =item my $seq = Seq->new($dna); =item my $seq = new Seq($dna); Creates a sequence from the 1st argument. DEFAULT is "". =cut sub new { my ($self) = shift; my $class = ref($self) || $self; $self = { seq => shift || "", filename => "", }; return bless($self, $class); } =item my $seq = Seq->read("my.dna"); =item my $seq = read Seq("my.dna"); =item my $seq = Seq->read($filename); =item my $seq = read Seq($filename); Creates a sequence from the $filename. DEFAULT is "". =cut sub read { my ($self) = shift; my $class = ref($self) || $self; $self = { seq => "", filename => shift, }; my $ref = slurp($self->{filename}); $self->{seq} = $$ref; # copy data in return bless($self, $class); } # clone constructors =item $myRna = $seq->rna; Create an RNA object. =cut sub rna { my $self = shift; my $rna = RNA->new($self->{seq}); $rna->{filename} = $self->{filename}; return $rna; } =item $myDNA = $seq->dna; Create a DNA object. =cut sub dna { my $self = shift; my $dna = DNA->new($self->{seq}); $dna->{filename} = $self->{filename}; return $dna; } # object methods =item my $myseq = $seq->concatenate($dna1, $dna2, ... ); Return a sequence which concatenates any dna sequences passed in. =cut # Concatenate the DNA fragments Using "string interpolation" sub concatenate { my ($self, @dna) = @_; my $result = $self->{seq}; foreach (@dna) { $result .= $_; } return Seq->new($result); } =item my $text = motif($re); Search for a $re in this sequence, return the text of missing or found with the sequence filename. =cut sub motif { my ($self, $motif) = @_; if ( $self->{seq} =~ /$motif/i ) { return "$motif found: $self->{filename}\n"; } else { return "$motif missing: $self->{filename}\n" } } =item my $text = $seq->print; return text of sequences. =cut sub print { my $self = shift; return $self->{seq} . "\n"; } =item my $error = $seq->read_file($filename) Return an error message or false if it succeeded. Assign seq to the file a string with no white space, Assign filename to the $filename. CAUTION: This expects the sequence only in the file. Chained commands require a method that returns an object. =cut sub read_file { my ($self, $filename) = @_; unless ( open(FILE, $filename) ) { return "Unable to open $filename: $!"; # make no changes unless the open is successful } else { my @lines = <FILE>; $self->{seq} = join('',@lines); $self->{seq} =~ s/\s//g; $self->{filename} = $filename; close FILE; return 0; } } =item $seq->seq( ); Return the sequence associated with the Seq object. Disable any global change to the sequence. =cut sub seq { my $self = shift; croak("Illegal access to `seq' field in class Seq") if @_; return $self->{seq}; } =back =head1 NAME RNA class =head1 DESCRIPTION Class for rna functions =head1 USAGE use RNA; my $rna = new RNA("CAUUAGGCUAAUAAUAAAAA"); my $myRna = $seq->dns2rna; =head1 METHODS RNA Method documentation follows: =over 4 =cut package RNA; our @ISA = qw/Seq/; =item my $rna = RNA->new("CAUG"); =item my $rna = new RNA("CAUG"); =item my $rna = RNA->new($dna); =item my $rna = new RNA($dna); Creates an RNA sequence from the 1st argument. DEFAULT is "". =cut sub new { my ($class) = shift; my $self = $class->SUPER::new(@_); return $self; # bless($self, $class); } =back =head1 NAME DNA class =head1 DESCRIPTION Class for dna functions =head1 USAGE use DNA; my $dna = new DNA("CATTAGGCTAATAATAAAAA"); my $rna = $dna->dns2rna; my $rco = $dna->revcom; my $text = $rco->count(); =head1 METHODS DNA Method documentation follows: =over 4 =cut package DNA; our @ISA = qw/Seq/; =item my $dna = DNA->new("CATG"); =item my $dna = new DNA("CATG"); =item my $dna = DNA->new($dna); =item my $dna = new DNA($dna); Creates an DNA sequence from the 1st argument. DEFAULT is "". =cut sub new { my ($class) = shift; my $self = $class->SUPER::new(@_); return $self; # bless($self, $class); } =item my $results = $dna->count(); Return a string of dna counts including errors, and the filename =cut sub count { my ($self) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my $dna = \$self->{seq}; my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){$a++} while($$dna =~ /c/ig){$c++} while($$dna =~ /g/ig){$g++} while($$dna =~ /t/ig){$t++} while($$dna =~ /[^acgt\s]/ig){$e++} return "$self->{filename}:\tA=$a C=$c G=$g T=$t errors=$e\n"; } =item my $myRna = $dna->dna2rna(); Return a sequence that is rna, transcribe the DNA to RNA by substituting all T's with U's. =cut sub dna2rna { my $self = shift; my $rna = $self->{seq}; $rna =~ s/T/U/g; return RNA->new($rna); } =item my $myDNA = $myDna->revcom() Return a DNA object that is the reverse complement of the $dna sequence. =cut sub revcom { my $self = shift; my $revcom = reverse $self->{seq}; $revcom =~ tr/ACGTacgt/TGCAtgca/; return new DNA->new($revcom); } =back =cut 1; # successful -- library loaded