#!perl -w
use strict;
# documentation written in pod, perldoc perlpod
=head1 NAME
Sequence Objects at Seq.pm
=head1 DESCRIPTION
Object Oriented Library of Seq, RNA, and DNA.
=head1 USAGE
use Seq;
my $seq = new Seq("CATTAGGCTAATAATAAAAA");
my $dna = $seq->concatenate("CATGCCTAAAATA");
my $rna = $seq->dns2rna;
my $seq = $seq->revcom;
my $seq = $seq->motif($motif);
=head1 NOTES
Working with sequences shows the need to work with nucleotides
and proteins, and to be able to manipulate back and forth with
various utilities.
=head1 TODO
o create a count of protein sequences
o create a count of RNA sequences
o create a sequence detector ... is it DNA, RNA, or protein
o add error detection for dna, rna, protein
o change all die functions to Carp::croak ... slurp( )
o add more functions ... make a list
=head1 AUTHOR
Created by David Scott to tease out a super simple library.
=head1 METHODS
Seq Method documentation follows:
=over 4
=cut
package Seq;
use Carp;
use GetterSetter 0.001;
our @ISA = qw/GetterSetter/;
use SeqFun qw/slurp/;
# constructors
=item my $seq = Seq->new("CATG");
=item my $seq = new Seq("CATG");
=item my $seq = Seq->new($dna);
=item my $seq = new Seq($dna);
Creates a sequence from the 1st argument. DEFAULT is "".
=cut
sub new {
my ($self) = shift;
my $class = ref($self) || $self;
$self = { seq => shift || "", filename => "", };
return bless($self, $class);
}
=item my $seq = Seq->read("my.dna");
=item my $seq = read Seq("my.dna");
=item my $seq = Seq->read($filename);
=item my $seq = read Seq($filename);
Creates a sequence from the $filename. DEFAULT is "".
=cut
sub read {
my ($self) = shift;
my $class = ref($self) || $self;
$self = { seq => "", filename => shift, };
my $ref = slurp($self->{filename});
$self->{seq} = $$ref; # copy data in
return bless($self, $class);
}
# clone constructors
=item $myRna = $seq->rna;
Create an RNA object.
=cut
sub rna {
my $self = shift;
my $rna = RNA->new($self->{seq});
$rna->{filename} = $self->{filename};
return $rna;
}
=item $myDNA = $seq->dna;
Create a DNA object.
=cut
sub dna {
my $self = shift;
my $dna = DNA->new($self->{seq});
$dna->{filename} = $self->{filename};
return $dna;
}
# object methods
=item my $myseq = $seq->concatenate($dna1, $dna2, ... );
Return a sequence which concatenates any dna sequences
passed in.
=cut
# Concatenate the DNA fragments Using "string interpolation"
sub concatenate {
my ($self, @dna) = @_;
my $result = $self->{seq};
foreach (@dna) {
$result .= $_;
}
return Seq->new($result);
}
=item my $text = motif($re);
Search for a $re in this sequence, return
the text of missing or found with the
sequence filename.
=cut
sub motif {
my ($self, $motif) = @_;
if ( $self->{seq} =~ /$motif/i ) {
return "$motif found: $self->{filename}\n";
} else {
return "$motif missing: $self->{filename}\n"
}
}
=item my $text = $seq->print;
return text of sequences.
=cut
sub print {
my $self = shift;
return $self->{seq} . "\n";
}
=item my $error = $seq->read_file($filename)
Return an error message or false if it succeeded.
Assign seq to the file a string with no white space,
Assign filename to the $filename.
CAUTION: This expects the sequence only in the file.
Chained commands require a method that returns an object.
=cut
sub read_file {
my ($self, $filename) = @_;
unless ( open(FILE, $filename) ) {
return "Unable to open $filename: $!";
# make no changes unless the open is successful
} else {
my @lines = ;
$self->{seq} = join('',@lines);
$self->{seq} =~ s/\s//g;
$self->{filename} = $filename;
close FILE;
return 0;
}
}
=item $seq->seq( );
Return the sequence associated with the Seq object.
Disable any global change to the sequence.
=cut
sub seq {
my $self = shift;
croak("Illegal access to `seq' field in class Seq") if @_;
return $self->{seq};
}
=back
=head1 NAME
RNA class
=head1 DESCRIPTION
Class for rna functions
=head1 USAGE
use RNA;
my $rna = new RNA("CAUUAGGCUAAUAAUAAAAA");
my $myRna = $seq->dns2rna;
=head1 METHODS
RNA Method documentation follows:
=over 4
=cut
package RNA;
our @ISA = qw/Seq/;
=item my $rna = RNA->new("CAUG");
=item my $rna = new RNA("CAUG");
=item my $rna = RNA->new($dna);
=item my $rna = new RNA($dna);
Creates an RNA sequence from the 1st argument. DEFAULT is "".
=cut
sub new {
my ($class) = shift;
my $self = $class->SUPER::new(@_);
return $self; # bless($self, $class);
}
=back
=head1 NAME
DNA class
=head1 DESCRIPTION
Class for dna functions
=head1 USAGE
use DNA;
my $dna = new DNA("CATTAGGCTAATAATAAAAA");
my $rna = $dna->dns2rna;
my $rco = $dna->revcom;
my $text = $rco->count();
=head1 METHODS
DNA Method documentation follows:
=over 4
=cut
package DNA;
our @ISA = qw/Seq/;
=item my $dna = DNA->new("CATG");
=item my $dna = new DNA("CATG");
=item my $dna = DNA->new($dna);
=item my $dna = new DNA($dna);
Creates an DNA sequence from the 1st argument. DEFAULT is "".
=cut
sub new {
my ($class) = shift;
my $self = $class->SUPER::new(@_);
return $self; # bless($self, $class);
}
=item my $results = $dna->count();
Return a string of dna counts including errors, and the filename
=cut
sub count {
my ($self) = @_;
# Initialize the counts.
# Notice that we can use scalar variables to hold numbers.
my $dna = \$self->{seq};
my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0);
# Use a regular expression "trick", and five while loops,
# to find the counts of the four bases plus errors
while($$dna =~ /a/ig){$a++}
while($$dna =~ /c/ig){$c++}
while($$dna =~ /g/ig){$g++}
while($$dna =~ /t/ig){$t++}
while($$dna =~ /[^acgt\s]/ig){$e++}
return "$self->{filename}:\tA=$a C=$c G=$g T=$t errors=$e\n";
}
=item my $myRna = $dna->dna2rna();
Return a sequence that is rna, transcribe the DNA to RNA
by substituting all T's with U's.
=cut
sub dna2rna {
my $self = shift;
my $rna = $self->{seq};
$rna =~ s/T/U/g;
return RNA->new($rna);
}
=item my $myDNA = $myDna->revcom()
Return a DNA object that is the reverse complement of the $dna
sequence.
=cut
sub revcom {
my $self = shift;
my $revcom = reverse $self->{seq};
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return new DNA->new($revcom);
}
=back
=cut
1; # successful -- library loaded