#!/usr/bin/perl -w
use strict;
# documentation written in pod, perldoc perlpod
=head1 NAME
Seq2.pm
=head1 DESCRIPTION
Library of sequence functions
=head1 SYNOPSIS
use Seq2;
my $seq = new Seq2("CATTAGGCTAATAATAAAAA");
=head1 USAGE
my $dna = $seq->concatenate("CATGCCTAAAATA");
my $rna = $seq->dns2rna();
my $seq = $seq->reverse_complement();
my $seq = $seq->count();
my $seq = $seq->motif($motif);
=head1 NOTES
Working with sequences shows the need to work with nucleotides
and proteins, and to be able to manipulate back and forth with
various utilities.
=head1 AUTHOR
Library by David Scott to tease out a simple Object library.
=head1 METHODS
Method documentation follows:
=cut
package Seq2;
use SeqFun qw/slurp/;
# constructors
=item my $seq = Seq2->new("CATG");
=item my $seq = new Seq2("CATG");
=item my $seq = Seq2->new($dna);
=item my $seq = new Seq2($dna);
Creates a sequence from the 1st argument. DEFAULT is "".
=cut
sub new {
my ($self) = shift;
my $class = ref($self) || $self;
$self = { seq => shift || "", filename => "" };
return bless($self, $class);
}
=item my $seq = Seq2->read("my.dna");
=item my $seq = read Seq2("my.dna");
=item my $seq = Seq2->read($filename);
=item my $seq = read Seq2($filename);
Creates a sequence from the $filename. DEFAULT is "".
=cut
sub read {
my ($self) = shift;
my $class = ref($self) || $self;
$self = { seq => "", filename => shift || "" };
my $ref = slurp($self->{filename});
$self->{seq} = $$ref; # copy data in
return bless($self, $class);
}
# object methods
=item my $myseq = $seq->concatenate($dna1, $dna2, ... );
Return a sequence which concatenates any dna sequences
passed in.
=cut
# Concatenate the DNA fragments Using "string interpolation"
sub concatenate {
my ($self, @dna) = @_;
my $result = $self->{seq};
foreach (@dna) {
$result .= $_;
}
return Seq2->new($result);
}
=item my $results = $seq->count();
Return a string of dna counts including errors, and the filename
=cut
sub count {
my ($self) = @_;
# Initialize the counts.
# Notice that we can use scalar variables to hold numbers.
my $dna = \$self->{seq};
my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0);
# Use a regular expression "trick", and five while loops,
# to find the counts of the four bases plus errors
while($$dna =~ /a/ig){$a++}
while($$dna =~ /c/ig){$c++}
while($$dna =~ /g/ig){$g++}
while($$dna =~ /t/ig){$t++}
while($$dna =~ /[^acgt\s]/ig){$e++}
return "$self->{filename}:\tA=$a C=$c G=$g T=$t errors=$e\n";
}
=item my $rna = $dna->dna2rna();
Return a sequence that is rna, transcribe the DNA to RNA
by substituting all T's with U's.
=cut
sub dna2rna {
my $self = shift;
my $rna = $self->{seq};
$rna =~ s/T/U/g;
return Seq2->new($rna);
}
=item my $text = motif($re);
Search for a $re in this sequence, return
the text of missing or found with the
sequence filename.
=cut
sub motif {
my ($self, $motif) = @_;
if ( $self->{seq} =~ /$motif/i ) {
return "$motif found: $self->{filename}\n";
} else {
return "$motif missing: $self->{filename}\n"
}
}
=item my $text = $seq->print_dna;
return text of dna.
=cut
sub print_dna {
my $self = shift;
return $self->{seq};
}
=item my @lines = $seq->read_file($filename)
Return an array of lines in the $filename, assign
this sequence the new array with the new $filename.
=cut
sub read_file {
my ($self, $filename) = @_;
$self->{filename} = $filename;
# First we have to "open" the file
open(FILE, $filename) or die("Unable to open $filename: $!");
# Read the data from the file, and store it
# into the array variable @lines
my @lines = ;
$self->{seq} = join('',@lines);
$self->{seq} =~ s/\s//g;
close FILE;
return @lines;
}
=item my $myseq = $dna->reverse_complement()
Return a Seq2 object that is the reverse complement of the $dna
sequence.
=cut
sub reverse_complement {
my $self = shift;
my $revcom = reverse $self->{seq};
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return new Seq2->new($revcom);
}
1; # successful -- library loaded