#!/usr/bin/perl -w use strict; # documentation written in pod, perldoc perlpod =head1 NAME Seq2.pm =head1 DESCRIPTION Library of sequence functions =head1 SYNOPSIS use Seq2; my $seq = new Seq2("CATTAGGCTAATAATAAAAA"); =head1 USAGE my $dna = $seq->concatenate("CATGCCTAAAATA"); my $rna = $seq->dns2rna(); my $seq = $seq->reverse_complement(); my $seq = $seq->count(); my $seq = $seq->motif($motif); =head1 NOTES Working with sequences shows the need to work with nucleotides and proteins, and to be able to manipulate back and forth with various utilities. =head1 AUTHOR Library by David Scott to tease out a simple Object library. =head1 METHODS Method documentation follows: =cut package Seq2; use SeqFun qw/slurp/; # constructors =item my $seq = Seq2->new("CATG"); =item my $seq = new Seq2("CATG"); =item my $seq = Seq2->new($dna); =item my $seq = new Seq2($dna); Creates a sequence from the 1st argument. DEFAULT is "". =cut sub new { my ($self) = shift; my $class = ref($self) || $self; $self = { seq => shift || "", filename => "" }; return bless($self, $class); } =item my $seq = Seq2->read("my.dna"); =item my $seq = read Seq2("my.dna"); =item my $seq = Seq2->read($filename); =item my $seq = read Seq2($filename); Creates a sequence from the $filename. DEFAULT is "". =cut sub read { my ($self) = shift; my $class = ref($self) || $self; $self = { seq => "", filename => shift || "" }; my $ref = slurp($self->{filename}); $self->{seq} = $$ref; # copy data in return bless($self, $class); } # object methods =item my $myseq = $seq->concatenate($dna1, $dna2, ... ); Return a sequence which concatenates any dna sequences passed in. =cut # Concatenate the DNA fragments Using "string interpolation" sub concatenate { my ($self, @dna) = @_; my $result = $self->{seq}; foreach (@dna) { $result .= $_; } return Seq2->new($result); } =item my $results = $seq->count(); Return a string of dna counts including errors, and the filename =cut sub count { my ($self) = @_; # Initialize the counts. # Notice that we can use scalar variables to hold numbers. my $dna = \$self->{seq}; my ($a, $c, $g, $t, $e) = (0, 0, 0, 0, 0); # Use a regular expression "trick", and five while loops, # to find the counts of the four bases plus errors while($$dna =~ /a/ig){$a++} while($$dna =~ /c/ig){$c++} while($$dna =~ /g/ig){$g++} while($$dna =~ /t/ig){$t++} while($$dna =~ /[^acgt\s]/ig){$e++} return "$self->{filename}:\tA=$a C=$c G=$g T=$t errors=$e\n"; } =item my $rna = $dna->dna2rna(); Return a sequence that is rna, transcribe the DNA to RNA by substituting all T's with U's. =cut sub dna2rna { my $self = shift; my $rna = $self->{seq}; $rna =~ s/T/U/g; return Seq2->new($rna); } =item my $text = motif($re); Search for a $re in this sequence, return the text of missing or found with the sequence filename. =cut sub motif { my ($self, $motif) = @_; if ( $self->{seq} =~ /$motif/i ) { return "$motif found: $self->{filename}\n"; } else { return "$motif missing: $self->{filename}\n" } } =item my $text = $seq->print_dna; return text of dna. =cut sub print_dna { my $self = shift; return $self->{seq}; } =item my @lines = $seq->read_file($filename) Return an array of lines in the $filename, assign this sequence the new array with the new $filename. =cut sub read_file { my ($self, $filename) = @_; $self->{filename} = $filename; # First we have to "open" the file open(FILE, $filename) or die("Unable to open $filename: $!"); # Read the data from the file, and store it # into the array variable @lines my @lines = <FILE>; $self->{seq} = join('',@lines); $self->{seq} =~ s/\s//g; close FILE; return @lines; } =item my $myseq = $dna->reverse_complement() Return a Seq2 object that is the reverse complement of the $dna sequence. =cut sub reverse_complement { my $self = shift; my $revcom = reverse $self->{seq}; $revcom =~ tr/ACGTacgt/TGCAtgca/; return new Seq2->new($revcom); } 1; # successful -- library loaded