#!perl -w
use strict;
=head1 NAME Sequence class
Sequence - Object to manage sequences.
=head1 SYNOPSIS
use Sequence;
my $seq1 = Sequence->new(seq => "catcatcat");
my $result = $seq1->revcom;
my $seq2 = Sequence->new(file => $filename);
my $seq3 = Sequence->new(raw => $fasta_lines);
@sequence = Sequence->new();
$seq = Sequence->new($name);
$seq->save($filename);
$seq->save;
=head1 Sequence DESCRIPTION
An sequence class makes an object for all sequence
annotation. Saves the raw form, creates pure sequence
and pure annotation areas.
=head1 Sequence METHODS
These methods suppport Sequence objects:
=over 4
=cut
package Sequence;
use Carp;
use File::Slurp;
# our $AUTOLOAD;
BEGIN {
our $VERSION = '0.001';
$VERSION = eval $VERSION;
}
use constant FORMAT => qw /
FASTA GenBank raw unknown
/;
use constant SEQ_TYPE => qw /
dna rna protein
/;
# my %args = _args(@_);
# handle either ->field($name) or ->field(name => $name)
# my $name = (@_ % 2 == 0) ? '' : shift();
# my %args = _args(@_);
# $args{name} ||= $name;
sub _args (;@) {
croak "Odd number of arguments passed into ", (caller(1))[3]
unless (@_ % 2 == 0);
# strip off any leading '-opt'
my @args;
while (@_) {
(my $k = shift) =~ s/^-//;
push @args, $k, shift;
}
return @args;
}
=item SUPER::new($name);
Sequence can be inherited, use this call to
properly initialize the base object.
A sequence contains:
file - filename
name - name of file
dir - directory where file resides
data - sequence data
info - name value pairs describing sequence
form - file format; FASTA, GenBank, ...
type - sequence type; dna, protein, ...
A sequence is constructed with read($file) or
create($sequence) or create(@sequence);
=item $seq = Sequence->new(%args);
Constructs a Sequence object.
file => $filename,
raw => $entire_sequence,
seq => $sequence | @sequence,
anot => %annotation,
form => $file_format; FASTA, GenBank, ...
type => $sequence_type; dna, rna, protein
=cut
sub new {
my $self = shift;
my $class = ref($self) || $self;
$self = bless({ }, $class);
return bless($self->init(@_), $class);
}
# Constructor helper method
# my $seq = $self->init($name);
# return $self->init($name);
# initialize all variables & return $self or undef
# read the raw data if not specified
sub init {
my $self = shift;
my %args = _args(@_);
# initialize variables, read any file entry
if ( $args{file} and ! defined $args{raw} ) {
my $lines = File::Slurp::read_file($args{file});
$args{raw} = $lines;
};
$self->{file} = $self->{anot} = $self->{seq} = "";
$self->{raw} = $self->{form} = $self->{type} = "";
# catch any set parameters
for my $item ( keys %args ) {
$self->{$item} = $args{$item};
}
return $self;
}
=item $seq->file( [$value] );
get or set a file entry
=cut
sub file {
my ($self, $value) = @_;
if ( $value ) {
$self->{file} = $value;
}
return $self->{file};
}
=item $seq->raw( [$value] );
get or set a raw entry
=cut
sub raw {
my ($self, $value) = @_;
if ( $value ) {
$self->{raw} = $value;
}
return $self->{raw};
}
=item $dna = $seq->revcom;
Return the reverse complement of DNA sequence
=cut
sub revcom {
my ($self) = @_;
return "" unless $self->{type} eq 'dna';
my $dna = "";
if ( ref($self->{seq}) eq 'ARRAY' ) {
$dna = join("", @{$self->{'seq'}});
} else {
$dna = $$self{'seq'};
}
# First reverse the sequence
my $revcom = reverse $dna;
# complement but preserve case
# A->T, T->A, C->G, G->C
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return $revcom;
}
=item $seq->save([$filename]);
Save a Sequence file into the given $filename or look
for a $filename in $seq->file for the save.
Return false if unable to write.
Return true if successfully written.
=cut
sub save {
my $self = shift;
my $fn = shift || $self->file || return 0;
open FILE, ">$fn" or croak "Unable to write $fn: $!";
print FILE $self->seq;
close FILE;
return 1;
}
=item $seq->seq( [$value] );
get or set a seq entry
=cut
sub seq {
my ($self, $value) = @_;
if ( $value ) {
$self->{seq} = $value;
}
return $self->{seq};
}
=item $result = $seq->transpose($from, $to);
Returns a sequence string with all $from codes
transposed to $to codes.
=cut
sub transpose {
my($self, $from, $to) = @_;
my $dna = $self->{seq};
$dna =~ s/$from/$to/gi;
return $dna;
}
=item $seq->type( [$value] );
get or set a type entry
=cut
sub type {
my ($self, $value) = @_;
if ( $value ) {
$self->{type} = $value;
}
return $self->{type};
}
1;
__END__