#!perl -w
use strict;
=head1 NAME
Sequence - Class to manage sequences.
=head1 DESCRIPTION
An sequence class makes an object for all sequence
annotation. Saves the raw form, creates pure sequence
and pure annotation areas.
=head1 USAGE
use TwoBio;
my $seq1 = Sequence->new(seq => "catcatcat");
my $result = $seq1->revcom;
my $seq2 = Sequence->new(file => $filename);
my $seq3 = Sequence->new(raw => $fasta_lines);
@sequence = Sequence->new();
$seq = Sequence->new($name);
$seq->save($filename);
$seq->save;
$seq->read($name);
$seq->get_seq_files($name,[\@list]);
=head1 Sequence METHODS
These methods suppport Sequence objects:
=over 4
=cut
package Sequence;
# use fields qw( file raw seq anot form type );
use Carp;
use File::Slurp;
our $AUTOLOAD;
BEGIN {
our $VERSION = '0.001';
$VERSION = eval $VERSION;
}
use constant FORMAT => qw /
FASTA GenBank raw unknown
/;
use constant SEQ_TYPE => qw /
dna rna protein
/;
# my %args = _args(@_);
# handle either ->field($name) or ->field(name => $name)
# my $name = (@_ % 2 == 0) ? '' : shift();
# my %args = _args(@_);
# $args{name} ||= $name;
sub _args (;@) {
croak "Odd number of arguments passed into ", (caller(1))[3]
unless (@_ % 2 == 0);
# strip off any leading '-opt'
my @args;
while (@_) {
(my $k = shift) =~ s/^-//;
push @args, $k, shift;
}
return @args;
}
=item SUPER::new($name);
Sequence can be inherited, use this call to
properly initialize the base object.
A sequence contains:
file - filename
name - name of file
dir - directory where file resides
data - sequence data
info - name value pairs describing sequence
form - file format; FASTA, GenBank, ...
type - sequence type; dna, protein, ...
A sequence is constructed with read($file) or
create($sequence) or create(@sequence);
=item $seq = Sequence->new(%args);
Constructs a Sequence object.
file => $filename,
raw => $entire_sequence,
seq => $sequence | @sequence,
anot => %annotation,
form => $file_format; FASTA, GenBank, ...
type => $sequence_type; dna, rna, protein
=cut
sub new {
my $self = shift;
unless (ref $self) {
# $self = fields::new($self);
$self = bless { }, $self;
}
return $self->init(@_);
}
# Constructor helper method
# my $seq = $self->init($name);
# return $self->init($name);
# initialize all variables & return $self or undef
# read the raw data if not specified
sub init {
my $self = shift;
my %args = _args(@_);
# initialize variables, read any file entry
if ( $args{file} and ! defined $args{raw} ) {
my $lines = File::Slurp::read_file($args{file});
$args{raw} = $lines;
};
$self->{file} = $self->{anot} = $self->{seq} = "";
$self->{raw} = $self->{form} = $self->{type} = "";
# catch any set parameters
for my $item ( keys %args ) {
$self->{$item} = $args{$item};
}
return $self;
}
=item $seq->save([$filename]);
Save a Sequence file into the given $filename or look
for a $filename in $seq->file for the save.
Return false if unable to write.
Return true if successfully written.
=cut
sub save {
my $self = shift;
my $fn = shift || $self->file || return 0;
open FILE, ">$fn" or croak "Unable to write $fn: $!";
print FILE $self->seq;
close FILE;
return 1;
}
=item $dna = $seq->revcom;
Return the reverse complement of DNA sequence
=cut
sub revcom {
my ($self) = @_;
return "" unless $self->{type} eq 'dna';
my $dna = "";
if ( ref($self->{seq}) eq 'ARRAY' ) {
$dna = join("", @{$self->{'seq'}});
} else {
$dna = $$self{'seq'};
}
# First reverse the sequence
my $revcom = reverse $dna;
# Next, complement the sequence,
# keep any upper and lower case
# A->T, T->A, C->G, G->C
$revcom =~ tr/ACGTacgt/TGCAtgca/;
return $revcom;
}
=item $result = $seq->transpose($from, $to);
Returns a sequence string with all $from codes
transposed to $to codes.
=cut
sub transpose {
my($self, $from, $to) = @_;
my $dna = $self->{seq};
my $result = "";
# the long way to do a regular expression
my @from = split(/ */, $from);
my @to = split(/ */, $to);
ITEM:
for my $item ( split(/ */, $dna) ) {
for my $index ( 0 .. $#from ) {
if ( $item eq $from[$index] ) {
$item = $to[$index];
$result .= $item;
next ITEM;
}
}
$result .= $item;
}
return $result;
}
# Get or set any BaseObj field by default.
sub AUTOLOAD {
my $self = shift;
my $type = ref($self) or croak "$self is not an object";
my $name = $AUTOLOAD;
$name =~ s/.*://; # strip fully-qualified portion
croak "Can't access `$name' field in class $type"
unless (exists $self->{$name});
if (@_) {
return $self->{$name} = shift;
} else {
return $self->{$name};
}
}
# explicit destroy required because of AUTOLOAD
sub DESTROY {
}
=back
=head1 FASTA CLASS
FASTA - package to manage FASTA sequences
=head1 FASTA DESCRIPTION
Identify as FASTA format and executes FASTA
methods. Adds a source field.
=head1 FASTA SYNTAX
my $fa = FASTA->new( file => $filename );
=head1 FASTA USAGE
$fa->extract;
=head1 FASTA METHODS
=over 4
=cut
package FASTA;
# inherit from Sequence
# use base qw/Sequence/;
our @ISA = qw/Sequence/;
# use fields qw/source/;
=item $seq = FASTA->new(%args);
Constructs a Sequence object.
see docs on Sequence class
Constructs a FASTA object.
source => "source of sequence"
=cut
sub new {
my $class = shift;
# my $self = fields::new($class);
my $self = bless { }, $class;
$self->SUPER::new(@_);
$self->{source} = "" unless $self->{source};
return $self;
}
=item $sequence = $seq->extract;
=item @sequence = $seq->extract;
Method to extract FASTA sequence data and annotation.
The object retains the result in $seq->seq; and $seq->anot;
Returns FASTA sequence as array or string if requested.
=cut
sub extract {
my( $self) = @_;
# Declare and initialize variables
my @sequence = ();
my @annotation = ();
my $raw;
if ( ref($self->{raw}) eq 'ARRAY' ) {
$raw = $self->{raw};
} else {
my @raw = split(/\n/, $self->{raw});
$raw = \@raw;
}
foreach my $line ( @$raw ) {
# discard blank lines
if ($line =~ /^\s*$/) {
next;
# keep comment lines as annotation
} elsif ($line =~ /^\s*(#.*$)/) {
push @annotation, $1;
next;
# keep fasta header line as annotation
} elsif ($line =~ /^(>.*$)/) {
push @annotation, $1;
next;
# keep line, add to sequence string
} else {
# remove whitespace data
$line =~ s/\s//g;
push @sequence, $line;
}
}
$self->{anot} = join("\n",@annotation);
$self->{seq} = join("",@sequence);
return unless defined wantarray; # don't bother doing more
return wantarray ? @sequence : "@sequence";
}
=back
=cut
1;
__END__