#!perl -w use strict; =head1 NAME Sequence class Sequence - Object to manage sequences. =head1 SYNOPSIS use Sequence; my $seq1 = Sequence->new(seq => "catcatcat"); my $result = $seq1->revcom; my $seq2 = Sequence->new(file => $filename); my $seq3 = Sequence->new(raw => $fasta_lines); @sequence = Sequence->new(); $seq = Sequence->new($name); $seq->save($filename); $seq->save; =head1 Sequence DESCRIPTION An sequence class makes an object for all sequence annotation. Saves the raw form, creates pure sequence and pure annotation areas. =head1 Sequence METHODS These methods suppport Sequence objects: =over 4 =cut package Sequence; use Carp; use File::Slurp; # our $AUTOLOAD; BEGIN { our $VERSION = '0.001'; $VERSION = eval $VERSION; } use constant FORMAT => qw / FASTA GenBank raw unknown /; use constant SEQ_TYPE => qw / dna rna protein /; # my %args = _args(@_); # handle either ->field($name) or ->field(name => $name) # my $name = (@_ % 2 == 0) ? '' : shift(); # my %args = _args(@_); # $args{name} ||= $name; sub _args (;@) { croak "Odd number of arguments passed into ", (caller(1))[3] unless (@_ % 2 == 0); # strip off any leading '-opt' my @args; while (@_) { (my $k = shift) =~ s/^-//; push @args, $k, shift; } return @args; } =item SUPER::new($name); Sequence can be inherited, use this call to properly initialize the base object. A sequence contains: file - filename name - name of file dir - directory where file resides data - sequence data info - name value pairs describing sequence form - file format; FASTA, GenBank, ... type - sequence type; dna, protein, ... A sequence is constructed with read($file) or create($sequence) or create(@sequence); =item $seq = Sequence->new(%args); Constructs a Sequence object. file => $filename, raw => $entire_sequence, seq => $sequence | @sequence, anot => %annotation, form => $file_format; FASTA, GenBank, ... type => $sequence_type; dna, rna, protein =cut sub new { my $self = shift; my $class = ref($self) || $self; $self = bless({ }, $class); return bless($self->init(@_), $class); } # Constructor helper method # my $seq = $self->init($name); # return $self->init($name); # initialize all variables & return $self or undef # read the raw data if not specified sub init { my $self = shift; my %args = _args(@_); # initialize variables, read any file entry if ( $args{file} and ! defined $args{raw} ) { my $lines = File::Slurp::read_file($args{file}); $args{raw} = $lines; }; $self->{file} = $self->{anot} = $self->{seq} = ""; $self->{raw} = $self->{form} = $self->{type} = ""; # catch any set parameters for my $item ( keys %args ) { $self->{$item} = $args{$item}; } return $self; } =item $seq->file( [$value] ); get or set a file entry =cut sub file { my ($self, $value) = @_; if ( $value ) { $self->{file} = $value; } return $self->{file}; } =item $seq->raw( [$value] ); get or set a raw entry =cut sub raw { my ($self, $value) = @_; if ( $value ) { $self->{raw} = $value; } return $self->{raw}; } =item $dna = $seq->revcom; Return the reverse complement of DNA sequence =cut sub revcom { my ($self) = @_; return "" unless $self->{type} eq 'dna'; my $dna = ""; if ( ref($self->{seq}) eq 'ARRAY' ) { $dna = join("", @{$self->{'seq'}}); } else { $dna = $$self{'seq'}; } # First reverse the sequence my $revcom = reverse $dna; # complement but preserve case # A->T, T->A, C->G, G->C $revcom =~ tr/ACGTacgt/TGCAtgca/; return $revcom; } =item $seq->save([$filename]); Save a Sequence file into the given $filename or look for a $filename in $seq->file for the save. Return false if unable to write. Return true if successfully written. =cut sub save { my $self = shift; my $fn = shift || $self->file || return 0; open FILE, ">$fn" or croak "Unable to write $fn: $!"; print FILE $self->seq; close FILE; return 1; } =item $seq->seq( [$value] ); get or set a seq entry =cut sub seq { my ($self, $value) = @_; if ( $value ) { $self->{seq} = $value; } return $self->{seq}; } =item $result = $seq->transpose($from, $to); Returns a sequence string with all $from codes transposed to $to codes. =cut sub transpose { my($self, $from, $to) = @_; my $dna = $self->{seq}; $dna =~ s/$from/$to/gi; return $dna; } =item $seq->type( [$value] ); get or set a type entry =cut sub type { my ($self, $value) = @_; if ( $value ) { $self->{type} = $value; } return $self->{type}; } 1; __END__