#!perl -w use strict; =head1 NAME Sequence - Class to manage sequences. =head1 DESCRIPTION An sequence class makes an object for all sequence annotation. Saves the raw form, creates pure sequence and pure annotation areas. =head1 USAGE use TwoBio; my $seq1 = Sequence->new(seq => "catcatcat"); my $result = $seq1->revcom; my $seq2 = Sequence->new(file => $filename); my $seq3 = Sequence->new(raw => $fasta_lines); @sequence = Sequence->new(); $seq = Sequence->new($name); $seq->save($filename); $seq->save; $seq->read($name); $seq->get_seq_files($name,[\@list]); =head1 Sequence METHODS These methods suppport Sequence objects: =over 4 =cut package Sequence; # use fields qw( file raw seq anot form type ); use Carp; use File::Slurp; our $AUTOLOAD; BEGIN { our $VERSION = '0.001'; $VERSION = eval $VERSION; } use constant FORMAT => qw / FASTA GenBank raw unknown /; use constant SEQ_TYPE => qw / dna rna protein /; # my %args = _args(@_); # handle either ->field($name) or ->field(name => $name) # my $name = (@_ % 2 == 0) ? '' : shift(); # my %args = _args(@_); # $args{name} ||= $name; sub _args (;@) { croak "Odd number of arguments passed into ", (caller(1))[3] unless (@_ % 2 == 0); # strip off any leading '-opt' my @args; while (@_) { (my $k = shift) =~ s/^-//; push @args, $k, shift; } return @args; } =item SUPER::new($name); Sequence can be inherited, use this call to properly initialize the base object. A sequence contains: file - filename name - name of file dir - directory where file resides data - sequence data info - name value pairs describing sequence form - file format; FASTA, GenBank, ... type - sequence type; dna, protein, ... A sequence is constructed with read($file) or create($sequence) or create(@sequence); =item $seq = Sequence->new(%args); Constructs a Sequence object. file => $filename, raw => $entire_sequence, seq => $sequence | @sequence, anot => %annotation, form => $file_format; FASTA, GenBank, ... type => $sequence_type; dna, rna, protein =cut sub new { my $self = shift; unless (ref $self) { # $self = fields::new($self); $self = bless { }, $self; } return $self->init(@_); } # Constructor helper method # my $seq = $self->init($name); # return $self->init($name); # initialize all variables & return $self or undef # read the raw data if not specified sub init { my $self = shift; my %args = _args(@_); # initialize variables, read any file entry if ( $args{file} and ! defined $args{raw} ) { my $lines = File::Slurp::read_file($args{file}); $args{raw} = $lines; }; $self->{file} = $self->{anot} = $self->{seq} = ""; $self->{raw} = $self->{form} = $self->{type} = ""; # catch any set parameters for my $item ( keys %args ) { $self->{$item} = $args{$item}; } return $self; } =item $seq->save([$filename]); Save a Sequence file into the given $filename or look for a $filename in $seq->file for the save. Return false if unable to write. Return true if successfully written. =cut sub save { my $self = shift; my $fn = shift || $self->file || return 0; open FILE, ">$fn" or croak "Unable to write $fn: $!"; print FILE $self->seq; close FILE; return 1; } =item $dna = $seq->revcom; Return the reverse complement of DNA sequence =cut sub revcom { my ($self) = @_; return "" unless $self->{type} eq 'dna'; my $dna = ""; if ( ref($self->{seq}) eq 'ARRAY' ) { $dna = join("", @{$self->{'seq'}}); } else { $dna = $$self{'seq'}; } # First reverse the sequence my $revcom = reverse $dna; # Next, complement the sequence, # keep any upper and lower case # A->T, T->A, C->G, G->C $revcom =~ tr/ACGTacgt/TGCAtgca/; return $revcom; } =item $result = $seq->transpose($from, $to); Returns a sequence string with all $from codes transposed to $to codes. =cut sub transpose { my($self, $from, $to) = @_; my $dna = $self->{seq}; my $result = ""; # the long way to do a regular expression my @from = split(/ */, $from); my @to = split(/ */, $to); ITEM: for my $item ( split(/ */, $dna) ) { for my $index ( 0 .. $#from ) { if ( $item eq $from[$index] ) { $item = $to[$index]; $result .= $item; next ITEM; } } $result .= $item; } return $result; } # Get or set any BaseObj field by default. sub AUTOLOAD { my $self = shift; my $type = ref($self) or croak "$self is not an object"; my $name = $AUTOLOAD; $name =~ s/.*://; # strip fully-qualified portion croak "Can't access `$name' field in class $type" unless (exists $self->{$name}); if (@_) { return $self->{$name} = shift; } else { return $self->{$name}; } } # explicit destroy required because of AUTOLOAD sub DESTROY { } =back =head1 FASTA CLASS FASTA - package to manage FASTA sequences =head1 FASTA DESCRIPTION Identify as FASTA format and executes FASTA methods. Adds a source field. =head1 FASTA SYNTAX my $fa = FASTA->new( file => $filename ); =head1 FASTA USAGE $fa->extract; =head1 FASTA METHODS =over 4 =cut package FASTA; # inherit from Sequence # use base qw/Sequence/; our @ISA = qw/Sequence/; # use fields qw/source/; =item $seq = FASTA->new(%args); Constructs a Sequence object. see docs on Sequence class Constructs a FASTA object. source => "source of sequence" =cut sub new { my $class = shift; # my $self = fields::new($class); my $self = bless { }, $class; $self->SUPER::new(@_); $self->{source} = "" unless $self->{source}; return $self; } =item $sequence = $seq->extract; =item @sequence = $seq->extract; Method to extract FASTA sequence data and annotation. The object retains the result in $seq->seq; and $seq->anot; Returns FASTA sequence as array or string if requested. =cut sub extract { my( $self) = @_; # Declare and initialize variables my @sequence = (); my @annotation = (); my $raw; if ( ref($self->{raw}) eq 'ARRAY' ) { $raw = $self->{raw}; } else { my @raw = split(/\n/, $self->{raw}); $raw = \@raw; } foreach my $line ( @$raw ) { # discard blank lines if ($line =~ /^\s*$/) { next; # keep comment lines as annotation } elsif ($line =~ /^\s*(#.*$)/) { push @annotation, $1; next; # keep fasta header line as annotation } elsif ($line =~ /^(>.*$)/) { push @annotation, $1; next; # keep line, add to sequence string } else { # remove whitespace data $line =~ s/\s//g; push @sequence, $line; } } $self->{anot} = join("\n",@annotation); $self->{seq} = join("",@sequence); return unless defined wantarray; # don't bother doing more return wantarray ? @sequence : "@sequence"; } =back =cut 1; __END__