# BioFunctions.pm

#!/usr/bin/perl -w
use strict;
use warnings;

# documentation written in pod, perldoc perlpod

=head1 NAME

BioFunctions - library of DNA/RNA sequence helper functions

=head1 DESCRIPTION

Library of sequence functions: read a DNA sequence from a file, upper-case
conversion, DNA to RNA conversion, find the Kozak (AUG) motif, count Kozak
(AUG) motifs, find poly-A tails and remove poly-A tails.

=head1 SYNOPSIS

    use BioFunctions qw/:all/;

    use BioFunctions qw/get_DNA_sequence upper_case translate_DNA_to_RNA
                        find_KOZAK_motif find_poly_tails remove_poly_tails
                        count_KOZAK show_eg/;

=head1 USAGE

    show_eg(`podselect -s "EXAMPLES" $0`);
    my $RNA = translate_DNA_to_RNA($DNA_sequence);

=head1 NOTES

The Kozak consensus (or Kozak sequence) is a sequence which occurs on
eukaryotic mRNA and contains the three bases of the start codon (AUG).
The Kozak consensus sequence plays a major role in the initiation of the
translation process.

The functions in this library only look for the AUG start codon itself;
they do not check the bases flanking it.

=head1 AUTHOR

Student example from course work.

=head1 FUNCTIONS

Nothing is exported by default; import the functions by name or with the
C<:all> tag. Function signatures follow:

=over 4

=cut

package BioFunctions;

use Exporter ();

our $VERSION = "1.00";
our @ISA     = qw/Exporter/;
our @EXPORT  = ();    # don't export without asking

# Single list of exportable names; the ":all" tag is built from it so the
# two can never drift apart.
our @EXPORT_OK = qw/
    get_DNA_sequence
    upper_case
    translate_DNA_to_RNA
    find_KOZAK_motif
    find_poly_tails
    remove_poly_tails
    count_KOZAK
    show_eg
/;
our %EXPORT_TAGS = ( all => [@EXPORT_OK] );

# A poly-A tail is any run of at least 10 consecutive A's. The pattern is
# shared by find_poly_tails and remove_poly_tails so that they always agree
# on what counts as a tail.
my $POLY_A_RUN = qr/A{10,}/;

=item my $text = upper_case($DNA_sequence);

Returns an upper-cased copy of C<$DNA_sequence>. The caller's variable is
not modified.

=cut

sub upper_case {
    my ($DNA_sequence) = @_;
    return uc $DNA_sequence;
}

=item my $data = translate_DNA_to_RNA($DNA_sequence);

Converts DNA to mRNA notation by replacing every T nucleotide with U.
The returned sequence is always upper case.

=cut

sub translate_DNA_to_RNA {
    my ($DNA_sequence) = @_;
    my $RNA = upper_case($DNA_sequence);
    $RNA =~ tr/T/U/;
    return $RNA;
}

=item my $text = find_KOZAK_motif($DNA_sequence);

Checks for the presence of the start codon. The sequence is converted to
upper-case RNA first, so C<atg>, C<ATG> and C<AUG> are all recognised.
Returns the string "Found" if the sequence contains AUG, otherwise
"Not Found".

=cut

sub find_KOZAK_motif {
    my ($seq) = @_;
    my $RNA = translate_DNA_to_RNA($seq);
    return $RNA =~ /AUG/ ? "Found" : "Not Found";
}

=item my $text = find_poly_tails($DNA_sequence);

Checks for the presence of a poly-A tail, i.e. a run of at least 10
consecutive A's anywhere in the sequence (case-insensitive). Returns the
string "Found" or "Not Found". Whitespace is not stripped before matching.

=cut

sub find_poly_tails {
    my ($seq) = @_;
    my $RNA = translate_DNA_to_RNA($seq);
    return $RNA =~ $POLY_A_RUN ? "Found" : "Not Found";
}

=item my $count = count_KOZAK($DNA_sequence);

Returns the number of non-overlapping AUG start codons in the sequence
(after conversion to upper-case RNA).

=cut

sub count_KOZAK {
    my ($seq) = @_;
    my $RNA = translate_DNA_to_RNA($seq);

    # List assignment in scalar context yields the number of matches.
    my $count = () = $RNA =~ /AUG/g;
    return $count;
}

=item my $text = remove_poly_tails($DNA_sequence);

Removes poly-A tails from the sequence, if any. All whitespace is stripped
first so that a run broken across lines is still recognised. Every run of
10 or more consecutive A's is deleted in full, wherever it occurs.

The result is returned as upper-case RNA (T replaced by U), because the
sequence is passed through C<translate_DNA_to_RNA>.

=cut

sub remove_poly_tails {
    my ($seq) = @_;
    $seq =~ s/\s//g;
    my $RNA = translate_DNA_to_RNA($seq);

    # Delete the whole run: removing fixed 10-base chunks would leave the
    # remainder of a longer tail (e.g. 5 A's of a 15-A run) behind.
    $RNA =~ s/$POLY_A_RUN//g;
    return $RNA;
}

=item my $text = get_DNA_sequence($filename);

Reads the whole file C<$filename> and returns its contents as one long
string with all whitespace (including newlines) removed.

If the file cannot be opened, a message is written to STDERR and the
program exits with status 1.

=cut

sub get_DNA_sequence {
    my ($DNA_filename) = @_;

    # Three-argument open: the file name is never interpreted as a mode or
    # a pipe command, and the handle is lexical rather than a global.
    my $fh;
    unless ( open $fh, '<', $DNA_filename ) {
        print STDERR "Can not open this file:$DNA_filename: $!\n";
        exit(1);
    }

    my $DNA_sequence = join '', <$fh>;
    close $fh;

    $DNA_sequence =~ s/\s//g;
    return $DNA_sequence;
}

=item show_eg(@lines);

Prints the given lines without the first one (the section header) and the
last one (the trailing blank line), then exits the program with status 0.
This function does not return.

=cut

sub show_eg {
    my @lines = @_;

    # remove 1st and last lines
    pop @lines;
    shift @lines;

    print @lines;
    exit(0);
}

=back

=cut

1;    # successful -- library loaded