package ZFdnaBS;
use strict;
use warnings;
use DataBrowser;

my @nucleotides = qw(A C G T);

=head1 NAME

ZFdnaBS - a module for storing Zinc Finger DNA Binding Sites as matrices

=head1 DESCRIPTION

ZFdnaBS.pm gives you convenient, object-oriented access to DNA binding sites
that are stored as "fasta"-formatted matrices:

    >Matrix_ID
    0.1    0.3    0.5    0.1
    0.2    0.5    0.2    0.1
    0.6    0.1    0.1    0.2

where the 1st line, starting with >, is the header. On the 2nd and all further
lines the columns are tab-separated:

    1st column is A
    2nd column is C
    3rd column is G
    4th column is T

------------------------------

=head1 TUTORIAL

------------------------------

------------------------------

=head2 ZFdnaBS Class

To use the ZFdnaBS module, create a ZFdnaBS object using a file with
fasta-formatted matrices.

    use ZFdnaBS;
    my $ZFdnaBS = ZFdnaBS->new(file with zinc finger DNA binding sites);

List of methods:

    entry               returns array of headers in the order they appeared in the file;
    motifsNumber        returns number of motifs;
    motifs              returns reference to array of motif objects;
    motif(pos_number)   returns motif object;
    deleteAll           deletes all content;

------------------------------

=head2 ZFdnaBS::Motif Class

To use the ZFdnaBS::Motif module, create a ZFdnaBS object using a file with
fasta-formatted matrices (see the ZFdnaBS Class section above), or use the
new method:

    my $motif = ZFdnaBS::Motif->new(reference to matrix_motif);

List of methods:

    percent(pseudo)            converts to matrix_percent and overwrites the old one;
    mLength                    returns length of motif;
    getFreq(pos, nuc)          returns frequency for a particular nucleotide and position;
    getMatrix                  returns reference to matrix;
    logOdds(backgroundMatrix)  converts to a logOdds matrix (log base 10) and overwrites
                               the old one, considering the background matrix;
    reverseCompliment          returns reference to the reverse complement matrix;
    printScreen                prints motif_matrix in Terminal;

------------------------------

=head2 ZFdnaBS::BarCodedBS Class

Used for storing ZFdnaBS motifs, marking each motif with a barCode.

=pod

Create a new ZFdnaBS::BarCodedBS object:

    my $BarCodedBS = ZFdnaBS::BarCodedBS->new;

List of methods:

    addMotif(barcode, position, motif)  adds motif, using reference to motif;
    BarCodes                            returns reference to array of sorted BarCodes;
    motifs(barcode)                     returns reference to array of motif objects;
    printScreen                         prints object in Terminal;

------------------------------

=head1 AUTHOR

Artem Zykovich (azykovich@ucdavis.edu)

=head1 COPYRIGHT

Copyright (C) 2008 Artem Zykovich. All Rights Reserved.

=head1 DISCLAIMER

This software is provided "as is" without warranty of any kind.

=cut

###############################################################################
########## ZFdnaBS #######################################################
###############################################################################

# new($file)
#
# Reads a file of motif matrices and returns a ZFdnaBS object: a blessed array
# with one hash per motif, holding the header line under {entry} and the list
# of per-position {A, C, G, T} hashes under {matrix}.
#
#   $file - path of the file to read.
#
# Lines starting with '>' open a new motif; lines starting with a digit are
# tab-separated matrix rows (A, C, G, T) of the current motif; every other
# line is ignored. Dies if the file cannot be opened or if a matrix row
# appears before the first header.
sub new {
    my ($class, $file) = @_;

    # Start from an empty array so that a file without motifs still yields a
    # valid, empty object.
    my $self = [];

    open(my $in, '<', $file) or die "Cannot open '$file': $!";
    while (my $line = <$in>) {
        chomp $line;
        if ($line =~ /^>/) {
            push @$self, { entry => $line, matrix => [] };
        }
        elsif ($line =~ /^\d/) {
            die "Matrix row found before any '>' header in '$file' (line $.)"
                unless @$self;
            my @freq = split(/\t/, $line);
            my %row;
            @row{qw(A C G T)} = @freq[0 .. 3];
            push @{ $self->[-1]{matrix} }, \%row;
        }
    }
    close $in;

    # Fall back to this package when new() is not called as a class method.
    return bless $self, (ref($class) || $class || __PACKAGE__);
}

# entry()
#
# Returns the list of header lines, in the order the motifs are stored.
sub entry {
    my ($self) = @_;
    return map { $_->{entry} } @$self;
}

# motifsNumber()
#
# Returns the number of motifs held by the object.
sub motifsNumber {
    my ($self) = @_;
    return scalar @$self;
}

# motifs()
#
# Returns a reference to an array with one new ZFdnaBS::Motif object per
# stored motif, in storage order.
sub motifs {
    my ($self) = @_;
    return [ map { ZFdnaBS::Motif->new($_->{matrix}) } @$self ];
}

# motif($position)
#
# Returns a new ZFdnaBS::Motif object for the motif at index $position
# (0-based; negative values count from the end). Dies when $position is out
# of range.
sub motif {
    my ($self, $position) = @_;
    my $count = @$self;

    # Checked up front: indexing a missing slot with ->[..]{matrix} would
    # autovivify it and silently grow the object.
    if ($position >= $count or $position < -$count) {
        die "Motif position $position is out of range (object holds $count motifs)";
    }
    return ZFdnaBS::Motif->new($self->[$position]{matrix});
}

# deleteAll()
#
# Removes every motif from the object.
sub deleteAll {
    my ($self) = @_;
    @$self = ();
    return;
}

###############################################################################
########## ZFdnaBS::Motif ###########################################
###############################################################################
package ZFdnaBS::Motif;

# new($matrix)
#
# Builds a motif object from a reference to an array of per-position hashes.
# Only the keys listed in @nucleotides are copied, so the new object does not
# share position hashes with $matrix. An undefined or empty $matrix gives an
# empty motif.
sub new {
    my ($class, $matrix) = @_;
    my $self = [];
    for my $row (@{ $matrix || [] }) {
        push @$self, { map { $_ => $row->{$_} } @nucleotides };
    }
    return bless $self, (ref($class) || $class || __PACKAGE__);
}

# percent($pseudo)
#
# Adds $pseudo (default 0) to every value and then divides each value by the
# sum of its position, in place. A position whose sum is 0 is left unchanged
# after the pseudocount is added, because it cannot be normalised.
sub percent {
    my ($self, $pseudo) = @_;
    $pseudo = 0 unless defined $pseudo;

    for my $column (@$self) {
        my $sum = 0;
        for my $nuc (@nucleotides) {
            $column->{$nuc} += $pseudo;
            $sum += $column->{$nuc};
        }
        next if $sum == 0;    # avoid division by zero
        $column->{$_} /= $sum for @nucleotides;
    }
    return;
}

# mLength()
#
# Returns the number of positions in the motif.
sub mLength {
    my ($self) = @_;
    return scalar @$self;
}

# getFreq($pos, $nuc)
#
# Returns the value stored for nucleotide $nuc at position $pos, or undef when
# there is no such position.
sub getFreq {
    my ($self, $pos, $nuc) = @_;

    # Fetch the position first: a nested ->[$pos]{$nuc} lookup would
    # autovivify a missing position and lengthen the motif.
    my $column = $self->[$pos];
    my $freq = defined $column ? $column->{$nuc} : undef;
    return $freq;
}

# getMatrix()
#
# Returns a reference to a fresh array of per-position hashes, a copy of the
# motif's values for the keys listed in @nucleotides.
sub getMatrix {
    my ($self) = @_;
    my @matrix = map {
        my $column = $_;
        +{ map { $_ => $column->{$_} } @nucleotides }
    } @$self;
    return \@matrix;
}

# reverseCompliment()
#
# Returns a reference to a new (unblessed) matrix with the positions in
# reverse order and the values of A/T and C/G swapped. The motif itself is not
# modified.
sub reverseCompliment {
    my ($self) = @_;
    my @rcMatrix = map {
        +{ A => $_->{T}, C => $_->{G}, G => $_->{C}, T => $_->{A} }
    } reverse @$self;
    return \@rcMatrix;
}

# logOdds($backgroundMatrix)
#
# Replaces every value, in place, by log10(value / background), where
# $backgroundMatrix is a hash reference keyed by nucleotide. Dies when a
# background value is missing or zero, or when a ratio is not positive (its
# logarithm is undefined); positions processed before the failure stay
# converted.
sub logOdds {
    my ($self, $backgroundMatrix) = @_;
    my $log10 = log(10);

    for my $i (0 .. $#$self) {
        for my $nuc (@nucleotides) {
            my $obs = $self->[$i]{$nuc};
            my $exp = $backgroundMatrix->{$nuc};
            die "logOdds: background frequency for '$nuc' is missing or zero"
                unless defined $exp and $exp != 0;

            my $ratio = (defined $obs ? $obs : 0) / $exp;
            die "logOdds: cannot take the log of a non-positive ratio for "
                . "'$nuc' at position $i (consider percent() with a pseudocount)"
                unless $ratio > 0;

            $self->[$i]{$nuc} = log($ratio) / $log10;
        }
    }
    return;
}

# printScreen()
#
# Prints one line per position to the selected output handle: the position
# index followed by the values in @nucleotides order, each field followed by
# a tab.
sub printScreen {
    my ($self) = @_;
    for my $i (0 .. $#$self) {
        print join("\t", $i, @{ $self->[$i] }{@nucleotides}), "\t\n";
    }
    return;
}

###############################################################################
########## ZFdnaBS::BarCodedBS ###########################################
###############################################################################
package ZFdnaBS::BarCodedBS;

# new()
#
# Returns an empty BarCodedBS object: a blessed hash that maps each barcode to
# an array of motif matrices indexed by position.
sub new {
    my ($class) = @_;

    # Fall back to this package when new() is not called as a class method.
    return bless {}, (ref($class) || $class || __PACKAGE__);
}

# addMotif($barcode, $position, $motif)
#
# Stores a copy of $motif (a reference to an array of per-position hashes, or
# a motif object) under $barcode at index $position. Only the keys listed in
# @nucleotides are copied. The slot is replaced as a whole, so columns of a
# longer motif stored there earlier do not linger. An empty $motif stores
# nothing.
sub addMotif {
    my ($self, $barcode, $position, $motif) = @_;
    return unless @$motif;

    my @copy = map {
        my $column = $_;
        +{ map { $_ => $column->{$_} } @nucleotides }
    } @$motif;
    $self->{$barcode}[$position] = \@copy;
    return;
}

# BarCodes()
#
# Returns a reference to an array of the stored barcodes in default (string)
# sort order.
sub BarCodes {
    my ($self) = @_;
    return [ sort keys %$self ];
}

# motifs($barcode)
#
# Returns a reference to an array of ZFdnaBS::Motif objects for $barcode,
# indexed by position. Positions that were never filled are returned as undef
# so the indices stay aligned. An unknown barcode gives an empty array.
sub motifs {
    my ($self, $barcode) = @_;

    # Single-level lookup: does not autovivify an entry for unknown barcodes.
    my $slots = $self->{$barcode};
    return [] unless $slots;

    my @matrices = map {
        (defined $_ && @$_) ? ZFdnaBS::Motif->new($_) : undef
    } @$slots;
    return \@matrices;
}

# printScreen()
#
# Dumps the object through DataBrowser's browse().
sub printScreen {
    my ($self) = @_;

    # The file's "use DataBrowser" imports browse() into package ZFdnaBS, not
    # into this package, so an unqualified call cannot be resolved here.
    # NOTE(review): assumes browse() is exported by, or defined in,
    # DataBrowser - confirm against that module.
    my $browse = ZFdnaBS->can('browse') || DataBrowser->can('browse');
    die "printScreen: DataBrowser::browse is not available" unless $browse;
    $browse->($self);
    return;
}

1;