#!/usr/bin/perl
# Copyright (C) 2007-2011 Ian Korf, Genís Parra, and Keith Brandam
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Purpose: for every entry of a FASTA file, look up a score for each
# pentamer in a built-in table (see __DATA__), find sliding windows whose
# summed score reaches a cutoff, merge nearby windows into peaks, and print
# either one distance-weighted score per entry or one GFF line per peak.

use strict;
use warnings;
use FAlite;
use Getopt::Std;

# Option variables filled in by getopts() below.
our ($opt_w, $opt_s, $opt_r, $opt_g, $opt_d, $opt_a, $opt_c, $opt_f, $opt_o);

################################
#
#  S e t u p
#
################################

# set up some default values
my $window_step      = 1;
my $window_size      = 7;
my $donor            = 5;
my $acceptor         = 10;
my $cutoff           = 1.2;
my $gap              = 5;   # how far can two peaks be apart to still be considered one peak?
my $gff              = 0;
my $weighting_factor = 200;

getopts('w:s:d:c:a:f:g:ro');

# Test with defined() rather than truth so that an explicit 0 (for example
# "-d 0" to disable donor clipping) is honoured instead of being ignored.
$window_size      = $opt_w if defined $opt_w;
$window_step      = $opt_s if defined $opt_s;
$donor            = $opt_d if defined $opt_d;
$acceptor         = $opt_a if defined $opt_a;
$cutoff           = $opt_c if defined $opt_c;
$gap              = $opt_g if defined $opt_g;
$weighting_factor = $opt_f if defined $opt_f;
$gff = 1 if $opt_o;

# The defaults shown are the values in effect after option parsing.
die "
usage: imeter_window.pl [options] <fasta file>
options:
  -w window size nt [default $window_size]
  -s step size nt [default $window_step]
  -d donor sequence to clip [default $donor]
  -a acceptor sequence to clip [default $acceptor]
  -g minimum gap allowed between high scoring windows [default $gap]
  -c threshold value to decide what is a high scoring window [default $cutoff]
  -f weighting factor to penalize peaks that are further away [default $weighting_factor]
  -r calculate score for reverse strand
  -o print GFF info of each peak
  use - as the fasta file to read from STDIN
" unless @ARGV == 1;

die "Window size (-w) must be at least 5\n" if ($window_size < 5);
die "Window step (-s) must be <= window size (-w)\n" if ($window_step > $window_size);

# A step below 1 would never advance the window loop; a weighting factor of
# 0 would divide by zero; negative clip lengths make no sense for substr().
die "Window step (-s) must be at least 1\n" if ($window_step < 1);
die "Donor clip (-d) must not be negative\n" if ($donor < 0);
die "Acceptor clip (-a) must not be negative\n" if ($acceptor < 0);
die "Weighting factor (-f) must not be 0\n" if ($weighting_factor == 0);

my ($FASTA) = @ARGV;

######################################
#
#  R e a d   P a r a m e t e r s
#
######################################

# Load the pentamer => score table from the __DATA__ section.  The table is
# read as one stream of whitespace-separated tokens (word, score, word,
# score, ...), so it does not matter how the pairs are split across lines.
my %pentamer_scores;
{
    my @tokens;
    while (my $line = <DATA>) {
        push @tokens, split ' ', $line;
    }
    die "Pentamer score table is incomplete (odd number of fields)\n" if @tokens % 2;
    %pentamer_scores = @tokens;
}

################################
#
#  M a i n   L o o p
#
################################

# check for standard input otherwise open regular filehandle
my ($fasta, $input);
if ($FASTA eq '-') {
    $fasta = FAlite->new(\*STDIN);
}
else {
    open($input, "<", $FASTA) or die "Can't open $FASTA: $!\n";
    $fasta = FAlite->new(\*$input);
}

while (my $entry = $fasta->nextEntry) {
    my ($id) = $entry->def =~ /^>(\S+)/;
    my $seq = uc $entry->seq;

    # Clip $donor bases from the start and $acceptor bases from the end.
    # The length is computed explicitly because substr() with a length of
    # -0 would return an empty string, and a sequence shorter than the
    # clipped regions should simply yield nothing to score.
    my $clipped_length = length($seq) - $donor - $acceptor;
    $seq = $clipped_length > 0 ? substr($seq, $donor, $clipped_length) : '';

    # loop over input sequence in pentamer windows, and extract score from %pentamer_scores
    my @records;
    for (my $i = 0; $i <= length($seq) - 5; $i++) {
        my $subseq = substr($seq, $i, 5);

        # are we working on reverse strand?
        $subseq = reverse_complement($subseq) if ($opt_r);

        # Pentamers missing from the table (e.g. containing N) count as 0.
        my $score = $pentamer_scores{$subseq};
        $records[$i] = {
            start => $i,
            end   => $i + 4,
            score => defined $score ? $score : 0,
        };
    }

    # Collect the windows that reach $cutoff.  A window starting at base $i
    # covers $window_size bases; its score is the sum of every pentamer that
    # lies completely inside it.  Note that the *summed* score is compared
    # with $cutoff, while the stored value is the average per base.
    my @windows;
    for (my $start = 0; $start < @records - $window_size + 5; $start += $window_step) {
        my $end = $start + $window_size - 1;

        my $window_score = 0;
        for (my $j = $start; $j + 4 <= $end; $j++) {
            $window_score += $records[$j]{score};
        }

        if ($window_score >= $cutoff) {
            push @windows, {
                start     => $start,
                end       => $end,
                avg_score => $window_score / $window_size,
            };
        }
    }

    ################################
    #
    #  F i n d   P e a k s
    #
    ################################

    # The first high scoring window always opens the first peak.  Each later
    # window either extends the current peak (when it starts less than $gap
    # nt after the peak's end) or opens a new peak.
    my @peaks;
    for my $window (@windows) {
        if (@peaks and $window->{start} - $peaks[-1]{end} < $gap) {
            my $current = $peaks[-1];

            # extend the peak to the end of this window...
            $current->{end} = $window->{end};

            # ...and replace its score by the mean of the old peak score
            # and this window's score
            $current->{avg_score} = ($current->{avg_score} + $window->{avg_score}) / 2;
        }
        else {
            push @peaks, { %$window };
        }
    }

    ################################
    #
    #  S c o r e   P e a k s
    #
    ################################

    my $imeter2_score = 0;
    for my $peak (@peaks) {

        # sum every pentamer that lies completely inside the peak
        my $peak_score = 0;
        for (my $j = $peak->{start}; $j <= $peak->{end} - 4; $j++) {
            $peak_score += $records[$j]{score};
        }

        # calculate a weighted score based on distance of peak
        # should have two variables here? distance based on start or middle of peak?
        # include a donor offset in case a large amount of sequence was clipped
        my $weighted_score = $peak_score * exp(-($peak->{start} + $donor) / $weighting_factor);

        # GFF coordinates are offsets into the clipped sequence.
        printf("%s\tIMEter\tpeak\t%d\t%d\t%.2f\t+\t.\t.\n",
            $id, $peak->{start}, $peak->{end}, $weighted_score) if ($gff);

        $imeter2_score += $weighted_score;
    }

    # with large window sizes can end up in situations where individually positive scoring
    # windows can be connected by short regions of negative score. Can end up producing a single
    # large negative peak. So should always set to zero if this happens.
    $imeter2_score = 0 if ($imeter2_score < 0);

    # print out final score
    my $formatted_score = sprintf("%.2f", $imeter2_score);
    print "$id\t$formatted_score\n" if (!$gff);
}

close $input if ($FASTA ne '-');
exit(0);

############################
#
#  S U B R O U T I N E S
#
#############################

# Return the reverse complement of a nucleotide string.
# Handles upper- and lower-case IUPAC codes; W, S and N are their own
# complements and are therefore left unchanged, as is any other character.
sub reverse_complement {
    my ($seq) = @_;
    $seq = reverse $seq;
    $seq =~ tr[ACGTRYMKBDHVacgtrymkbdhv]
              [TGCAYRKMVHDBtgcayrkmvhdb];
    return $seq;
}

__DATA__
CGCCG 1.9580717108796
CGGCG 1.89858928684913
CGATT 1.68863052531273
CGCGA 1.53667722117749
CGATC 1.53403084338351
CGTCG 1.46858951901267
TCGAT 1.46420405405747
CGACG 1.44336022300819
TCGCG 1.44145179886508
GGGTT 1.35746397134248
TTCGA 1.33065188608934
TCCGA 1.31535436911392
TAGGG 1.30875596065583
GATCG 1.29712261029596
CCGAT 1.23538159551726
AGGGT 1.22400503605628
CGAAT 1.21952606581964
CGCGT 1.16873483963394
CTGGG 1.16846062383037
AATCG 1.16092466449249
TCGAA 1.10557017686345
ATTCG 1.09071473475617
ATCGA 1.08390957678889
GCGAT 1.07898018687561
CTCCG 1.05658125879546
CTTCG 1.04815333484276
ATCGG 1.03260130108353
CTCGA 1.028601858824
TCGGA 1.00050734586122
TCTCG 0.971621826745853
TTCGT 0.952693930736816
CCGCG 0.947333620700545
TGGGT 0.932790145711152
GTTCG
0.925407959931484 TTTCG 0.923184630271459 GCGCG 0.904950787100565 CGCGC 0.896010742747351 GATTT 0.884967290549799 CGGAT 0.839427214380983 TTAGG 0.833409505673864 TTCGC 0.829408458555992 ACGCG 0.823414896392352 TCGTC 0.816841155397732 GATTC 0.813734269542041 GGCGA 0.810974638891551 GCGAA 0.801083708944288 TCGTT 0.794851840541371 GATCT 0.79228837704842 TCGAG 0.77551545056079 CGGAG 0.772681475947738 GAATT 0.768895625240019 GCTCG 0.765717528197974 TCGTG 0.765536197778033 ATCGC 0.753830731787942 TTCCG 0.753777067085019 AGATC 0.743416759544082 GGATT 0.738459869539721 GTCGG 0.732836783296118 CGAGA 0.720740440485157 TCGCT 0.718581034584657 GGGGG 0.70448620469887 ACGAT 0.70069365311742 TCGGC 0.679770548392498 ATCCG 0.67158552743916 GAATC 0.668362418258892 CGATG 0.668253666741412 ATCGT 0.663349985957075 GGATC 0.653414789993102 TGATT 0.651859535050857 GATTG 0.651371333188124 GTCGA 0.640126339788273 TACGA 0.638587759938491 TGCGA 0.635404435732243 CGTTG 0.620786927905642 CTCTC 0.612646291653529 TTGGG 0.611663191383443 AGCGA 0.609359977687843 GGTTT 0.60678020402394 TCGCC 0.600166777163455 CGGAA 0.599570252432654 TCTGG 0.575313160853235 TTGAT 0.57405149184092 AATTG 0.56428349148006 GACGA 0.556870881776073 TTTGA 0.548013098283707 CATCG 0.539451689142155 CTCGC 0.538257679660418 AATTT 0.537571354653054 AGATT 0.533597316279989 CGAAA 0.533279116412035 TCCGT 0.530125273679323 TGGGG 0.528076971661135 CGTGA 0.521442707482325 GATCC 0.520384510537196 GGGGT 0.518610594926785 CGTTT 0.517700993843756 CAATT 0.512335633746589 CTCGT 0.503807665299624 TCTCT 0.502758812047804 GCCGA 0.500884405642724 TCCGG 0.490277650758596 CTAGG 0.489607837019728 CCGCC 0.488352773834104 TCGTA 0.487567615078276 CGAGT 0.487284506446613 ATTGA 0.47611957533133 TTGCG 0.474565323488285 TCTTC 0.471488172619884 CGAAG 0.471402704904167 TGGAT 0.465313844186266 GGAAT 0.465230248365438 CCGAC 0.464033094638691 GAGAT 0.455860053701235 AATTC 0.455406775124901 CGTGT 0.454670371731613 ATCTG 0.452193583001034 TAGGT 
0.449154206206427 TGTCG 0.448125416452959 ACGAA 0.440473712196741 CCGGA 0.439382429577612 CGTCT 0.437924187673594 GCGTT 0.436974343823236 CGGTG 0.430347170398538 GGGTC 0.425926824253123 TCGCA 0.425689888312575 TTTAG 0.425293677388668 GCGAG 0.421765016102562 GTTGA 0.420638571947255 GATTA 0.41775407143528 TTGAG 0.414369128014788 ATTTG 0.413158501792671 TTTTG 0.403318742169936 TATCG 0.400953982619998 TAGAT 0.400516415097981 TTACG 0.391756796723354 AAATT 0.389639056870056 TGAAT 0.386753486554355 TTCTC 0.380445833910427 CCGGC 0.379489298128071 ATTAG 0.378677957438089 TCAAT 0.377132106537503 CTCTG 0.37709182435122 ATTGG 0.375823413399552 TGATC 0.375193304905936 TTGGA 0.369752324992483 ACGGA 0.369295320906443 GGGAT 0.365367420678465 GACGG 0.359381724679496 TTTGG 0.356224770403762 TCACG 0.353325980376382 CGCTT 0.353028548143417 CACGA 0.352287271674337 TGCGT 0.351674627796777 GTTTC 0.351013202018079 CGTAG 0.349831696639845 CGCGG 0.34396286449893 TTGAA 0.342787770792165 AGGTT 0.341919255188085 GAAAT 0.341655671021215 GGGGA 0.339804383017196 CTGGA 0.335536987628781 CAGAT 0.329676913524751 ATTTC 0.327881399863137 GCGGA 0.327765636971128 GTGTT 0.325619681351018 TTTTT 0.320951827266984 AACGA 0.319371976534809 GTTTT 0.31816186232913 TTAGA 0.316456895662144 CGTTC 0.313556450216839 GTCGT 0.309525909849445 TCGAC 0.308698273160184 CGAGG 0.307305840340378 GTTTG 0.307300685105561 TTCGG 0.304505396159422 TCTCC 0.303393354781151 GGTTG 0.301577309497946 GAGCT 0.299993053517199 GGTCG 0.299763060588394 GTGGG 0.298397747036893 ACGAG 0.293236965909734 TCTGA 0.289173286924383 GCCGC 0.285859909935361 AATCT 0.285726199407039 ATTTT 0.285709175242844 ATCTC 0.281640124849346 AGCTC 0.281167507199299 CGTAA 0.276016433637018 CGGCC 0.272765530402771 GTACG 0.271830940622156 TCTGT 0.269055158920522 TCCCG 0.267823096514846 TACGG 0.265960352497656 TTTGT 0.265782951526918 TGTTG 0.263923954693901 CTGCG 0.261682715162143 CCTCG 0.260590816456668 TCGGG 0.260062588916757 TGAGA 0.260047132789181 GGAGA 
0.257828947693203 GTCGC 0.257391073772486 CGCTC 0.257330047354216 TTGTG 0.257051047670598 TAATT 0.256860674873277 TTGTT 0.256476688297139 GTGAT 0.256013399993321 CGGGA 0.249991237080519 CTTCT 0.249741497412638 ACGCC 0.249321410383867 GATGA 0.247063321083149 TGTGT 0.246695940977044 CGAAC 0.245670202940741 CCCTA 0.245615782233538 TGGGA 0.243619184743254 GGGAA 0.243204438429951 CCCCG 0.242876739384615 ATGGG 0.242486698817857 CCCGC 0.237391200300885 AACCC 0.234574535913631 GTTGT 0.232554587478665 CGTTA 0.232362361442737 GAGTT 0.231563123534815 GCCGG 0.23021716741315 GAGCG 0.228929621552689 CGAGC 0.222168377576661 TGTTC 0.221044181947151 GAGAA 0.219555264301105 TCGGT 0.219518834551903 GAGAG 0.208380831657565 CGCCC 0.206459340748994 AAATC 0.202859754946927 GGTGA 0.201215619117446 GGTTC 0.20119441228521 GTTTA 0.197274799154291 TTCAA 0.196414917939348 GTGAG 0.195691909020497 TGAGT 0.195405159459963 GTAAT 0.192303996850478 TGGAG 0.19040931949048 GCGTA 0.189198942193659 TAGCG 0.18636154794557 CGTAT 0.185853613303813 GGCGT 0.185302689152431 CGCCA 0.184473315722928 CTCGG 0.184193634614107 GTGTG 0.183498192305683 TCAGA 0.18323943613229 TGGCG 0.182894048644891 TGTTT 0.182863893584129 TTCTG 0.182503573141235 GTGAA 0.182355915887428 GCTTC 0.181551563518676 CGGGG 0.180924248641857 AGGAT 0.180227074287396 AGTTT 0.17541070959167 AGCTT 0.174294364975844 TGTGA 0.171158732676561 GAAGA 0.170248578242138 GTGGA 0.169784645058391 CCGTC 0.168578101620764 GATCA 0.167382101568694 CTGAT 0.167322584595177 TCCTC 0.166732486722795 GTGCG 0.165984228617908 TTGGT 0.163434885624097 TGGAA 0.16270220029749 AATTA 0.16207553170491 TCTAG 0.157143946846453 CAACG 0.154929040108913 CCGAA 0.154929040108913 ATTGT 0.154693786126999 TAGCT 0.153151852856246 GGGCT 0.153036980767704 GTTCT 0.152082551107451 GTAGA 0.149061010353521 TTAGC 0.144383940326387 CGTCA 0.140668346983743 GGGTA 0.140559448794974 CGTGG 0.134170479942115 CTACG 0.134170479942115 GGTGG 0.133438085456013 GTTAG 0.133249493795218 AAAAA 
0.133170344691739 TGGTT 0.133050585399332 GCGTC 0.130000643813058 TAGGA 0.128728958342207 GGAGC 0.128344254517951 GACGC 0.125782694449396 CCGTT 0.125533645090069 TGACG 0.122945093138699 TTTCT 0.121855474178757 GTCCG 0.113866383487026 GCGTG 0.113108864414286 AGGGC 0.111561530799392 AGAAT 0.111258568451936 TGAGC 0.109277927836196 CAATC 0.109263440473484 CGCCT 0.108197989763922 TCCGC 0.107895506265405 GCTTT 0.107861191018911 GTCTC 0.107075916847081 TAAAG 0.10666874655683 CGATA 0.103513530608363 TAATC 0.101940116363427 AGTCG 0.101901676469774 AGAGA 0.0988748439961036 ATTCT 0.0976162597186412 TTCTT 0.0962796044234169 TGAAG 0.0948141586755538 TTTTA 0.0939130541787958 CGCTG 0.0925065395750977 CACGC 0.0921932847609499 ACCCT 0.08627267944197 TGGTG 0.0853284704115846 GGTTA 0.0845575055838716 AAGTT 0.0842766821489443 CTGTT 0.0830686734105418 AGGTC 0.0801612717069403 CTGTG 0.0792803021494396 CTCAA 0.0781112208452093 CGACC 0.0775541961797058 ATTCA 0.0775272398485388 TAGAG 0.0766006794239527 TGAAA 0.0758731720063378 CGGCA 0.0752018476381781 TTAGT 0.072113167851472 GAGGT 0.0703872988835497 GTGTA 0.0693807754904059 GTTGG 0.0686552659669205 CCAAT 0.0656704297759193 AAAGT 0.0637488686883097 CCCGA 0.0632403104170584 GAGTG 0.0630515970208145 ATTCC 0.0582203335779393 CTCTT 0.0576580995101856 TGAGG 0.0535834856176667 AAGCG 0.0534675552836506 TGATG 0.0497454416855899 TGGGC 0.0470484998961702 TAGTG 0.0467875759903206 AATCC 0.045822269718321 CGACT 0.0456075497392072 TGTGG 0.0441939650355964 CCGTG 0.0441819746013736 CTTTG 0.0427124247636199 GAGTC 0.0412082321542124 GGAGT 0.0383577943959422 CGCAT 0.0375720894707539 TCAAA 0.0371352321774813 CTGAG 0.0369070657519436 TTTTC 0.0366421097023559 GGCGC 0.0366090729201512 AGTGT 0.0354963740733539 GAACG 0.0348137564089057 AAGAT 0.0334391928386216 GAGTA 0.0327376209381121 TTAAT 0.0321872234918359 ACGAC 0.0317299753112403 GAAGC 0.0295266088145157 ACGGG 0.0293981580250534 GTTCC 0.0289817357272023 CGCAG 0.0267033299469884 GATGT 0.0257889892936043 TTGCT 
0.0249910986551121 TTTCA 0.0246562822548905 GCCCG 0.0216625092454484 ATTTA 0.0216133133224111 TTTGC 0.0200351919652183 AATCA 0.018684962131576 TTTAT 0.0168377408198272 AAGGG 0.0153575066289381 GCTCT 0.0151376799457114 CTTAG 0.0138186669132079 ATGAT 0.013507089512147 AAGCT 0.013248418031724 AGGCG 0.0123246446661122 AATGG 0.0121617494101195 AGACG 0.0104204832107867 TGTAG 0.00756817617997382 GAGGA 0.00710162263505581 TTGTA 0.00614676295493688 GGTGT 0.00394228699535577 CGGAC 0.00292594666386249 CTAAT 0.00248807446910671 CGGCT 0.00201313736995697 AAAAT 0.00156381949700917 ATGGA 0.000369848014199603 TTCTA -0.00129334411715977 GGCGG -0.00261223687756752 TATTG -0.00317543449371773 CTTCC -0.00446353054445054 TAGTT -0.00493125748055918 ATCTA -0.00801179418371471 TTCAG -0.0083704845183599 CTCCT -0.00876015953722325 GTTCA -0.0108744256148423 CTTCA -0.0110236120203611 CCAGA -0.0112879125558385 CAAAT -0.0113527064645012 GGAGG -0.0128570505770652 CCGAG -0.0132255659205478 AAAGG -0.0132975938911409 AGTGA -0.0137211578211973 GGAAA -0.0138547511290648 TGCTT -0.0140078904149838 TTTAA -0.0148040880732568 GCGGC -0.0149959613333999 TCATC -0.0153620882747128 ATACG -0.0169947474593978 ACTCG -0.0169947474593978 GGGCA -0.0182198622076862 TCTAT -0.0206425244745421 CGTCC -0.0207506360592072 TACGT -0.0208315444605166 GTTGC -0.0221133662025512 GGTCT -0.0240610137740121 AGATG -0.0253698642966061 TCTTT -0.0257534864564362 TGTAA -0.0292083989664887 AGCTG -0.0305663840601226 ATGAA -0.0312610156961839 CTCTA -0.0315356374833311 ATGCG -0.0324617847834652 GGAAG -0.0355204887247811 ATTGC -0.0355752227190473 TTATG -0.0380110160434741 ATCTT -0.03818710383648 TTAAG -0.0389938385893288 AGGGA -0.0396802247139974 AAGGT -0.0410986772281356 AGAGT -0.0428262850640291 GCTCC -0.0436166392731677 TTCAT -0.0458977356417383 CCTCT -0.0465907398812604 GAAAA -0.0468250792912201 GGTAA -0.048980202627789 TCTCA -0.0495658084931177 TGTAT -0.0496024480634805 GTCAA -0.0516161280706739 AGTTC -0.0524375435402492 GCAAT 
-0.0528209775101375 GCTTA -0.0544047663370141 CTTTT -0.0545320943291384 GTATC -0.0565130816669947 CGCAA -0.0565750650847993 GTTAA -0.0569671520116333 TTCCT -0.0581144228768585 CTAGA -0.0581308859745691 TCTGC -0.0583569182383258 TATAG -0.060593391860491 TAGAA -0.0636355863556268 GGGAG -0.0641882491946744 TAACG -0.0652067015230943 TTAAA -0.0654704162477162 GTCTG -0.0697264327361046 GGTCA -0.0700266541095165 ATCAA -0.0738747317943393 GCTTG -0.0743337695733435 AGAAA -0.074399634286671 GAAAG -0.0745172974537947 ACGTT -0.0747949095621474 GCTCA -0.0752992206416396 ATCAG -0.0760515004796167 CCTTC -0.0772444020201207 GATGG -0.0779051279491823 GTGGT -0.0779192542218771 AAGGA -0.0800952893114639 AGTTG -0.0805749077732114 TGCTC -0.082941875031097 TATGG -0.0829617770181421 TAAGG -0.084946809361472 TTCAC -0.0858340572645679 GGCCG -0.0860790593948824 TCCAA -0.08626603386908 TGTTA -0.0863336210619895 TCCCC -0.0896545136187097 AAACG -0.0901002741117809 TTTCC -0.0928104191820169 CGACA -0.0929984733346731 GTAAA -0.0955048483793725 GTGTC -0.0963962420358637 GAGGG -0.0972107246215873 TAAAT -0.0979775665299235 TCCTT -0.0980029621869184 ACGTA -0.100503642991543 CAAAA -0.100852351412775 CTGAA -0.101507734317218 CCTAA -0.10153010695579 CTATG -0.102992442331229 GTATT -0.104891041987009 AGGAG -0.105598510114306 GTAGC -0.106319851832507 GGAAC -0.106698544510636 CTCAG -0.107479060075415 CGCTA -0.108543275893941 CCCAA -0.108763529532322 GAATG -0.109256831026262 AGTAG -0.109552615466826 CACCG -0.109898647547932 CGTAC -0.111201753633157 GGCTC -0.1123438230541 ATTAA -0.112411780535553 TAAGA -0.112732616371407 AGGAA -0.11311762915987 ATGTG -0.114348004127255 TTCCC -0.114386155100207 TTATT -0.114974000942771 GTAGT -0.115379833367845 TATGA -0.115638687172146 GCCGT -0.116834754951086 ACGGC -0.116875575352614 GCTGA -0.116962418252402 CGGTT -0.117736587708759 CTTTA -0.118570432812007 CTTTC -0.119563711980698 GATAA -0.119732252355718 GAAGT -0.120742605981724 CCGTA -0.121210353348244 ATAGA 
-0.121273680340135 CGGGT -0.121588595088504 CCACG -0.121638521378574 GCGAC -0.122604935419996 AGAGC -0.122604935419996 GATAG -0.122895772038437 AGGGG -0.12459348851345 TCACT -0.126338291180154 CCCAG -0.12827562304909 GTCTT -0.128413753763033 ATGTT -0.128600854980021 TCTTA -0.133513872564847 CCGGT -0.133931353092787 ATCCA -0.134446154171758 TTATC -0.134984830452472 TCTAA -0.135379870277452 ATGAG -0.135390526617442 CTTGA -0.135458719873427 GTATA -0.137072771158831 TATGT -0.138825179229171 GCTGG -0.140198995434735 TACGC -0.14033110431637 GAAGG -0.145786784495705 GTTAT -0.146065760154842 TCTAC -0.151928154906749 GGATA -0.154126837063967 CAAAG -0.155029446211342 GTATG -0.155383229516454 CGGTC -0.157348884744731 TCAGC -0.15869003320288 TTGAC -0.159193535952887 TGCCG -0.1599442972445 CGTGC -0.160012624462343 AGGTG -0.160273191590137 AAGAA -0.160383343685728 CTCCA -0.161213874482313 AATGA -0.16127677022315 ACCCA -0.162900486564829 ATGGT -0.16403955013231 CGCAC -0.16476058258245 TTGTC -0.16487656193133 AAAGA -0.165200663434147 GCTAG -0.165355581989114 AGTCT -0.166858639669001 GGCTT -0.170787705398674 AAGAG -0.171611657297936 AGAAG -0.17179047413075 CTGCT -0.174220282072523 ATTAC -0.175466408198939 CAATG -0.176015928135884 AGTGG -0.177646298977958 AACGT -0.177975831880462 AGCGT -0.17827016711347 AGCTA -0.178352636833384 CCTAG -0.178494693616279 TCTTG -0.179338080871652 CCTCC -0.17953935139613 AAAAG -0.180805345454524 CTTAA -0.182855816521811 GACTC -0.182969061188353 GGATG -0.184123232569659 ACTTT -0.18687057939662 CTTGT -0.187410721524429 CTGTA -0.188122346344204 ACCGA -0.189928810087258 AATGT -0.190191140129755 GATGC -0.193463305322859 TTCCA -0.194455220439674 AACGG -0.195236482129828 ATCAT -0.195313336372864 ATAAA -0.195730108737853 CACGT -0.195853917450635 CTCCC -0.196583321384717 GACTT -0.197133993672318 GACGT -0.198137566587354 CCTTT -0.198224086305773 TTTAC -0.198707914505788 GTAGG -0.199643817906037 TGTCT -0.200155106299296 TCCAG -0.200379635481819 CCCGG 
-0.200946386701788 ATAGG -0.202580312140592 AAATG -0.20275429355622 ATGTA -0.202821751287424 CTATC -0.204211054818863 ATTAT -0.204381809440182 CTAGT -0.204692531699375 TGATA -0.210501478471669 CCAAA -0.212401605770642 ACTCT -0.213195668752932 ATAGT -0.213309405351875 TATCT -0.213418339744376 ACGTC -0.216387081740613 TAATG -0.21753958923616 TAAAA -0.217648516518661 GCGCC -0.219466474672586 TGCGG -0.220307451111956 TGCAA -0.221634311355296 CAGCT -0.222180221141007 TCATT -0.223125028926298 CTGGT -0.223129882553469 CAGAG -0.225774727140809 GAAAC -0.226005806684363 CAAGT -0.226085597824664 ACGCA -0.226425332913643 CTCAC -0.230035295578686 CACTC -0.230779793904348 AAACC -0.232218585277904 GGTAG -0.232732020238626 CTATT -0.234498360624001 TAAGC -0.235245072118329 TGACT -0.23680735980625 TTACT -0.236911710550822 TAGTA -0.240080882874468 TCATG -0.241909083138178 TTATA -0.242763790398092 TGCCC -0.243451115216323 GGGTG -0.243544614295483 TCAAG -0.244600781684768 TATTT -0.245950943710661 ATCCT -0.246070625769649 AAGTG -0.24661822542927 TCAGT -0.248007366714394 CAGTG -0.248102605909777 CGGTA -0.248721698868125 ATAAT -0.250798475718849 GTTAC -0.251917821518304 CAGAA -0.252423175906953 ACGTG -0.25299105430078 GGTAT -0.253867519195786 GGCTA -0.253891850421226 TGGTA -0.254346634174747 TAGTC -0.254420338879079 GACCG -0.256182755956131 TGGTC -0.256187497865296 TATAA -0.257049895018083 TCAGG -0.257057287695906 GTAAG -0.260108459169931 AGAGG -0.260108459169931 AGTAA -0.260393774120112 GAGGC -0.264224567642163 GCACG -0.264666741132236 GGACG -0.264846378176594 CATTG -0.26801143652696 CATAG -0.269187753080794 AAAGC -0.269780193990774 ACCAA -0.271684810477551 GAACA -0.272932499527515 ACAAA -0.273272918368379 GAACT -0.274540340512518 CATTT -0.276650756745857 CCCTT -0.277267475702067 CCGCT -0.279253508436635 ACGGT -0.280380854196193 GAATA -0.284088922849814 TGGCT -0.284685268252205 ACTTG -0.28496239213274 GTCTA -0.286117869623847 GATAT -0.287990317689824 ACAAT -0.289915766197521 GCTAA 
-0.290386486338294 GCCCA -0.291257157740595 CTTGG -0.292286346386073 CTAAA -0.29397707681115 CACTG -0.295317115234149 TCCAT -0.295585337659628 TAAGT -0.298454770819414 GAACC -0.298582606984567 TATTC -0.30010865854033 GCAGA -0.300147096428379 CTACT -0.301427225140911 TACTT -0.302100309027599 CATGG -0.304570993230989 ATCAC -0.305850082167107 GCAAA -0.306578071322227 AGGTA -0.306582888969432 ATAAG -0.307155382070352 AGTTA -0.308612271300568 AGTAT -0.308678254499578 TACCG -0.308712888831995 TCCTG -0.311435937546406 GTGAC -0.312601335767775 TACTC -0.313002117176577 CCCCA -0.313219795629494 AACAA -0.317024498783058 AACTT -0.320634880179708 CATGA -0.321446541661719 TACAA -0.322104163482499 GAGAC -0.322441232190271 ACTAG -0.322470787739837 GTGGC -0.325035095790704 GGACT -0.326419619785983 TAGGC -0.3271760796692 ATATA -0.327864761547153 AAAAC -0.329815653246362 TCCAC -0.331695541418951 GGGCG -0.332864801605245 TCCCA -0.332864801605245 TCACC -0.333222424397865 CAAGG -0.333308689150523 TTGCA -0.334017670080039 TGAAC -0.334329330628778 ACAGA -0.336591544974004 CTAAG -0.336672912510338 CCATT -0.337032940626343 GGTAC -0.340396248127567 GTCAG -0.340715216742815 GCTAT -0.341245221895337 TTGGC -0.341997325080214 GACAA -0.342600660175259 AGCGC -0.342934890299552 GGGAC -0.345144563025731 GCTGC -0.345641273321871 CAGCG -0.347571300420271 AGGCT -0.34870238243892 GAGCC -0.349113465228677 TATAT -0.34955646054428 AAGTC -0.350443394722123 ATCCC -0.35175334710138 TGTAC -0.352188959411617 AACGC -0.352554707934526 AAGTA -0.352896607451902 AGAAC -0.355346255942856 TTGCC -0.356359850373295 TCCCT -0.357651584792863 GAGCA -0.358556947258026 AGCCG -0.359050308863567 TGTGC -0.360007029224052 CTTAT -0.361635636057526 TATTA -0.363102452493257 GCTGT -0.36973295034443 AGATA -0.371626009910817 CCAAG -0.372269134805403 ACTGT -0.373689709868815 AATAA -0.37541623680567 ACACG -0.381896939567974 GGACC -0.38366349747421 TACCC -0.384300604191272 ACTTC -0.384928378848172 CCCCT -0.387153003918898 TTACA 
-0.389368201021271 TCAAC -0.389983463103983 AAACT -0.390135325082596 CTAGC -0.390443107681279 ATGTC -0.392534899728385 CATCT -0.393489339896008 TCATA -0.395211256475601 GTCCT -0.395565243735099 GATAC -0.395579404283118 CCCTC -0.398029610666563 TCCTA -0.398099462242257 GCATG -0.398582369367124 TTAAC -0.39962214771615 CCCAT -0.399635507611701 AGCAA -0.400239925996667 GCCCC -0.403066413011974 CCGGG -0.403066413011974 GTGCT -0.404975371333353 AGCGG -0.406265294679294 TGCTG -0.408274542855133 CCTGA -0.409423180936957 GTACT -0.409470059583689 GGCAA -0.4096475126635 ACCGG -0.410765126907465 ATAGC -0.411573133112225 TATCA -0.41204632984588 GGTCC -0.412485259401397 CCCGT -0.413141681213471 GACTA -0.415386684647842 GTACC -0.416082015229648 CATGT -0.416477429628999 ACGCT -0.418654398481753 TCACA -0.418654398481753 ATATG -0.419723037036013 GCGCT -0.422192707395061 GTCAC -0.425234931042867 ATGGC -0.426369005909339 AGGAC -0.426402210165375 ACCTA -0.42822443838037 CATCA -0.430479080705637 AACTC -0.431723069051281 GGCTG -0.435871975423669 GCTAC -0.436847146202967 GCGCA -0.437412990977841 AATAG -0.437477556980264 CAGGG -0.437504710043379 TAAAC -0.441576453954318 TTACC -0.442421808505304 TGGAC -0.444533030307359 CACAG -0.44576638687684 TACTA -0.446669624915745 GGTGC -0.447167807394099 CTATA -0.447548707432658 TAGAC -0.447856585260801 GACCT -0.448115819595702 CAGTT -0.449350630017223 CCCAC -0.450484950051358 CTTAC -0.453670888813485 CACTT -0.453984820826981 ACTGG -0.454035450701441 TGTCA -0.454895103359827 ACAAG -0.454898808943884 TATAC -0.455709274759372 ATATT -0.455931632317112 CATAA -0.456132673727494 AGACT -0.456471293033117 TGCTA -0.456979074780271 CACAC -0.457896840388287 GCAAG -0.459481723778511 ATGCT -0.46080580940706 CACTA -0.461372208731373 CTTGC -0.461883112282788 GCGGG -0.463392057558675 GACTG -0.463582157587696 GTAAC -0.464623389776124 AATGC -0.466086700157116 CAAGA -0.471205701493723 TACTG -0.472188506070242 AATAT -0.474439360583214 AGTCA -0.476604396529199 AAGGC 
-0.478531978303434 CTGTC -0.479233166672468 CTACA -0.480892345340427 CTCAT -0.482995208090756 CCTCA -0.484168736230736 CACAA -0.486284504602373 CCATG -0.486384314993607 ACTCA -0.487569440638288 TATGC -0.48834175909544 CCTTA -0.48965078322558 GCCTC -0.490040769976521 GCCAA -0.490871046264722 CCTAT -0.492007966250755 ACTAC -0.494371829040379 ACTGA -0.495701384718114 TAATA -0.495726178617851 AACCT -0.497198608987726 GACCC -0.498378625010384 TGCAT -0.499225294915397 CATTA -0.499871715763389 GCCTT -0.503741106348182 TACAT -0.505207426264732 GGGGC -0.505415272454381 AAATA -0.505464156918917 CACGG -0.507120847279383 GTGCA -0.50897309186757 CAGGA -0.513966218628749 ACCGT -0.514681286255529 GGCAG -0.517210534794027 AGCAG -0.51746455276121 GACCA -0.517793472322878 CCACA -0.518671369156648 ACTAA -0.521812536726635 GCATT -0.522388570981521 GGACA -0.522397478760903 CAATA -0.523774882594819 ATGCA -0.523793540323793 CAAGC -0.524127976029293 ACATT -0.525187279607929 GGCCA -0.528041664416565 GCCAT -0.528995591249163 CCTTG -0.529472855005267 AGCCA -0.538543837440373 ATACA -0.540471971425766 TGACC -0.541394569559948 AAACA -0.543128026721186 CCATC -0.543740735901105 ACCTC -0.544525077556013 CACCA -0.544723395589238 ATGAC -0.546831222784232 GCATC -0.547472194365466 CCACT -0.549968357714696 AGTCC -0.550284074772865 AACCA -0.554243263634952 CAAAC -0.554856241275789 CCGCA -0.556157922231688 ATATC -0.557854039478724 ACTTA -0.564176277306848 TATCC -0.564694275479488 GTACA -0.566769797403983 TACAG -0.56686611941594 AAGAC -0.567395495121552 TACCA -0.569105496636929 AGACC -0.569917624108707 TAACC -0.572112188335296 TGCGC -0.575463900133807 TAGCA -0.576032051862681 AGGCA -0.578484574320773 CAGTC -0.578597470090523 TGTCC -0.579784981160346 ACTAT -0.579819107566953 CAACA -0.58122922608945 GTCAT -0.582036554057293 GCCAC -0.582036554057294 AGTAC -0.582713716726796 TAACT -0.583839923093219 ACACT -0.584944481554052 AACAG -0.587282723623253 GCGGT -0.588162656854135 ACCTT -0.588803164269448 CGGGC 
-0.59309174256629 CCATA -0.593289351106787 AACTA -0.593303175505819 CATCC -0.595413151493888 ACACA -0.600881369127938 CAGGT -0.602295393041094 CATTC -0.604553990618721 AAGCC -0.617779739141801 ACAGT -0.617783063749074 TGCCT -0.618598497007595 CCCCC -0.618795104067412 TGGCC -0.62267853855464 CTGCA -0.627041256045611 GGCAT -0.6285255719325 AAGCA -0.630708287215777 CAGTA -0.634901068033669 ATAAC -0.636364990067841 GCAGT -0.638953306825394 AGACA -0.640174990108816 CATAT -0.640275014556355 CAACT -0.642624162729505 CACAT -0.645239341355423 TACAC -0.645251849060956 ATACT -0.645335646892835 GCAGC -0.647893636196478 ACAAC -0.649382862383693 GCAAC -0.649532701568123 CAGAC -0.651828368980578 CTACC -0.653674428145951 CCTGT -0.657033663064235 ACATG -0.657743805474551 GTCCA -0.658139533143085 GGCCT -0.658370245697949 TGCAG -0.658775997341004 ACATA -0.659325440966376 TACCT -0.668979241072995 AATAC -0.672613500214558 GACAT -0.676713253595736 GCCTA -0.678324869116283 TGACA -0.680116675064136 GCCCT -0.682640494995699 TAACA -0.683420348427699 GTGCC -0.683627435203866 ATGCC -0.687302020754713 CCTGG -0.688033201445446 CCACC -0.688835312996876 AACAC -0.689015465776288 TAGCC -0.69004374367566 AGTGC -0.692666057491793 ACTCC -0.69481871015715 GCATA -0.698201553441526 CCAGG -0.699340463461568 AACTG -0.700096845226654 ACCAT -0.700192977530895 CCAAC -0.702898875805731 AACAT -0.704142217287306 AACCG -0.715303084920758 GACAC -0.727992907673275 AGGCC -0.734039647502344 CAACC -0.735606942649304 ACCAG -0.736990177207164 GTCCC -0.740590311350159 CCAGT -0.742870449846628 CCCTG -0.75653428528943 CCTAC -0.756614732220663 CATGC -0.759195835923889 AGCAT -0.77075592878589 ATACC -0.77399934537506 AGCCT -0.784399601515345 ACATC -0.794087031172283 CATAC -0.803481081114643 ACCCC -0.813852229280367 GCACA -0.814455592653003 CCAGC -0.817140027143296 CTGAC -0.82351239453199 AGCCC -0.856211517496288 TGCAC -0.858048813889297 CACCT -0.858684771254162 CTAAC -0.863419649089267 GGGCC -0.868340739213935 ACCAC 
-0.86900562012702 TGCCA -0.871331616879748 GCACT -0.878623519602114 TGGCA -0.881108035287813 CCTGC -0.890322040985988 GCAGG -0.89300543695479 GACAG -0.899328238823937 ACCTG -0.908181003745726 AGCAC -0.911616456560516 CTGGC -0.913242462532667 CAGCA -0.921988330779983 GGCCC -0.93104418300094 GCCTG -0.961228854389415 CACCC -0.989775971344123 ACAGC -0.997922447974458 ACTGC -0.998004524210628 CAGGC -1.00157544557108 ACCGC -1.01697361077384 ACCCG -1.01886274210142 ACACC -1.05540736365246 GGCAC -1.08767660211704 CAGCC -1.09113228712117 ACAGG -1.16699905477845 CTGCC -1.22214060897091 GCCAG -1.24404009054228 GCACC -1.25559297804744