# Count nucleotide occurrences at every position of a fixed-length motif
# (windows that begin with `motif_start`) across all sequences of a FASTA
# file, accumulating them into a position-specific count matrix.
from FASTA import *
import numpy
import pylab as P

P.ion()

nucleotides = ['G', 'A', 'T', 'C']
# Column of the count matrix assigned to each nucleotide.
nucleotide_to_index = {nuc: index for index, nuc in enumerate(nucleotides)}

# build PSSM on yeast genome:
yeast = FASTA('s_cerevisiae.fasta')

# motif is TATAwxyzuv
motif_start = 'TATA'
motif_length = 10
pseudo_count = 1

# One row per motif position, one column per nucleotide.  Every cell starts
# at `pseudo_count` instead of zero.
count_pssm = numpy.zeros((motif_length, len(nucleotides))) + pseudo_count
num_matches = 0

for chromosome_name, chromosome_sequence in yeast.accession_to_sequence.items():
    print('processing %s' % (chromosome_name,))
    # The "+ 1" makes the scan include the last full-length window, which
    # starts at len(chromosome_sequence) - motif_length.
    for window_start in xrange(len(chromosome_sequence) - motif_length + 1):
        sl = chromosome_sequence[window_start:window_start + motif_length]
        if sl.startswith(motif_start):
            num_matches += 1
            for i, nuc in enumerate(sl):
                # NOTE(review): raises KeyError for any symbol that is not
                # one of G/A/T/C (e.g. 'N' or lower-case letters).
                nuc_index = nucleotide_to_index[nuc]
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

import FASTA
import sys

# Read FASTA records from standard input and print their sequences in
# blocks of `seqWidth` characters, one line per record within each block.
recs = FASTA.readFasta(sys.stdin)
maxLen = max(len(rec.sequence) for rec in recs)

titleWidth = 10
seqWidth = 60

start = 0
# NOTE(review): the statement that advances `start` is not part of the
# visible fragment; it presumably follows this loop body.
while start < maxLen:
    for rec in recs:
        # Only the first block is labelled with the record title (padded or
        # cut to exactly `titleWidth` characters); later blocks get blanks.
        if start != 0:
            label = ' ' * titleWidth
        else:
            label = rec.title.ljust(titleWidth)[0:titleWidth]
        print("%s%s" % (label, rec.sequence[start : start + seqWidth]))
anchors = {} for line in strm: fields = line.split() anchors[fields[0]] = fields[3:5] return anchors # Test for existence of genscan parameter file if not os.path.exists(options.genscanParamFile): sys.stderr.write("Error: Genscan parameter file %s does not exist\n" % options.genscanParamFile) sys.exit(1) # Read in the sequences from the multi-fasta file sys.stderr.write("Reading in multi-fasta file...") multiFastaFile = file(multiFastaFilename) fastaRecs = FASTA.readFasta(multiFastaFile) multiFastaFile.close() sys.stderr.write("done\n") # Make protein anchors for each sequence for rec in fastaRecs: rec.title = firstWord(rec.title) chromFile = file(os.path.join(workdir, rec.title + ".chroms"), 'w') chromFile.write("%s\t%d\n" % (rec.title, len(rec.sequence))) chromFile.close() sys.stderr.write("Writing single-fasta file...") fastaFilename = os.path.join(workdir, rec.title + ".fa") fastaFile = file(fastaFilename, 'w') fastaFile.write(str(rec)) fastaFile.close()