def main():
    """Command-line entry point: print shuffled copies of FASTA sequences.

    Options are read from ``sys.argv`` (see the usage text below).  Each
    sequence returned by ``sequence.readFASTA`` is passed to
    ``dinuclShuffle`` ``copies`` times and every result is printed to stdout
    as a FASTA record.  When more than one copy is requested the copy index
    is appended to the sequence name.

    The usage text is written to stderr and the process exits with status 1
    when no arguments are given, when an option is missing its value, when
    ``-s``/``-c`` is not an integer, when ``-f`` is absent, or when ``-h`` is
    given.  An unknown option prints a one-line error and exits with
    status 1.
    """
    #
    # defaults
    #
    file_name = None
    alphabet_file_name = None
    seed = 1
    copies = 1
    tag = ""

    #
    # get command line arguments
    #
    usage = """USAGE:
    %s [options]

    -f <filename>   file name (required)
    -t <tag>        added to shuffled sequence names
    -s <seed>       random seed; default: %d
    -c <n>          make <n> shuffled copies of each sequence; default: %d
    -a <filename>   alphabet file to use non-DNA alphabets
    -h              print this usage message

    Note that fasta-shuffle-letters also supports dinucleotide shuffling and is faster.
    """ % (sys.argv[0], seed, copies)

    def usage_exit():
        # Single exit path for every "print usage and fail" case.
        sys.stderr.write(usage + "\n")
        sys.exit(1)

    # no arguments: print usage
    if len(sys.argv) == 1:
        usage_exit()

    # parse command line
    i = 1
    while i < len(sys.argv):
        arg = sys.argv[i]
        if arg in ("-f", "-t", "-s", "-c", "-a"):
            i += 1
            try:
                value = sys.argv[i]
                if arg == "-f":
                    file_name = value
                elif arg == "-t":
                    tag = value
                elif arg == "-s":
                    seed = int(value)
                elif arg == "-c":
                    copies = int(value)
                else:
                    alphabet_file_name = value
            except (IndexError, ValueError):
                # IndexError: option given without a value;
                # ValueError: -s / -c value is not an integer.
                usage_exit()
        elif arg == "-h":
            # Exit status 1 is kept for backward compatibility.
            usage_exit()
        else:
            sys.stderr.write("Unknown command line argument: " + arg + "\n")
            sys.exit(1)
        i += 1

    # check that required arguments given
    if file_name is None:
        usage_exit()

    # get the alphabet, defaulting to DNA if it is not provided
    if alphabet_file_name is not None:
        alph = alphabet.loadFromFile(alphabet_file_name)
    else:
        alph = alphabet.dna()

    random.seed(seed)

    # read sequences
    seqs = sequence.readFASTA(file_name, alph)

    for s in seqs:
        seq = s.getString()
        name = s.getName()
        for copy_index in range(copies):
            shuffled_seq = dinuclShuffle(seq, alph)
            if copies == 1:
                print(">%s\n%s" % (name + tag, shuffled_seq))
            else:
                print(">%s_%d\n%s" % (name + tag, copy_index, shuffled_seq))
def main():
    """Command-line entry point: Hamming-distance enrichment of a word.

    Options are read from ``sys.argv`` (see the usage text below).  The
    positive and negative FASTA files are read with ``sequence.readFASTA``,
    the word is optionally refined with ``refine_consensus`` (``-r``), the
    best alignment is computed with ``get_best_hamming_alignment`` and the
    result is printed via ``print_meme_header`` / ``print_meme_motif``.
    Progress messages and the elapsed time go to stderr; the elapsed time is
    also written to stdout as a ``#`` comment line.

    The usage text is written to stderr and the process exits with status 1
    when no arguments are given, when an option is missing its value, when an
    unknown option or ``-h`` is given, or when any of the required ``-w``,
    ``-p`` or ``-n`` options is absent.
    """
    word = None                 # no word specified
    pos_seq_file_name = None    # no positive sequence file specified
    neg_seq_file_name = None    # no negative sequence file specified
    alphabet_file_name = None
    refine = False
    given_only = False

    #
    # get command line arguments
    #
    usage = """USAGE:
    %s [options]

    -w <word>               word (required)
    -p <file_name>          positive sequences FASTA file name (required)
    -n <file_name>          negative sequences FASTA file name (required)
    -a <file_name>          alphabet definition file
    -r                      refine consensus by branching search
                            (distance 1 steps; beam size = 1).
    -h                      print this usage message

    Compute the Hamming distance from <word> to each FASTA sequence
    in the positive and negative files.  Apply Fisher's Exact test to
    each distance.
    <word> may contain ambiguous characters.

    """ % (sys.argv[0])

    def usage_exit():
        # Single exit path for every "print usage and fail" case.
        sys.stderr.write(usage + "\n")
        sys.exit(1)

    # no arguments: print usage
    if len(sys.argv) == 1:
        usage_exit()

    # parse command line
    i = 1
    while i < len(sys.argv):
        arg = sys.argv[i]
        if arg in ("-w", "-p", "-n", "-a"):
            i += 1
            try:
                value = sys.argv[i]
            except IndexError:
                # option given without a value
                usage_exit()
            if arg == "-w":
                word = value
            elif arg == "-p":
                pos_seq_file_name = value
            elif arg == "-n":
                neg_seq_file_name = value
            else:
                alphabet_file_name = value
        elif arg == "-r":
            refine = True
        else:
            # "-h" and any unknown option both print the usage text
            usage_exit()
        i += 1

    # check that required arguments given; -w is validated here so that a
    # missing word is reported before any file is read rather than failing
    # later with a NameError
    if word is None or pos_seq_file_name is None or neg_seq_file_name is None:
        usage_exit()

    # keep track of time
    start_time = time.time()

    # read alphabet
    if alphabet_file_name is not None:
        alph = alphabet.loadFromFile(alphabet_file_name)
    else:
        alph = alphabet.dna()
    if not alph.isComplementable():
        given_only = True
        sys.stderr.write("Alphabet is not complementable...\n")

    # read sequences
    sys.stderr.write("Reading sequences...\n")
    pos_seqs = get_strings_from_seqs(sequence.readFASTA(pos_seq_file_name, alph))
    neg_seqs = get_strings_from_seqs(sequence.readFASTA(neg_seq_file_name, alph))

    if refine:
        (best_word, _best_log_pvalue) = refine_consensus(
            word, pos_seqs, neg_seqs, alph, given_only)
    else:
        best_word = word

    sys.stderr.write("Computing Hamming alignment...\n")
    (dist, log_pvalue, p, P, n, N, aln) = get_best_hamming_alignment(
        best_word, pos_seqs, neg_seqs, alph, given_only)
    pv_string = sprint_logx(log_pvalue, 1, _pv_format)
    nsites = len(aln)
    sys.stderr.write("[ %s %s %s %s %s ]\n" % (p, P, n, N, dist))
    sys.stderr.write(
        "Best ZOOPs alignment has %d sites / %d at distance %d with p-value %s\n"
        % (nsites, P, dist, pv_string))
    print_meme_header(alph)
    print_meme_motif(best_word, nsites, pv_string, aln, alph)

    # print elapsed time
    end_time = time.time()
    elapsed = end_time - start_time
    sys.stderr.write("elapsed time: %.2f seconds\n" % elapsed)
    print("#elapsed time: %.2f seconds" % elapsed)
def main():
    """Command-line entry point: print shuffled copies of FASTA sequences.

    Options are read from ``sys.argv`` (see the usage text below).  Each
    sequence returned by ``sequence.readFASTA`` (read with the
    ``'Extended DNA'`` alphabet) is passed to ``dinuclShuffle`` ``copies``
    times and every result is printed to stdout as a FASTA record.  When
    more than one copy is requested the copy index is appended to the
    sequence name.

    The usage text is written to stderr and the process exits with status 1
    when no arguments are given, when an option is missing its value, when
    ``-s``/``-c`` is not an integer, when ``-f`` is absent, or when ``-h`` is
    given.  An unknown option prints a one-line error and exits with
    status 1.
    """
    #
    # defaults
    #
    file_name = None
    seed = 1
    copies = 1
    tag = ""

    #
    # get command line arguments
    #
    usage = """USAGE:
    %s [options]

    -f <filename>   file name (required)
    -t <tag>        added to shuffled sequence names
    -s <seed>       random seed; default: %d
    -c <n>          make <n> shuffled copies of each sequence; default: %d
    -h              print this usage message
    """ % (sys.argv[0], seed, copies)

    def usage_exit():
        # Single exit path for every "print usage and fail" case.
        sys.stderr.write(usage + "\n")
        sys.exit(1)

    # no arguments: print usage
    if len(sys.argv) == 1:
        usage_exit()

    # parse command line
    i = 1
    while i < len(sys.argv):
        arg = sys.argv[i]
        if arg in ("-f", "-t", "-s", "-c"):
            i += 1
            try:
                value = sys.argv[i]
                if arg == "-f":
                    file_name = value
                elif arg == "-t":
                    tag = value
                elif arg == "-s":
                    seed = int(value)
                else:
                    copies = int(value)
            except (IndexError, ValueError):
                # IndexError: option given without a value;
                # ValueError: -s / -c value is not an integer.
                usage_exit()
        elif arg == "-h":
            # Exit status 1 is kept for backward compatibility.
            usage_exit()
        else:
            sys.stderr.write("Unknown command line argument: " + arg + "\n")
            sys.exit(1)
        i += 1

    # check that required arguments given
    if file_name is None:
        usage_exit()

    random.seed(seed)

    # read sequences
    seqs = sequence.readFASTA(file_name, 'Extended DNA')

    for s in seqs:
        # FIXME altschul can't handle ambigs
        seq = s.getString()
        name = s.getName()
        for copy_index in range(copies):
            shuffled_seq = dinuclShuffle(seq)
            if copies == 1:
                print(">%s\n%s" % (name + tag, shuffled_seq))
            else:
                print(">%s_%d\n%s" % (name + tag, copy_index, shuffled_seq))
def main():
    """Command-line entry point: Hamming-distance enrichment of a word.

    Options are read from ``sys.argv`` (see the usage text below).  The
    positive and negative FASTA files are read with ``sequence.readFASTA``,
    the word is optionally refined with ``refine_consensus`` (``-r``), the
    best alignment is computed with ``get_best_hamming_alignment`` and the
    result is printed via ``print_meme_header`` / ``print_meme_motif``.
    Progress messages and the elapsed time go to stderr; the elapsed time is
    also written to stdout as a ``#`` comment line.

    The usage text is written to stderr and the process exits with status 1
    when no arguments are given, when an option is missing its value, when an
    unknown option or ``-h`` is given, or when any of the required ``-w``,
    ``-p`` or ``-n`` options is absent.
    """
    word = None                 # no word specified
    pos_seq_file_name = None    # no positive sequence file specified
    neg_seq_file_name = None    # no negative sequence file specified
    alphabet_file_name = None
    refine = False
    given_only = False

    #
    # get command line arguments
    #
    usage = """USAGE:
    %s [options]

    -w <word>               word (required)
    -p <file_name>          positive sequences FASTA file name (required)
    -n <file_name>          negative sequences FASTA file name (required)
    -a <file_name>          alphabet definition file
    -r                      refine consensus by branching search
                            (distance 1 steps; beam size = 1).
    -h                      print this usage message

    Compute the Hamming distance from <word> to each FASTA sequence
    in the positive and negative files.  Apply Fisher's Exact test to
    each distance.
    <word> may contain ambiguous characters.

    """ % (sys.argv[0])

    def usage_exit():
        # Single exit path for every "print usage and fail" case.
        sys.stderr.write(usage + "\n")
        sys.exit(1)

    # no arguments: print usage
    if len(sys.argv) == 1:
        usage_exit()

    # Options that consume the following argument, mapped to a local key.
    valued_options = {"-w": "word", "-p": "pos", "-n": "neg", "-a": "alph"}
    values = {}

    # parse command line
    i = 1
    while i < len(sys.argv):
        arg = sys.argv[i]
        if arg in valued_options:
            i += 1
            try:
                values[valued_options[arg]] = sys.argv[i]
            except IndexError:
                # option given without a value
                usage_exit()
        elif arg == "-r":
            refine = True
        else:
            # "-h" and any unknown option both print the usage text
            usage_exit()
        i += 1

    word = values.get("word", word)
    pos_seq_file_name = values.get("pos", pos_seq_file_name)
    neg_seq_file_name = values.get("neg", neg_seq_file_name)
    alphabet_file_name = values.get("alph", alphabet_file_name)

    # check that required arguments given; -w is validated here so that a
    # missing word is reported before any file is read rather than failing
    # later with a NameError
    if word is None or pos_seq_file_name is None or neg_seq_file_name is None:
        usage_exit()

    # keep track of time
    start_time = time.time()

    # read alphabet
    if alphabet_file_name is not None:
        alph = alphabet.loadFromFile(alphabet_file_name)
    else:
        alph = alphabet.dna()
    if not alph.isComplementable():
        given_only = True
        sys.stderr.write("Alphabet is not complementable...\n")

    # read sequences
    sys.stderr.write("Reading sequences...\n")
    pos_seqs = get_strings_from_seqs(
        sequence.readFASTA(pos_seq_file_name, alph))
    neg_seqs = get_strings_from_seqs(
        sequence.readFASTA(neg_seq_file_name, alph))

    if refine:
        (best_word, _best_log_pvalue) = refine_consensus(
            word, pos_seqs, neg_seqs, alph, given_only)
    else:
        best_word = word

    sys.stderr.write("Computing Hamming alignment...\n")
    (dist, log_pvalue, p, P, n, N, aln) = get_best_hamming_alignment(
        best_word, pos_seqs, neg_seqs, alph, given_only)
    pv_string = sprint_logx(log_pvalue, 1, _pv_format)
    nsites = len(aln)
    sys.stderr.write("[ %s %s %s %s %s ]\n" % (p, P, n, N, dist))
    sys.stderr.write(
        "Best ZOOPs alignment has %d sites / %d at distance %d with p-value %s\n"
        % (nsites, P, dist, pv_string))
    print_meme_header(alph)
    print_meme_motif(best_word, nsites, pv_string, aln, alph)

    # print elapsed time
    end_time = time.time()
    elapsed = end_time - start_time
    sys.stderr.write("elapsed time: %.2f seconds\n" % elapsed)
    print("#elapsed time: %.2f seconds" % elapsed)
def main():
    """Command-line entry point: run EXTREME from one seed matrix.

    Parses the command line with ``ArgumentParser``, reads the seed matrix
    number ``indexvalue`` (1-based, counting lines that contain ``>``) from
    the seed file, adds the pseudocounts and row-normalises it, creates an
    output directory named after the seed, reads both FASTA files, calls
    ``extreme`` and writes the results with ``outputMEMEformat`` and, when
    weblogolib is importable, ``outputMotif``.  With ``-saveseqs`` the
    sequences are also written to ``Positive_seq.fa`` / ``Negative_seq.fa``
    in the current directory.  Start time, end time and duration are printed
    to stdout.

    Exits with status 1 and a message on stderr when the seed file contains
    no seed with the requested index.
    """
    import os
    import time

    description = "The program applies a modified EXTREME algorithm to find motifs in a FASTA file. It accepts a positive sequence set, a negative sequence set, a list of seed PFMs, and an index number indicating which of the seed PFMs to use"
    parser = ArgumentParser(description=description)
    parser.add_argument('fastafile', metavar='f',
                        help='FASTA file containing the sequences')
    parser.add_argument('negfastafile', metavar='g',
                        help='Negative FASTA file. This is for comparison so that you know the motif you discovered is over-represented.')
    parser.add_argument('jfile', metavar='j',
                        help='File containing PWM seeds')
    parser.add_argument('indexvalue', metavar='i',
                        help='Which seed from the Minimal MEME Format file to use (it is an integer ranging from 1 to the total number of PFM seeds in your file)',
                        type=int)
    parser.add_argument("-p", "--pseudocounts",
                        help="Pseudo counts added to initial PFM guess. Default:0.0",
                        type=float, default=0.0)
    parser.add_argument("-maxsites", dest="maxsites",
                        help="Maximum number of expected sites for the motif. If not specified, defaults to 5 times number of initial predicted sites.",
                        type=int, default=0)
    parser.add_argument("-minsites", dest="minsites",
                        help="Minimum number of expected sites for the motif. Default: 10",
                        type=int, default=10)
    parser.add_argument("-t", "--tries", dest="tries",
                        help="Number of tries for each motif discovered. The fudge factor is changed until the number of discovered sites is in the \"acceptable\" range",
                        type=int, default=15)
    parser.add_argument("-s", "--seed", dest="seed", help="Random seed",
                        type=int, default=1)
    parser.add_argument("-saveseqs", "--saveseqs", dest="saveseqs",
                        help="If specified, save sequences to current directory",
                        action='store_true')

    print("Started at:")
    print(time.ctime())
    starttime = time.time()

    args = parser.parse_args()
    seed = args.seed
    minsites = args.minsites
    maxsites = args.maxsites
    tries = args.tries
    random.seed(seed)

    # Read the whole seed file up front so the handle is always closed.
    with open(args.jfile, 'r') as jfile:
        lines = jfile.readlines()

    # Locate the header line of the requested seed: header lines are the
    # ones containing '>' and are counted from 1.
    header_index = None
    seeds_seen = 0
    for line_number, line in enumerate(lines):
        if '>' in line:
            seeds_seen += 1
            if seeds_seen == args.indexvalue:
                header_index = line_number
                break
    if header_index is None:
        # Previously this fell through and failed later with a NameError.
        sys.stderr.write(
            "Seed index %d not found in %s (%d seed(s) in file)\n"
            % (args.indexvalue, args.jfile, seeds_seen))
        sys.exit(1)

    from numpy import fromstring

    # Header layout: ">name consensus ..."; the consensus length gives the
    # number of matrix rows that follow the header.
    parts = lines[header_index].split()
    pos_cs = parts[1]
    motifname = parts[0][1:]
    w = len(pos_cs)
    matrix_lines = lines[header_index + 1:header_index + 1 + w]
    pwm_string = ' '.join(' '.join(matrix_lines).split())
    pwm_guess = fromstring(pwm_string, sep=' ', dtype=float)
    pwm_guess = pwm_guess.reshape((w, 4))

    print("Using initial motif guess %s" % motifname)
    print("Adding %s pseudocounts and normalizing" % str(args.pseudocounts))
    pwm_guess = pwm_guess + args.pseudocounts
    pwm_guess = pwm_guess / pwm_guess.sum(axis=1)[:, newaxis]

    # make the directory (recursively); adapted from DREME.py by T. Bailey
    outdir = motifname
    outpre = outdir + "/"
    try:
        os.makedirs(outdir)
    except OSError as exc:
        # An already existing output directory is reused (clobbered);
        # any other failure is propagated.
        if exc.errno != errno.EEXIST:
            raise

    # Use DREME's SeqIO to read in FASTA to list
    seqs = sequence.convert_ambigs(
        sequence.readFASTA(args.fastafile, None, True))
    negseqs = sequence.convert_ambigs(
        sequence.readFASTA(args.negfastafile, None, True))

    (theta_motifs, theta_background_matrices, lambda_motifs, logevs,
     disc_pwms, disc_logevs, disc_nsites) = extreme(
        seqs, negseqs, minsites, maxsites, pwm_guess, tries)

    outputMEMEformat(disc_pwms, disc_logevs, disc_nsites, outpre)
    try:
        # The import doubles as an availability check for weblogolib.
        from weblogolib import LogoData, LogoOptions, LogoFormat, png_formatter, eps_formatter, unambiguous_dna_alphabet
        motif_results = zip(theta_motifs, theta_background_matrices,
                            lambda_motifs, logevs)
        for k, (theta_motif, theta_background_matrix, lambda_motif,
                logev) in enumerate(motif_results, 1):
            outputMotif(theta_motif, theta_background_matrix, lambda_motif,
                        logev, k, outpre)
    except ImportError:
        print("You do not have Weblogolib, so sequence logos will not be made")

    if args.saveseqs:
        print("Saving Positive sequences to Positive_seq.fa")
        with open("Positive_seq.fa", "w") as pos_file:
            for number, seq in enumerate(seqs, 1):
                pos_file.write(">sequence" + str(number) + "\n")
                pos_file.write(seq + "\n")
        print("Saving Negative sequences to Negative_seq.fa")
        with open("Negative_seq.fa", "w") as neg_file:
            for number, seq in enumerate(negseqs, 1):
                neg_file.write(">sequence" + str(number) + "\n")
                neg_file.write(seq + "\n")

    print("Ended at:")
    print(time.ctime())
    stoptime = time.time()
    duration = stoptime - starttime
    print("Duration: %s" % duration)
def main():
    """Command-line entry point: DREME-like search for gapped k-mers.

    Parses the command line with ``ArgumentParser``, reads the positive and
    negative FASTA files with ``sequence.readFASTA`` /
    ``sequence.convert_ambigs`` and hands them to ``find_kmers`` together
    with the half-site length, gap range, minimum site count, z-score
    threshold and output file name.

    NOTE(review): ``-w``, ``-minw``, ``-maxw``, ``-mink`` and ``-maxk`` are
    accepted but their values are not passed to ``find_kmers`` here; they are
    kept so existing command lines continue to parse.
    """
    description = "The program performs a DREME-like search for gapped k-mers"
    parser = ArgumentParser(description=description)
    parser.add_argument("fastafile", metavar="f",
                        help="FASTA file containing the sequences")
    parser.add_argument("negativefile", metavar="n",
                        help="FASTA file containing the negative sequences")
    parser.add_argument("outputfile", metavar="o", help="Output file")
    parser.add_argument(
        "-w", "--width", dest="width",
        help="Width of the motif to search for. This makes the program only search for a motif of this width. Beware if greater than 8",
        type=int, default=0,
    )
    parser.add_argument(
        "-ming", dest="mingap",
        help="Minimum gap of k-mer to search for. Default: 0",
        type=int, default=0,
    )
    # The help text used to advertise "Default: 12" while the effective
    # default is 10; the text now matches the behaviour.
    parser.add_argument(
        "-maxg", dest="maxgap",
        help="Maximum gap of k-mer to search for. Default: 10",
        type=int, default=10,
    )
    parser.add_argument(
        "-l", dest="halflength",
        help="Number of non-degenerate letters per half-site. Total number of non-degenerate letters is twice this number. Default: 4",
        type=int, default=4,
    )
    parser.add_argument(
        "-minw", dest="minwidth",
        help="Minimum width of the motif to search for. The default is 3, which is the width of the smallest core motif.",
        type=int, default=3,
    )
    parser.add_argument(
        "-maxw", dest="maxwidth",
        help="Maximum width of the motif to search for. This program does one refinement at this width (if greater than 8), and then picks the most significant short-mer. Default: 8",
        type=int, default=8,
    )
    parser.add_argument(
        "-mink", dest="mink",
        help="Minimum width of the core to search for. The default is 3, which is the width of the smallest core motif.",
        type=int, default=3,
    )
    parser.add_argument(
        "-maxk", dest="maxk",
        help="Maximum width of the core to search for. Default: 8",
        type=int, default=8,
    )
    parser.add_argument(
        "-z", "--zthresh", dest="zthresh",
        help="Corrected z-score threshold. Default: 5",
        type=float, default=5,
    )
    parser.add_argument(
        "-minsites", "--minsites", dest="minsites",
        help="Minimum number of sites for a k-mer to be included. Default: 10",
        type=int, default=10,
    )
    args = parser.parse_args()

    print("Reading positive sequence file...")
    pos_seqs = sequence.convert_ambigs(
        sequence.readFASTA(args.fastafile, None, True))
    print("Reading negative sequence file...")
    neg_seqs = sequence.convert_ambigs(
        sequence.readFASTA(args.negativefile, None, True))

    find_kmers(pos_seqs, neg_seqs, args.halflength, args.mingap, args.maxgap,
               args.minsites, args.zthresh, args.outputfile)
def main():
    """Command-line entry point: DREME-like search for gapped k-mers.

    Parses the command line with ``ArgumentParser``, reads the positive and
    negative FASTA files with ``sequence.readFASTA`` /
    ``sequence.convert_ambigs`` and hands them to ``find_kmers`` together
    with the half-site length, gap range, minimum site count, z-score
    threshold and output file name.

    NOTE(review): ``-w``, ``-minw``, ``-maxw``, ``-mink`` and ``-maxk`` are
    accepted but their values are not passed to ``find_kmers`` here; they are
    kept so existing command lines continue to parse.
    """
    description = "The program performs a DREME-like search for gapped k-mers"
    parser = ArgumentParser(description=description)
    parser.add_argument('fastafile', metavar='f',
                        help='FASTA file containing the sequences')
    parser.add_argument('negativefile', metavar='n',
                        help='FASTA file containing the negative sequences')
    parser.add_argument('outputfile', metavar='o', help='Output file')
    parser.add_argument(
        "-w", "--width", dest="width",
        help="Width of the motif to search for. This makes the program only search for a motif of this width. Beware if greater than 8",
        type=int, default=0)
    parser.add_argument(
        "-ming", dest="mingap",
        help="Minimum gap of k-mer to search for. Default: 0",
        type=int, default=0)
    # The help text used to advertise "Default: 12" while the effective
    # default is 10; the text now matches the behaviour.
    parser.add_argument(
        "-maxg", dest="maxgap",
        help="Maximum gap of k-mer to search for. Default: 10",
        type=int, default=10)
    parser.add_argument(
        "-l", dest="halflength",
        help="Number of non-degenerate letters per half-site. Total number of non-degenerate letters is twice this number. Default: 4",
        type=int, default=4)
    parser.add_argument(
        "-minw", dest="minwidth",
        help="Minimum width of the motif to search for. The default is 3, which is the width of the smallest core motif.",
        type=int, default=3)
    parser.add_argument(
        "-maxw", dest="maxwidth",
        help="Maximum width of the motif to search for. This program does one refinement at this width (if greater than 8), and then picks the most significant short-mer. Default: 8",
        type=int, default=8)
    parser.add_argument(
        "-mink", dest="mink",
        help="Minimum width of the core to search for. The default is 3, which is the width of the smallest core motif.",
        type=int, default=3)
    parser.add_argument(
        "-maxk", dest="maxk",
        help="Maximum width of the core to search for. Default: 8",
        type=int, default=8)
    parser.add_argument(
        "-z", "--zthresh", dest="zthresh",
        help="Corrected z-score threshold. Default: 5",
        type=float, default=5)
    parser.add_argument(
        "-minsites", "--minsites", dest="minsites",
        help="Minimum number of sites for a k-mer to be included. Default: 10",
        type=int, default=10)
    args = parser.parse_args()

    print('Reading positive sequence file...')
    pos_seqs = sequence.convert_ambigs(
        sequence.readFASTA(args.fastafile, None, True))
    print('Reading negative sequence file...')
    neg_seqs = sequence.convert_ambigs(
        sequence.readFASTA(args.negativefile, None, True))

    find_kmers(pos_seqs, neg_seqs, args.halflength, args.mingap, args.maxgap,
               args.minsites, args.zthresh, args.outputfile)
def main():
    """Command-line entry point: run EXTREME from one seed matrix.

    Parses the command line with ``ArgumentParser``, reads the seed matrix
    number ``indexvalue`` (1-based, counting lines that contain ``>``) from
    the seed file, adds the pseudocounts and row-normalises it, creates an
    output directory named after the seed, reads both FASTA files, calls
    ``extreme`` (passing the initial step size) and writes the results with
    ``outputMEMEformat`` and, when weblogolib is importable,
    ``outputMotif``.  With ``-saveseqs`` the sequences are also written to
    ``Positive_seq.fa`` / ``Negative_seq.fa`` in the current directory.
    Start time, end time and duration are printed to stdout.

    Exits with status 1 and a message on stderr when the seed file contains
    no seed with the requested index.
    """
    import os
    import time

    description = "The program applies a modified EXTREME algorithm to find motifs in a FASTA file. It accepts a positive sequence set, a negative sequence set, a list of seed PFMs, and an index number indicating which of the seed PFMs to use"
    parser = ArgumentParser(description=description)
    parser.add_argument('fastafile', metavar='f',
                        help='FASTA file containing the sequences')
    parser.add_argument('negfastafile', metavar='g',
                        help='Negative FASTA file. This is for comparison so that you know the motif you discovered is over-represented.')
    parser.add_argument('jfile', metavar='j',
                        help='File containing PWM seeds')
    parser.add_argument('indexvalue', metavar='i',
                        help='Which seed from the Minimal MEME Format file to use (it is an integer ranging from 1 to the total number of PFM seeds in your file)',
                        type=int)
    parser.add_argument("-p", "--pseudocounts",
                        help="Pseudo counts added to initial PFM guess. Default:0.0",
                        type=float, default=0.0)
    parser.add_argument("-q", "--initialstep",
                        help="The initial step size for the online EM algorithm. A VERY sensitive parameter. I get best success for ChIP size data (about 100,000 to 1,000,000 bps) with a step size of 0.05. For DNase footprinting, which usually has >5,000,000 bps, I find 0.02 works best. Default:0.05",
                        type=float, default=0.05)
    parser.add_argument("-maxsites", dest="maxsites",
                        help="Maximum number of expected sites for the motif. If not specified, defaults to 5 times number of initial predicted sites.",
                        type=int, default=0)
    parser.add_argument("-minsites", dest="minsites",
                        help="Minimum number of expected sites for the motif. Default: 10",
                        type=int, default=10)
    parser.add_argument("-t", "--tries", dest="tries",
                        help="Number of tries for each motif discovered. The fudge factor is changed until the number of discovered sites is in the \"acceptable\" range",
                        type=int, default=15)
    parser.add_argument("-s", "--seed", dest="seed", help="Random seed",
                        type=int, default=1)
    parser.add_argument("-saveseqs", "--saveseqs", dest="saveseqs",
                        help="If specified, save sequences to current directory",
                        action='store_true')
    parser.add_argument("-b", "--background", dest="background",
                        help="If specified, the minimal MEME output will use the calculated background probabilities instead of uniform probabilities.",
                        action='store_true')

    print("Started at:")
    print(time.ctime())
    starttime = time.time()

    args = parser.parse_args()
    seed = args.seed
    initialstep = args.initialstep
    minsites = args.minsites
    maxsites = args.maxsites
    tries = args.tries
    random.seed(seed)

    # Read the whole seed file up front so the handle is always closed.
    with open(args.jfile, 'r') as jfile:
        lines = jfile.readlines()

    # Locate the header line of the requested seed: header lines are the
    # ones containing '>' and are counted from 1.
    header_index = None
    seeds_seen = 0
    for line_number, line in enumerate(lines):
        if '>' in line:
            seeds_seen += 1
            if seeds_seen == args.indexvalue:
                header_index = line_number
                break
    if header_index is None:
        # Previously this fell through and failed later with a NameError.
        sys.stderr.write(
            "Seed index %d not found in %s (%d seed(s) in file)\n"
            % (args.indexvalue, args.jfile, seeds_seen))
        sys.exit(1)

    from numpy import fromstring

    # Header layout: ">name consensus ..."; the consensus length gives the
    # number of matrix rows that follow the header.
    parts = lines[header_index].split()
    pos_cs = parts[1]
    motifname = parts[0][1:]
    w = len(pos_cs)
    matrix_lines = lines[header_index + 1:header_index + 1 + w]
    pwm_string = ' '.join(' '.join(matrix_lines).split())
    pwm_guess = fromstring(pwm_string, sep=' ', dtype=float)
    pwm_guess = pwm_guess.reshape((w, 4))

    print("Using initial motif guess %s" % motifname)
    print("Adding %s pseudocounts and normalizing" % str(args.pseudocounts))
    pwm_guess = pwm_guess + args.pseudocounts
    pwm_guess = pwm_guess / pwm_guess.sum(axis=1)[:, newaxis]

    # make the directory (recursively); adapted from DREME.py by T. Bailey
    outdir = motifname
    outpre = outdir + "/"
    try:
        os.makedirs(outdir)
    except OSError as exc:
        # An already existing output directory is reused (clobbered);
        # any other failure is propagated.
        if exc.errno != errno.EEXIST:
            raise

    # Use DREME's SeqIO to read in FASTA to list
    seqs = sequence.convert_ambigs(
        sequence.readFASTA(args.fastafile, None, True))
    negseqs = sequence.convert_ambigs(
        sequence.readFASTA(args.negfastafile, None, True))

    (theta_motifs, theta_background_matrices, lambda_motifs, logevs,
     disc_pwms, disc_bkg, disc_logevs, disc_nsites) = extreme(
        seqs, negseqs, minsites, maxsites, pwm_guess, initialstep, tries)

    outputMEMEformat(disc_pwms, disc_bkg, disc_logevs, disc_nsites, outpre,
                     args.background)
    try:
        # The import doubles as an availability check for weblogolib.
        from weblogolib import LogoData, LogoOptions, LogoFormat, png_formatter, eps_formatter, unambiguous_dna_alphabet
        motif_results = zip(theta_motifs, theta_background_matrices,
                            lambda_motifs, logevs)
        for k, (theta_motif, theta_background_matrix, lambda_motif,
                logev) in enumerate(motif_results, 1):
            outputMotif(theta_motif, theta_background_matrix, lambda_motif,
                        logev, k, outpre)
    except ImportError:
        print("You do not have Weblogolib, so sequence logos will not be made")

    if args.saveseqs:
        print("Saving Positive sequences to Positive_seq.fa")
        with open("Positive_seq.fa", "w") as pos_file:
            for number, seq in enumerate(seqs, 1):
                pos_file.write(">sequence" + str(number) + "\n")
                pos_file.write(seq + "\n")
        print("Saving Negative sequences to Negative_seq.fa")
        with open("Negative_seq.fa", "w") as neg_file:
            for number, seq in enumerate(negseqs, 1):
                neg_file.write(">sequence" + str(number) + "\n")
                neg_file.write(seq + "\n")

    print("Ended at:")
    print(time.ctime())
    stoptime = time.time()
    duration = stoptime - starttime
    print("Duration: %s" % duration)
def main():
    """Command-line entry point: print shuffled copies of FASTA sequences.

    Options are read from ``sys.argv`` (see the usage text below).  Each
    sequence returned by ``sequence.readFASTA`` (read with the
    ``'Extended DNA'`` alphabet) is passed to ``dinuclShuffle`` ``copies``
    times and every result is printed to stdout as a FASTA record.  When
    more than one copy is requested the copy index is appended to the
    sequence name.

    The usage text is written to stderr and the process exits with status 1
    when no arguments are given, when an option is missing its value, when
    ``-s``/``-c`` is not an integer, when ``-f`` is absent, or when ``-h`` is
    given.  An unknown option prints a one-line error and exits with
    status 1.
    """
    #
    # defaults
    #
    file_name = None
    seed = 1
    copies = 1
    tag = ""

    #
    # get command line arguments
    #
    usage = """USAGE:
    %s [options]

    -f <filename>   file name (required)
    -t <tag>        added to shuffled sequence names
    -s <seed>       random seed; default: %d
    -c <n>          make <n> shuffled copies of each sequence; default: %d
    -h              print this usage message
    """ % (sys.argv[0], seed, copies)

    def usage_exit():
        # Single exit path for every "print usage and fail" case.
        sys.stderr.write(usage + "\n")
        sys.exit(1)

    # no arguments: print usage
    if len(sys.argv) == 1:
        usage_exit()

    # parse command line
    position = 1
    argc = len(sys.argv)
    while position < argc:
        arg = sys.argv[position]
        if arg in ("-f", "-t", "-s", "-c"):
            position += 1
            try:
                value = sys.argv[position]
                if arg == "-f":
                    file_name = value
                elif arg == "-t":
                    tag = value
                elif arg == "-s":
                    seed = int(value)
                else:
                    copies = int(value)
            except (IndexError, ValueError):
                # IndexError: option given without a value;
                # ValueError: -s / -c value is not an integer.
                usage_exit()
        elif arg == "-h":
            # Exit status 1 is kept for backward compatibility.
            usage_exit()
        else:
            sys.stderr.write("Unknown command line argument: " + arg + "\n")
            sys.exit(1)
        position += 1

    # check that required arguments given
    if file_name is None:
        usage_exit()

    random.seed(seed)

    # read sequences
    seqs = sequence.readFASTA(file_name, 'Extended DNA')

    for s in seqs:
        # FIXME altschul can't handle ambigs
        seq = s.getString()
        name = s.getName()
        for copy_index in range(copies):
            shuffled_seq = dinuclShuffle(seq)
            if copies == 1:
                print(">%s\n%s" % (name + tag, shuffled_seq))
            else:
                print(">%s_%d\n%s" % (name + tag, copy_index, shuffled_seq))