def main():
    """Command-line driver: run MetaAce on a FASTA file and print scored motifs.

    Usage (read directly from ``sys.argv``)::

        <prog> <fasta_file> [-w width] [-iter n] [-genome name]
               [-gcback frac] [-valid tf] [-H250] [-Ch22]

    Defaults: width 10, 10 iterations, genome 'YEAST', GC background 0.38.
    ``-H250`` selects genome 'HUMAN_250' and ``-Ch22`` selects 'Ch22'; both
    also set the GC background to 0.44.  ``-valid`` may be repeated.

    The usage text is printed and the process exits with status 1 when no
    FASTA file is given.  An option that needs a value but is the last token
    on the command line is reported on stderr and also exits with status 1
    (previously this surfaced as an IndexError traceback).

    Every motif in ``MetaAce(...).results`` receives ``pvalue`` and
    ``church`` attributes computed by the selected ``MotifMetrics.ProbeSet``;
    the motifs are then printed in ascending ``church`` order.
    """
    argv = sys.argv

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement, so the output below is unchanged.
    if len(argv) < 2:
        print("Usage: %s <fasta_file>" % (re.sub('^.*/', '', argv[0])))
        print(' [-w width (10)] Model Width (note AlignACE allows gaps)')
        print(' [-iter (10)] Number of times to run AlignACE ')
        print(' [-genome fsafile] Genome (for computing background)')
        print(' [-gcback (.38) GC background (use 0.44 for human, 0.38 for yeast)]')
        sys.exit(1)

    # Echo the command line as a '#' comment, escaping embedded spaces.
    print("#" + ' '.join([x.replace(' ', '\\ ') for x in argv]))

    fastafile = argv[1]
    width = 10
    valid_tfs = []
    num_iter = 10
    genome = 'YEAST'
    gcback = 0.38

    def option_value(pos):
        """Return the token that follows argv[pos], or exit if it is missing."""
        if pos + 1 >= len(argv):
            sys.stderr.write("Error: option %s requires a value\n" % argv[pos])
            sys.exit(1)
        return argv[pos + 1]

    # Every token (including argv[0] and the FASTA file name) is compared
    # against the known flags, exactly as the original scan did.
    for pos, tok in enumerate(argv):
        if tok == '-w':
            width = int(option_value(pos))
        elif tok == '-valid':
            valid_tfs.append(option_value(pos))
        elif tok == '-iter':
            num_iter = int(option_value(pos))
        elif tok == '-gcback':
            gcback = float(option_value(pos))
        elif tok == '-genome':
            genome = option_value(pos)
        elif tok == '-H250':
            genome = 'HUMAN_250'
            gcback = 0.44
        elif tok == '-Ch22':
            genome = 'Ch22'
            gcback = 0.44

    # Imported only once the command line is known to be usable, so usage
    # and argument errors are reported before this import is attempted.
    from TAMO import MotifMetrics

    theMeta = MetaAce(fastafile, width, num_iter, gcback)

    probe_set = MotifMetrics.ProbeSet(genome)
    ids = probe_set.ids_from_file(fastafile)
    ids = probe_set.filter(ids)  # Only uses IDs that are actually in the Genome file

    motifs = list(theMeta.results)
    for motif in motifs:
        motif.pvalue = probe_set.p_value(motif, ids, factor=0.7)
        motif.church = probe_set.church(motif, ids)
        # NOTE(review): motif.valid is overwritten on every pass, so only the
        # result for the last -valid argument is kept -- confirm this is intended.
        for valid_tf in valid_tfs:
            motif.valid = Validate.validate(motif, valid_tf, '', 'Want Tuple')

    motifs.sort(key=lambda m: m.church)
    print_motifs(motifs, kmer_count=-1)
def main():
    """Command-line driver: run metaMDscan on a FASTA file and print scored motifs.

    Options (parsed from ``sys.argv[1:]`` with getopt):
        -f <file>         FASTA file; its keys are used as the probe ids
        --genome <name>   argument for MotifMetrics.ProbeSet (default 'YEAST')
        --top <n>         top_count handed to metaMDscan (default 10)
        --pcnt <p>        percentage of the filtered probe ids; top_count is
                          raised to that many if it is larger
        --range <a,b>     w_start,w_stop handed to metaMDscan (default 8,15)
        --bgfile <file>   accepted by the parser but currently ignored

    On a getopt parse error the error message is printed and ``usage()`` is
    called.  ``usage()`` is also called when no options, or no ``-f`` file,
    are supplied.

    Each motif in the metaMDscan result gets ``pvalue`` and ``church``
    attributes from the ProbeSet, and the motifs are printed in ascending
    ``pvalue`` order.
    """
    short_opts = 'f:'
    long_opts = ['genome=', 'range=', 'top=', 'pcnt=', 'bgfile=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Report the actual parse problem (the original printed the
        # exception class's __dict__, which says nothing about the error).
        print(err)
        usage()
        # Only reached if usage() returns; 'opts' is unbound here.
        return
    if not opts:
        usage()

    fastafile = ''
    top_count = 10
    top_pcnt = None
    genome = 'YEAST'
    w_start = 8
    w_stop = 15
    # NOTE(review): '--bgfile' is accepted above but nothing below consumes it.

    for opt, value in opts:
        if opt == '-f':
            fastafile = value
        elif opt == '--genome':
            genome = value
        elif opt == '--top':
            top_count = int(value)
        elif opt == '--pcnt':
            top_pcnt = float(value)
        elif opt == '--range':
            w_start, w_stop = [int(x) for x in value.split(',')]

    if not fastafile:
        # Without -f there is no input file to read probe ids from.
        usage()
        return

    # Echo the command line as a '#' comment.
    print("#" + ' '.join(sys.argv))

    probeids = Fasta.keys(fastafile)
    probe_set = MotifMetrics.ProbeSet(genome)
    probeids = probe_set.filter(probeids)

    if top_pcnt:
        # --pcnt can only raise top_count, never lower it.
        top_count = max(top_count, int(top_pcnt / 100.0 * len(probeids)))

    theMeta = metaMDscan(fastafile, w_start, w_stop, top_count)

    for m in theMeta.motifs:
        # 'v' is passed through unchanged; presumably a verbosity flag -- confirm.
        m.pvalue = probe_set.p_value(m, probeids, 'v')
        m.church = probe_set.church(m, probeids, 'v')
        sys.stdout.flush()

    theMeta.motifs.sort(key=lambda m: m.pvalue)
    print_motifs(theMeta.motifs)
def main():
    """Command-line driver: run metaMDscan on a FASTA file and print scored motifs.

    Options (parsed from ``sys.argv[1:]`` with getopt):
        -f <file>         FASTA file; its keys are used as the probe ids
        --genome <name>   argument for MotifMetrics.ProbeSet (default 'YEAST')
        --top <n>         top_count handed to metaMDscan (default 10)
        --pcnt <p>        percentage of the filtered probe ids; top_count is
                          raised to that many if it is larger
        --range <a,b>     w_start,w_stop handed to metaMDscan (default 8,15)
        --bgfile <file>   accepted by the parser but currently ignored

    On a getopt parse error the error message is printed and ``usage()`` is
    called.  ``usage()`` is also called when no options, or no ``-f`` file,
    are supplied.

    Each motif in the metaMDscan result gets ``pvalue`` and ``church``
    attributes from the ProbeSet, and the motifs are printed in ascending
    ``pvalue`` order.
    """
    short_opts = 'f:'
    long_opts = ['genome=', 'range=', 'top=', 'pcnt=', 'bgfile=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Report the actual parse problem (the original printed the
        # exception class's __dict__, which says nothing about the error).
        print(err)
        usage()
        # Only reached if usage() returns; 'opts' is unbound here.
        return
    if not opts:
        usage()

    fastafile = ''
    top_count = 10
    top_pcnt = None
    genome = 'YEAST'
    w_start = 8
    w_stop = 15
    # NOTE(review): '--bgfile' is accepted above but nothing below consumes it.

    for opt, value in opts:
        if opt == '-f':
            fastafile = value
        elif opt == '--genome':
            genome = value
        elif opt == '--top':
            top_count = int(value)
        elif opt == '--pcnt':
            top_pcnt = float(value)
        elif opt == '--range':
            w_start, w_stop = [int(x) for x in value.split(',')]

    if not fastafile:
        # Without -f there is no input file to read probe ids from.
        usage()
        return

    # Echo the command line as a '#' comment.
    print("#" + ' '.join(sys.argv))

    probeids = Fasta.keys(fastafile)
    probe_set = MotifMetrics.ProbeSet(genome)
    probeids = probe_set.filter(probeids)

    if top_pcnt:
        # --pcnt can only raise top_count, never lower it.
        top_count = max(top_count, int(top_pcnt / 100.0 * len(probeids)))

    theMeta = metaMDscan(fastafile, w_start, w_stop, top_count)

    for m in theMeta.motifs:
        # 'v' is passed through unchanged; presumably a verbosity flag -- confirm.
        m.pvalue = probe_set.p_value(m, probeids, 'v')
        m.church = probe_set.church(m, probeids, 'v')
        sys.stdout.flush()

    theMeta.motifs.sort(key=lambda m: m.pvalue)
    print_motifs(theMeta.motifs)