parser.add_argument('-l', '--windows', dest='wsize', action='store', type=int,
                    default=None,
                    help='It sets the windows size to analize the testing set.')
args = parser.parse_args()

######## 1 TT SETS ######
# The training/testing file names are the input name with a fixed prefix.
training_filename = 'training_' + args.infile
testing_filename = 'testing_' + args.infile

# All progress logging goes to stderr.
sys.stderr.write("Starting the Log:\nThe input data is from {}\n".format(args.infile))
sys.stderr.write("Arguments:\nMM Order:{}\nWindows Length:{}\n".format(args.order, args.wsize))
if args.pseudocounts:
    sys.stderr.write("This run is using pseudocounts.\n\n\n")

sys.stderr.write("Starting the training and testing separation...\n\n")
# NOTE(review): the meaning of the literal 3 is not visible in this chunk --
# confirm against m.training_testing_sets_separation.
m.training_testing_sets_separation(args.infile, 3, training_filename, testing_filename)

######## 2 +/- ###########
######## 3 kmer dict #####
# One model is built from the background sequences and one from the signal
# sequences of the training file, both with order args.order.
if args.pseudocounts:
    sys.stderr.write("Starting the training background and foreground separation...\n\n")
    back_dict = m.build_hash_pseudocount(list(m.background_separation(training_filename)), args.order)
    sys.stderr.write("The MM for the bg is generated...\n\n")
    sign_dict = m.build_hash_pseudocount(list(m.signal_separation(training_filename)), args.order)
    # This message follows the signal (foreground) model; it previously
    # repeated the "bg" text from the line above.
    sys.stderr.write("The MM for the fg is generated...\n\n")
else:
    back_dict = m.build_hash(list(m.background_separation(training_filename)), args.order)
### python3 MMIDv3.1.py -i ENCF... -o results/v3.1 -p
# Visit every (k, l) pair from k_list x l_list; only pairs with k < l are
# processed, but `count` advances for every pair.
for k in k_list:
    for l in l_list:
        count += 1
        if k < l:
            sys.stderr.write("The log begins, k-order = {} and windows length = {}\n".format(k, l))
            # Scale before the floor division: (count // 54) * 100 stayed at 0
            # until count reached 54, so no intermediate progress was shown.
            # NOTE(review): 54 is presumably the total number of (k, l)
            # pairs -- confirm against k_list and l_list.
            sys.stderr.write("We are in the {} of the simulations\n".format(count * 100 // 54))

            # The training/testing file names are the input name with a fixed prefix.
            training_filename = 'training_' + args.infile
            testing_filename = 'testing_' + args.infile
            sys.stderr.write("Starting the Log:\nThe input data is from {}\n".format(args.infile))
            sys.stderr.write("Arguments:\nMM Order:{}\nWindows Length:{}\n".format(k, l))
            if args.pseudocounts:
                sys.stderr.write("This run is using pseudocounts.\n\n\n")

            sys.stderr.write("Starting the training and testing separation...\n\n")
            # NOTE(review): the separation is redone for every (k, l) pair and
            # rewrites the same two files; if the split is randomised, each
            # pair is evaluated on a different split -- confirm this is intended.
            m.training_testing_sets_separation(args.infile, 3, training_filename, testing_filename)

            ######## 2 +/- ###########
            ######## 3 kmer dict #####
            # Background and signal models are both built with order k.
            if args.pseudocounts:
                sys.stderr.write("Starting the training background and foreground separation...\n\n")
                back_dict = m.build_hash_pseudocount(list(m.background_separation(training_filename)), k)
                sys.stderr.write("The MM for the bg is generated...\n\n")
                sign_dict = m.build_hash_pseudocount(list(m.signal_separation(training_filename)), k)
                # This message follows the signal (foreground) model; it
                # previously repeated the "bg" text from the line above.
                sys.stderr.write("The MM for the fg is generated...\n\n")
            else:
                back_dict = m.build_hash(list(m.background_separation(training_filename)), k)
parser.add_argument(
    '-w', '--web_logo',
    dest='weblogo',
    action='store_true',
    default=False,
    help='It generate the top represented k-mers in the MM.',
)
args = parser.parse_args()

if args.train:
    ######## 1 TT SETS ######
    # The training/testing file names are the input name with a fixed prefix.
    training_filename = 'training_' + args.infile
    testing_filename = 'testing_' + args.infile

    # Banner and run parameters, written to stderr.
    sys.stderr.write(
        "\t\tMMID v.4\n\t\tAdvanced Genome Bioinformatics\n\t\tAndreu Bofill & David Mas\n\nThe input CLIP data is from {}\n".format(args.infile)
    )
    sys.stderr.write(
        "Arguments:\nMM Order:{}\nWindows Length:{}\n".format(args.order, args.wsize)
    )
    if args.pseudocounts:
        sys.stderr.write("This run is using pseudocounts.\n\n")

    sys.stderr.write("First Step: Training and Testing file separation:\n\n")
    # NOTE(review): the meaning of the literal 3 is not visible in this
    # chunk -- confirm against m.training_testing_sets_separation.
    m.training_testing_sets_separation(
        args.infile, 3, training_filename, testing_filename
    )
    sys.stderr.write("DONE!\n\n")

    ##### 2 and 3 : Generating the fg and bg dictionaries ####
    # Both modes run the same steps and log the same messages; only the
    # builder function depends on the pseudocounts flag.
    sys.stderr.write("Second Step: Generating the Markov Models\n")
    _build_model = m.build_hash_pseudocount if args.pseudocounts else m.build_hash

    background_seqs = list(m.background_separation(training_filename))
    back_dict = _build_model(background_seqs, args.order)
    sys.stderr.write("The MM for the background DONE!\n")

    signal_seqs = list(m.signal_separation(training_filename))
    sign_dict = _build_model(signal_seqs, args.order)
    sys.stderr.write("The MM for the foreground DONE!\n\n")

    #### 4. printing the MM in a file ####
default=None, help='The output file where the MM is going to be written.') parser.add_argument('-p','--pseudocounts', dest='pseudocounts', action='store_true', default=False, help="Add pseudocounts in the generated MM" ) parser.add_argument('-k','--order',dest='order',action='store',type=int, default=3, help='It sets the order of the Markov Model used.') args = parser.parse_args() ######## 1 TT SETS ###### training_filename='training_'+args.infile testing_filename='testing_'+args.infile m.training_testing_sets_separation(args.infile,3,training_filename,testing_filename) ######## 2 +/- ########### (background, signal)=m.background_signal_separation(training_filename) ######## 3 kmer dict ##### if (args.pseudocounts): back_dict=m.build_hash_pseudocount(background,args.order) sign_dit=m.build_hash_pseudocount(signal,args.order) else: back_dict=m.build_hash(background,args.order) sign_dit=m.build_hash(signal,args.order) ######### 4 print ######## m.print_hash(sign_dit,back_dict,args.outfile)