#### Markov Model (MM) construction ####
# NOTE(review): this section was reconstructed from a whitespace-collapsed
# source; statement nesting is inferred from syntax -- confirm against the
# original layout.
sys.stderr.write("Second Step: Generating the Markov Models\n")

# Both branches of the former if/else performed the same steps and differed
# only in which builder was called, so the builder is selected once here.
build_model = m.build_hash_pseudocount if args.pseudocounts else m.build_hash

# Whatever m.background_separation / m.signal_separation yield is
# materialised into a list before being handed to the builder together with
# the requested model order.
back_dict = build_model(list(m.background_separation(training_filename)), args.order)
sys.stderr.write("The MM for the background DONE!\n")
sign_dict = build_model(list(m.signal_separation(training_filename)), args.order)
sys.stderr.write("The MM for the foreground DONE!\n\n")

#### 4. writing the MM to a file ####
output_mm = args.outfile + '.MM'
sys.stderr.write("Printing the model in {}\n".format(output_mm))
m.print_hash(sign_dict, back_dict, output_mm)

#### 5. Evaluation of the model ####
sys.stderr.write("Third Step, Evaluation of the model\n")
if args.boundaries:
    sys.stderr.write(
        "Boundaries evaluation have been selected.\n"
        "The threshold range goes from -40 to 40\n"
    )
    output_filename = args.outfile + '.data'
    # NOTE(review): this handle is not closed in the visible code; presumably
    # the remainder of the script writes to it and closes it -- confirm.
    output = open(output_filename, "w")
    # Thresholds actually visited: -40, every integer from -10 to 9, then 40.
    list_t = [-40] + list(range(-10, 10)) + [40]
    for i in list_t:
        sys.stderr.write("{}\n".format(i))
        # Reset the per-threshold counters and rates.
        TPR = FPR = PPV = TP = FP = TN = 0
######## 3 Markov Models (MM) ########
# NOTE(review): this section was reconstructed from a whitespace-collapsed
# source; statement nesting is inferred from syntax -- confirm against the
# original layout.
# Build one model from the background data and one from the signal
# (foreground) data of the training file. Progress messages are only emitted
# in the pseudocount branch, as in the original.
if args.pseudocounts:
    sys.stderr.write("Starting the training background and foreground separation...\n\n")
    back_dict = m.build_hash_pseudocount(
        list(m.background_separation(training_filename)), args.order
    )
    sys.stderr.write("The MM for the bg is generated...\n\n")
    sign_dict = m.build_hash_pseudocount(
        list(m.signal_separation(training_filename)), args.order
    )
    # Fixed: this message used to say "bg" a second time although it is
    # written right after the signal (foreground) model has been built.
    sys.stderr.write("The MM for the fg is generated...\n\n")
else:
    back_dict = m.build_hash(
        list(m.background_separation(training_filename)), args.order
    )
    sign_dict = m.build_hash(
        list(m.signal_separation(training_filename)), args.order
    )

######### 4 print ########
# Write both models to "<outfile>.MM".
output_filename = args.outfile + '.MM'
sys.stderr.write("Printing the model in {}...\n\n".format(output_filename))
m.print_hash(sign_dict, back_dict, output_filename)

######## 5 windows #######
sys.stderr.write("Starting the testing background and foreground separation...\n\n")
sys.stderr.write("Starting the computation of the Scores...\n\n")

# Output paths for the score files, all derived from the --outfile prefix.
output_filename_fg = args.outfile + '_fg.th'
output_filename_bg = args.outfile + '_bg.th'
output_filename_all_top = args.outfile + '_all_top.th'
output_filename_all_bot = args.outfile + '_all_bot.th'

# NOTE(review): these handles are not closed in the visible code; presumably
# the remainder of the script writes to them and closes them -- confirm.
out_fg = open(output_filename_fg, "w")
out_bg = open(output_filename_bg, "w")
output_all_top = open(output_filename_all_top, "w")
output_all_bot = open(output_filename_all_bot, "w")

sys.stderr.write("Printing the background scores at {}...\n\n".format(output_filename_bg))
action='store_true', default=False, help="Add pseudocounts in the generated MM" ) parser.add_argument('-k','--order',dest='order',action='store',type=int, default=3, help='It sets the order of the Markov Model used.') args = parser.parse_args() ######## 1 TT SETS ###### training_filename='training_'+args.infile testing_filename='testing_'+args.infile m.training_testing_sets_separation(args.infile,3,training_filename,testing_filename) ######## 2 +/- ########### (background, signal)=m.background_signal_separation(training_filename) ######## 3 kmer dict ##### if (args.pseudocounts): back_dict=m.build_hash_pseudocount(background,args.order) sign_dit=m.build_hash_pseudocount(signal,args.order) else: back_dict=m.build_hash(background,args.order) sign_dit=m.build_hash(signal,args.order) ######### 4 print ######## m.print_hash(sign_dit,back_dict,args.outfile) sys.exit()