Пример #1
0
# -l/--windows: integer window size, stored as args.wsize (None when omitted).
parser.add_argument('-l', '--windows', dest='wsize', action='store', type=int,
                    default=None,
                    help='It sets the windows size to analize the testing set.')


args = parser.parse_args()

######## 1 TT SETS ######
# The split files are named after the input file with a fixed prefix.
training_filename = 'training_' + args.infile
testing_filename = 'testing_' + args.infile
sys.stderr.write("Starting the Log:\nThe input data is from {}\n".format(args.infile))
sys.stderr.write("Arguments:\nMM Order:{}\nWindows Length:{}\n".format(args.order, args.wsize))
if args.pseudocounts:
    sys.stderr.write("This run is using pseudocounts.\n\n\n")
sys.stderr.write("Starting the training and testing separation...\n\n")
# NOTE(review): the meaning of the literal 3 is defined by
# m.training_testing_sets_separation, which is not visible here -- confirm.
m.training_testing_sets_separation(args.infile, 3, training_filename, testing_filename)

######## 2 +/- ###########


######## 3 kmer dict #####


# Build one k-mer table per class (background / signal) from the training
# file, using the pseudocount variant of the builder when -p was given.
if args.pseudocounts:
    sys.stderr.write("Starting the training background and foreground separation...\n\n")
    back_dict = m.build_hash_pseudocount(list(m.background_separation(training_filename)), args.order)
    sys.stderr.write("The MM for the bg is generated...\n\n")
    sign_dict = m.build_hash_pseudocount(list(m.signal_separation(training_filename)), args.order)
    # This message follows the signal (foreground) model; it previously
    # repeated the "bg" text of the line above.
    sys.stderr.write("The MM for the fg is generated...\n\n")
else:
    # NOTE(review): only back_dict is assigned in the visible part of this
    # branch -- confirm sign_dict is also built before it is used.
    back_dict = m.build_hash(list(m.background_separation(training_filename)), args.order)
Пример #2
0
### python3 MMIDv3.1.py -i ENCF... -o results/v3.1 -p
# Run the pipeline for every (k, l) pair drawn from k_list x l_list; only the
# pairs with k < l are processed, but every pair advances the counter.
for k in k_list:
    for l in l_list:
        count=1+count
        if k<l:
            sys.stderr.write("The log begins, k-order = {} and windows length = {}\n".format(k,l))
            # Scale before dividing: (count//54)*100 floors to 0 for every
            # count below 54, so the progress figure never moved.
            # NOTE(review): 54 is presumably the total number of (k, l)
            # pairs -- confirm against k_list / l_list.
            sys.stderr.write("We are in the {} of the simulations\n".format(count*100//54))
            training_filename='training_'+args.infile
            testing_filename='testing_'+args.infile
            sys.stderr.write("Starting the Log:\nThe input data is from {}\n".format(args.infile))
            sys.stderr.write("Arguments:\nMM Order:{}\nWindows Length:{}\n".format(k,l))
            if args.pseudocounts:
                sys.stderr.write("This run is using pseudocounts.\n\n\n")
            sys.stderr.write("Starting the training and testing separation...\n\n")
            # NOTE(review): the meaning of the literal 3 is defined by
            # m.training_testing_sets_separation, not visible here -- confirm.
            m.training_testing_sets_separation(args.infile,3,training_filename,testing_filename)

            ######## 2 +/- ###########


            ######## 3 kmer dict #####


            # Build the order-k tables for background and signal from the
            # training file; -p selects the pseudocount builder.
            if args.pseudocounts:
                sys.stderr.write("Starting the training background and foreground separation...\n\n")
                back_dict=m.build_hash_pseudocount(list(m.background_separation(training_filename)),k)
                sys.stderr.write("The MM for the bg is generated...\n\n")
                sign_dict=m.build_hash_pseudocount(list(m.signal_separation(training_filename)),k)
                # This message follows the signal (foreground) model; it
                # previously repeated the "bg" text of the line above.
                sys.stderr.write("The MM for the fg is generated...\n\n")
            else:
                # NOTE(review): only back_dict is assigned in the visible part
                # of this branch -- confirm sign_dict is built before use.
                back_dict=m.build_hash(list(m.background_separation(training_filename)),k)
Пример #3
0
# -w/--web_logo: boolean switch stored as args.weblogo (False unless given).
parser.add_argument(
    '-w', '--web_logo',
    dest='weblogo',
    action='store_true',
    default=False,
    help='It generate the top represented k-mers in the MM.',
)

args = parser.parse_args()

if args.train:
    ######## 1 TT SETS ######
    # Split-file names are the input name with a fixed prefix.
    training_filename = 'training_' + args.infile
    testing_filename = 'testing_' + args.infile

    # Banner and run parameters go to stderr.
    sys.stderr.write(
        (
            "\t\tMMID v.4\n"
            "\t\tAdvanced Genome Bioinformatics\n"
            "\t\tAndreu Bofill & David Mas\n"
            "\n"
            "The input CLIP data is from {}\n"
        ).format(args.infile)
    )
    sys.stderr.write(
        "Arguments:\nMM Order:{}\nWindows Length:{}\n".format(args.order, args.wsize)
    )
    if args.pseudocounts:
        sys.stderr.write("This run is using pseudocounts.\n\n")

    sys.stderr.write("First Step: Training and Testing file separation:\n\n")
    m.training_testing_sets_separation(
        args.infile, 3, training_filename, testing_filename
    )
    sys.stderr.write("DONE!\n\n")

    ##### 2 and 3 : Generating the fg and bg dictionaries ####
    sys.stderr.write("Second Step: Generating the Markov Models\n")
    # Both models are built by the same function; -p only changes which one.
    build_mm = m.build_hash_pseudocount if args.pseudocounts else m.build_hash

    back_dict = build_mm(list(m.background_separation(training_filename)), args.order)
    sys.stderr.write("The MM for the background DONE!\n")

    sign_dict = build_mm(list(m.signal_separation(training_filename)), args.order)
    sys.stderr.write("The MM for the foreground DONE!\n\n")
    #### 4. printing the MM in a file ####
Пример #4
0
                    default=None,
                    help='The output file where the MM is going to be written.')
# -p/--pseudocounts: boolean switch, False unless given.
parser.add_argument(
    '-p', '--pseudocounts',
    dest='pseudocounts',
    action='store_true',
    default=False,
    help="Add pseudocounts in the generated MM",
)
# -k/--order: integer order of the model, 3 when omitted.
parser.add_argument(
    '-k', '--order',
    dest='order',
    action='store',
    type=int,
    default=3,
    help='It sets the order of the Markov Model used.',
)
args = parser.parse_args()

######## 1 TT SETS ######
# Split-file names are the input name with a fixed prefix.
training_filename = 'training_' + args.infile
testing_filename = 'testing_' + args.infile
m.training_testing_sets_separation(
    args.infile, 3, training_filename, testing_filename
)

######## 2 +/- ###########
background, signal = m.background_signal_separation(training_filename)

######## 3 kmer dict #####
# The two tables are built the same way; -p only selects the builder.
if args.pseudocounts:
    hash_builder = m.build_hash_pseudocount
else:
    hash_builder = m.build_hash
back_dict = hash_builder(background, args.order)
# The 'sign_dit' spelling is kept as-is in case code outside this excerpt
# refers to it.
sign_dit = hash_builder(signal, args.order)

######### 4 print ########

# Signal table first, then background, then the output path from the CLI.
m.print_hash(sign_dit, back_dict, args.outfile)