Example #1
def generate_pickles(save_path, limit_to):

    if not os.path.exists(save_path):
        os.mkdir(save_path)

    print "Loading from DB"
    print "pentaplets"
    pentaplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)
    print "quadruplets"
    quadruplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
    print "triplets"
    triplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to)
    print "duplets"
    duplets = d.get_report_kplets(profile_id2code, limit_to=limit_to)

    print "Dumping to files"

    dump_file = os.path.join(save_path, 'duplets.p.bz2')
    print dump_file
    t.dump_compressed_pickle(dump_file, duplets)

    dump_file = os.path.join(save_path, 'triplets.p.bz2')
    print dump_file
    t.dump_compressed_pickle(dump_file, triplets)

    dump_file = os.path.join(save_path, 'quadruplets.p.bz2')
    print dump_file
    t.dump_compressed_pickle(dump_file, quadruplets)

    dump_file = os.path.join(save_path, 'pentaplets.p.bz2')
    print dump_file
    t.dump_compressed_pickle(dump_file, pentaplets)
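The `t.dump_compressed_pickle` / `t.load_compressed_pickle` helpers used throughout these examples are not shown on this page. Below is a minimal sketch of what such helpers could look like, assuming they are thin wrappers around bz2-compressed pickle files; the function names come from the examples, but the bodies are an assumption, not the project's actual code.

import bz2
import cPickle as pickle  # Python 2; use "import pickle" on Python 3

def dump_compressed_pickle(fname, obj):
    # Serialize obj and write it to fname as a bz2-compressed pickle.
    with bz2.BZ2File(fname, 'wb') as f:
        pickle.dump(obj, f, 2)

def load_compressed_pickle(fname):
    # Inverse of dump_compressed_pickle: read one object back from fname.
    with bz2.BZ2File(fname, 'rb') as f:
        return pickle.load(f)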
Example #2
def merging_pipeline_for_order(order, load_from_db=False):
    limit_to = 1000000000
    print "starting for ", order
    if load_from_db:
        print "Loading kplets from DB"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
            kplets = d.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
            kplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
            kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
            kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)
    else:
        print "Loading kplets from pickle file"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
        kplet_file_full = os.path.join(data_path, kplet_file)
        print kplet_file_full
        kplets = t.load_compressed_pickle(kplet_file_full)

    # print "Starting for", kplet_file
    # print "Loading kplets"
    # kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))
    print "No of kplets:", len(kplets)

    #print "Loading file2genes"

    #_file2genes = {}
    #for _f in os.listdir(neighborhood_files_path):
    #    _file2genes[_f] = dt.get_wgs_file(os.path.join(neighborhood_files_path, _f))

    # print 'Filtering'
    # kplets = filter_seed(kplets, _file2genes)
    # print "No of kplets:", len(kplets)
    # fname = os.path.join(data_path,  kplet_file.split('.')[0]+'_seed.p.bz2')
    # print 'Dumping', fname
    # t.dump_compressed_pickle(fname, kplets)

    print "Basic merging"
    merged_lists = merging.basic_merge_within_orders(kplets)
    fname = os.path.join(data_path, "basic_merged_" + kplet_file)
    print "Dumping basic merging: ", fname
    t.dump_compressed_pickle(fname, merged_lists)

    print "Iterative merging"
    merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists)
    fname = os.path.join(data_path, "iterative_merged_" + kplet_file)
    print "Dumping Iterative merging: ", fname
    t.dump_compressed_pickle(fname, merged_lists)
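The order-to-filename if/elif ladder recurs in every example on this page. A dictionary lookup is a compact equivalent; a sketch, where the file names are taken from the examples and the `ORDER2FILE` name is ours:

ORDER2FILE = {
    2: 'duplets.p.bz2',
    3: 'triplets.p.bz2',
    4: 'quadruplets.p.bz2',
    5: 'pentaplets.p.bz2',
}

kplet_file = ORDER2FILE[order]  # raises KeyError for unsupported orders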
Example #3
def generate_pickle_order(order, save_path, limit_to):

    print "Loading from DB"
    if order == 2:
        print 'duplets'
        data_file = 'duplets.p.bz2'
        kplets = d.get_report_kplets(profile_id2code, limit_to=limit_to)
    elif order == 3:
        print 'triplets'
        data_file = 'triplets.p.bz2'
        kplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to)
    elif order == 4:
        print 'quadruplets'
        data_file = 'quadruplets.p.bz2'
        kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
    elif order == 5:
        print 'pentaplets'
        data_file = 'pentaplets.p.bz2'
        kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)

        # # block for a workaround for the too-big pentaplet set
        # print 'Loading file2genes'
        # neighborhood_files_path = os.path.join(gv.project_data_path,'CRISPR/datasets/crispr/wgs')
        # _file2genes = {}
        # for _f in os.listdir(neighborhood_files_path):
        #     _file2genes[_f] = dt.get_wgs_file(os.path.join(neighborhood_files_path, _f))
        #
        # kplets = filter_seed(kplets, _file2genes)

        # dump_file = os.path.join(save_path, data_file)
        # print "Dumtiping to file", dump_file
        # t.dump_compressed_pickle(kplets, dump_file)
        # print "Finished"
        # sys.exit()

    dump_file = os.path.join(save_path, data_file)
    print "Dumping to file", dump_file
    t.dump_compressed_pickle(dump_file, kplets)
    print "Finished"
Example #4
def merging_pipeline_for_order(order, data_path, load_from_db=False):
    limit_to = 1000000000
    print "starting for ", order
    if load_from_db:
        print "Loading kplets from DB"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
            kplets = d.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
            kplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
            kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
            kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)
    else:
        print "Loading kplets from pickle file"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'

    if not load_from_db:
        kplet_file_full = os.path.join(data_path, kplet_file)
        print "Loading:", kplet_file_full
        kplets = t.load_compressed_pickle(kplet_file_full)

    print "No of kplets:", len(kplets)
    
    #print "Loading file2genes"
    # tic = time.time()
    # print "Basic merging"
    # merged_lists = merging.basic_merge_within_orders(kplets)
    # print "Basic merging done. Merged lists:", len(merged_lists)
    # # fname = os.path.join(data_path, "basic_merged_"+kplet_file)
    # # print "Dumping basic merging: ", fname
    # # t.dump_compressed_pickle(fname, merged_lists)
    #
    # print "Iterative merging"
    # merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists)
    # print "Iterative merging done. Merged lists:", len(merged_lists)
    # # fname = os.path.join(data_path, "iterative_merged_"+kplet_file)
    # # print "Dumping Iterative merging: ",fname
    # # t.dump_compressed_pickle(fname, merged_lists)
    # print "Completed in:", time.time()-tic, "(s)"

    csv_kplet_file = os.path.join(data_path, kplet_file.split('.')[0]+".csv")
    csv_merged_lists_file = os.path.join(data_path, "iterative_merged_"+kplet_file.split('.')[0]+".csv")
    print "Writing kplets to csv file:"
    print csv_kplet_file

    t.write_kplets_to_csv(kplets, csv_kplet_file)
    tic = time.time()
    print "Starting kpletmerger with params:"
    print "kpletmerger", csv_kplet_file, csv_merged_lists_file
    print "\n\n"
    sp.call(["kpletmerger",csv_kplet_file,csv_merged_lists_file])
    print "\n\n"
    print "Completed in:", time.time()-tic, "(s)"
Example #5
def merging_pipeline_for_order(order, load_from_db=False):
    limit_to = 1000000000
    print "starting for ", order
    if load_from_db:
        print "Loading kplets from DB"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
            kplets = d.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
            kplets = tr.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
            kplets = q.get_report_kplets(profile_id2code, limit_to=limit_to)
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
            kplets = p.get_report_kplets(profile_id2code, limit_to=limit_to)
    else:
        print "Loading kplets from pickle file"
        if order == 2:
            kplet_file = 'duplets.p.bz2'
        elif order == 3:
            kplet_file = 'triplets.p.bz2'
        elif order == 4:
            kplet_file = 'quadruplets.p.bz2'
        elif order == 5:
            kplet_file = 'pentaplets.p.bz2'
        kplet_file_full = os.path.join(data_path, kplet_file)
        print kplet_file_full
        kplets = t.load_compressed_pickle(kplet_file_full)

    # print "Starting for", kplet_file
    # print "Loading kplets"
    # kplets = t.load_compressed_pickle(os.path.join(data_path, kplet_file))
    print "No of kplets:", len(kplets)
    
    #print "Loading file2genes"

    #_file2genes = {}
    #for _f in os.listdir(neighborhood_files_path):
    #    _file2genes[_f] = dt.get_wgs_file(os.path.join(neighborhood_files_path, _f))

    # print 'Filtering'
    # kplets = filter_seed(kplets, _file2genes)
    # print "No of kplets:", len(kplets)
    # fname = os.path.join(data_path,  kplet_file.split('.')[0]+'_seed.p.bz2')
    # print 'Dumping', fname
    # t.dump_compressed_pickle(fname, kplets)

    print "Basic merging"
    merged_lists = merging.basic_merge_within_orders(kplets)
    fname = os.path.join(data_path, "basic_merged_" + kplet_file)
    print "Dumping basic merging: ", fname
    t.dump_compressed_pickle(fname, merged_lists)

    print "Iterative merging"
    merged_lists = merging.merge_kplets_within_orders_iterative(merged_lists)
    fname = os.path.join(data_path, "iterative_merged_" + kplet_file)
    print "Dumping Iterative merging: ", fname
    t.dump_compressed_pickle(fname, merged_lists)
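A hypothetical driver (our sketch, not part of the original source) that runs this pipeline for every kplet order against the pickles produced by generate_pickles:

if __name__ == '__main__':
    # Orders 2..5 correspond to duplets..pentaplets in the examples above.
    for order in (2, 3, 4, 5):
        merging_pipeline_for_order(order, load_from_db=False)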