# ---- Example #1 (score: 0) ----
    #
    # print 'Dumping to files'
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'pentaplets_merged_across.p.bz2'), 'w')
    # pickle.dump(pentaplets, dump_file)
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'quadruplets_merged_across.p.bz2'), 'w')
    # pickle.dump(quadruplets, dump_file)
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'triplets_merged_across.p.bz2'), 'w')
    # pickle.dump(triplets, dump_file)
    # dump_file = bz2.BZ2File(os.path.join(save_path, 'duplets_merged_across.p.bz2'), 'w')
    # pickle.dump(duplets, dump_file)


if __name__ == '__main__':

    print 'Pre-Loading dictionaries'
    target_profiles = t.bacteria_target_profiles()
    profile2def = t.map_cdd_profile2def()
    gid2arcog_cdd = t.map_gid2arcog_cdd()
    neighborhood_files_path = neighborhoods_path()
    profile_id2code = map_id2cdd()

    # for limit_to, report_dir in zip([300, 500, 1000, 100000],['top_300', 'top_500', 'top_1000', 'top_100000']):
    #
    #     print "Limit_to:", limit_to
    #     print
    #     generate_plots(limit_to, report_dir, target_profiles, profile2def, gid2arcog_cdd, neighborhood_files_path, profile_id2code)
    #     print 'Done'
    #     print "------------------------"

    data_path = os.path.join(gv.project_data_path, 'Bacteria/pickle/')
    for i, kplet_pool in zip([5, 4, 3, 2], [pentaplets, quadruplets, triplets, duplets]):
        for j, kplet_sublist in enumerate(kplet_pool):
            cur_reports_folder = os.path.join(report_files_dir, str(i))
            if not os.path.exists(cur_reports_folder):
                os.mkdir(cur_reports_folder)
            xls_file_name = os.path.join(cur_reports_folder,  "%d_%d.xls" % (j+1, i))
            r.write_to_xls(xls_file_name,kplet_sublist,target_profiles,profile2def,gid2arcog_cdd,neighborhood_files_path,file2src_src2org_map)


if __name__ == '__main__':

    # Python 2 stdlib: cPickle for fast (de)serialization, bz2 to read
    # the compressed pickle file below.
    import cPickle
    import bz2

    print 'Pre-Loading dictionaries'
    # Project lookup tables (t / neighborhoods_path are defined elsewhere
    # in this module's imports).
    target_profiles = t.bacteria_target_profiles()
    profile2def = t.map_cdd_profile2def()
    gid2arcog_cdd = t.map_gid2arcog_cdd()
    neighborhood_files_path = neighborhoods_path()
    # profile_id2code = map_id2cdd()
    # pickle.dump(profile_id2code, open('profile_id2code.p','w'))
    # Load the previously pickled profile-id -> CDD-code map from a
    # hard-coded absolute path (machine-specific).
    profile_id2code = cPickle.load(open('/Users/hudaiber/Projects/NewSystems/code/Bacteria/profile_id2code.p'))

    # Open the bz2-compressed pickle of merged pentaplets for streaming.
    fname = '/Users/hudaiber/Projects/NewSystems/data/Bacteria/pickle/100000/pentaplets_merged_across.p.bz2'
    f = bz2.BZ2File(fname, 'rb')

    # NOTE(review): f.read() with no size argument returns the whole
    # decompressed file at once, so this loop ends on the second pass and
    # `buffer` is never extended in the visible code — the loop body
    # appears truncated in this excerpt. TODO confirm against full source.
    buffer = ""
    while 1:
        data = f.read()
        if data == "":
            break
# ---- Example #3 (score: 0) ----
import lib.utils.search_in_genomes as sig
from lib.db import map_id2cdd_clusters
from lib.db.bacteria import pentaplets as p


if __name__ == '__main__':

    # Workspace for case studies and the raw pty data root.
    work_path = os.path.join(gv.project_data_path, 'Bacteria/cases')
    pty_path = gv.pty_data_path

    kplet_id = 306123

    # Fetch the kplet of interest (with genomic locations) from the DB,
    # resolving ids through the CDD cluster map.
    kplet = p.get_report_kplet(kplet_id, map_id2cdd_clusters(), load_locations=True)

    target_profiles = set(t.bacteria_target_profiles())

    # Round-trip the kplet through a compressed pickle on disk.
    dump_file = os.path.join(work_path, 'kplet.p.bz2')
    t.dump_compressed_pickle(dump_file, kplet)
    kplet = t.load_compressed_pickle(dump_file)

    # Search the genomes for the kplet's codes that are NOT target
    # profiles, allowing up to 4 genes between hits.
    kplet_codes = kplet.codes.difference(target_profiles)
    org2src, src2blocks = sig.search_kplet_in_genomes(kplet_codes, target_profiles, max_dist=4)

    # dump_file = os.path.join(work_path, 'org2src_global.p.bz2')
    # t.dump_compressed_pickle(dump_file, org2src)
    # dump_file = os.path.join(work_path, 'src2blocks_global.p.bz2')
    # t.dump_compressed_pickle(dump_file, src2blocks)
    #
    # dump_file = os.path.join(work_path, 'org2src_global.p.bz2')
    # org2src = t.load_compressed_pickle(dump_file)
# ---- Example #4 (score: 0) ----
import lib.utils.reporting as r
import lib.utils.search_in_genomes as sig
from lib.db import map_id2cdd_clusters
from lib.db.bacteria import pentaplets as p

if __name__ == '__main__':

    # Paths: case-study output directory and the pty data location.
    work_path = os.path.join(gv.project_data_path, 'Bacteria/cases')
    pty_path = gv.pty_data_path

    # The pentaplet under investigation.
    kplet_id = 306123

    # Resolve cluster ids to CDD codes, then pull the kplet record
    # (including its genomic locations) from the database.
    cluster_map = map_id2cdd_clusters()
    kplet = p.get_report_kplet(kplet_id, cluster_map, load_locations=True)

    target_profiles = set(t.bacteria_target_profiles())

    # Persist the kplet as a compressed pickle, then read it back.
    dump_file = os.path.join(work_path, 'kplet.p.bz2')
    t.dump_compressed_pickle(dump_file, kplet)
    kplet = t.load_compressed_pickle(dump_file)

    # Only search for codes that are not already target profiles.
    kplet_codes = kplet.codes.difference(target_profiles)

    org2src, src2blocks = sig.search_kplet_in_genomes(
        kplet_codes, target_profiles, max_dist=4)

    # dump_file = os.path.join(work_path, 'org2src_global.p.bz2')
    # t.dump_compressed_pickle(dump_file, org2src)
    # dump_file = os.path.join(work_path, 'src2blocks_global.p.bz2')
    # t.dump_compressed_pickle(dump_file, src2blocks)
    #