示例#1
0
def main(dataset_name=constants.DATASET_NAME, disease_name=None, expected_genes=None, score_method=constants.DEG_EDGER, network_file_name="dip.sif"):
    """Run the algorithm with the "sa" search method and emit its modules.

    The function points ``constants`` at the requested dataset, resolves the
    shared inputs through ``server.init_common_params``, renders the
    ``run_<ALGO_NAME>.sh`` template, executes it with bash from ``ALGO_DIR``,
    parses the result file into modules and finally writes the client output
    (building the reports first when ``constants.REPORTS`` is truthy).

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
        score_method: score method handed to ``server.init_common_params``.
        network_file_name: network file handed to ``server.init_common_params``.
    """
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    search_method = "sa"
    network_file_name, score_file_name, score_method, bg_genes = server.init_common_params(
        network_file_name, score_method)

    results_file_name = init_specific_params(search_method)

    script_file_name = format_script(
        os.path.join(constants.SH_DIR, "run_{}.sh".format(ALGO_NAME)),
        BASE_FOLDER=constants.BASE_PROFILE,
        DATASET_DIR=constants.DATASET_DIR,
        ALGO_DIR=ALGO_DIR,
        NETWORK_NAME=network_file_name,
        SCORE_FILE_NAME=score_file_name,
        IS_GREEDY=str(search_method == "greedy"),
        OUTPUT_FILE=results_file_name,
        NUM_OF_MODULES=10,
        OVERLAP_THRESHOLD=0)

    try:
        # The captured output is discarded; reading it to EOF is what makes
        # this call block until the script has closed its stdout.
        subprocess.Popen("bash {}".format(script_file_name), shell=True,
                         stdout=subprocess.PIPE, cwd=ALGO_DIR).stdout.read()
    finally:
        # Remove the generated script even when launching/reading fails, so a
        # failed run does not leave it behind.
        os.remove(script_file_name)

    modules_genes_file_name = os.path.join(
        constants.OUTPUT_DIR,
        "{}_{}_module_genes.txt".format(ALGO_NAME, search_method))
    all_bg_genes, modules = extract_modules_and_bg(
        bg_genes, results_file_name, modules_genes_file_name)

    output_base_dir = ""
    if constants.REPORTS:
        output_base_dir = build_all_reports(
            ALGO_NAME + "_" + search_method, dataset_name, modules,
            all_bg_genes, score_file_name, network_file_name, disease_name,
            expected_genes)

    output_file_name = os.path.join(
        constants.OUTPUT_DIR,
        "{}_{}_client_output.txt".format(ALGO_NAME, search_method))
    output_modules(output_file_name, modules, score_file_name, output_base_dir)
示例#2
0
def main(dataset_name=constants.DATASET_NAME,
         disease_name=None,
         expected_genes=None,
         score_method=constants.DEG_EDGER,
         network_file_name="dip.sif"):
    """Run the algorithm's shell script and emit the modules it produced.

    The function points ``constants`` at the requested dataset, resolves the
    shared inputs through ``server.init_common_params``, lets
    ``init_specific_params`` prepare the algorithm-specific inputs, renders
    the ``run_<ALGO_NAME>.sh`` template, runs it with bash and echoes its
    stdout, then extracts the modules and writes the client output (building
    the reports first when ``constants.REPORTS`` is truthy).

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
        score_method: score method handed to ``server.init_common_params``.
        network_file_name: network file handed to ``server.init_common_params``.
    """
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    network_file_name, score_file_name, score_method, bg_genes = server.init_common_params(
        network_file_name, score_method)

    # Only the (possibly rewritten) network file name is used below; the heat
    # file name is returned by the helper but not needed here.
    heat_file_name, network_file_name = init_specific_params(
        score_file_name, score_method, network_file_name)

    script_file_name = format_script(
        os.path.join(constants.SH_DIR, "run_{}.sh".format(ALGO_NAME)),
        ALGO_DIR=ALGO_DIR,
        CACHE_DIR=constants.CACHE_DIR,
        OUTPUT_DIR=constants.OUTPUT_DIR,
        # The template receives the network's base name without extension.
        NETWORK_NAME=os.path.splitext(os.path.basename(network_file_name))[0])

    try:
        # No ``cwd`` is passed: the script runs in the caller's working
        # directory.  The single-argument call form of print behaves the same
        # as the Python 2 statement and also parses on Python 3.
        print(subprocess.Popen(
            "bash {}".format(script_file_name), shell=True,
            stdout=subprocess.PIPE).stdout.read())
    finally:
        # Remove the generated script even when the run fails.
        os.remove(script_file_name)

    modules, all_bg_genes = extract_modules_and_bg(bg_genes)
    output_base_dir = ""
    if constants.REPORTS:
        output_base_dir = build_all_reports(ALGO_NAME, dataset_name, modules,
                                            all_bg_genes, score_file_name,
                                            network_file_name, disease_name,
                                            expected_genes)

    output_file_name = os.path.join(constants.OUTPUT_DIR,
                                    "{}_client_output.txt".format(ALGO_NAME))
    output_modules(output_file_name, modules, score_file_name, output_base_dir)
示例#3
0
def main(dataset_name=constants.DATASET_NAME, disease_name=None, expected_genes=None, score_method=constants.DEG_EDGER, network_file_name="dip.sif", fdr=0.05):
    """Compute modules via ``run_bionet_for_all_modules`` and emit them.

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
        score_method: score method handed to ``server.init_common_params``.
        network_file_name: network file handed to ``server.init_common_params``.
        fdr: first argument of ``run_bionet_for_all_modules``.
    """
    constants.update_dirs(DATASET_NAME_u=dataset_name)

    common_params = server.init_common_params(network_file_name, score_method)
    network_file_name, score_file_name, score_method, bg_genes = common_params

    all_bg_genes, modules = run_bionet_for_all_modules(
        fdr, network_file_name, score_file_name, constants.IS_PVAL_SCORES)

    # Reports are optional; without them the base directory stays empty.
    if constants.REPORTS:
        output_base_dir = build_all_reports(
            ALGO_NAME, dataset_name, modules, all_bg_genes, score_file_name,
            network_file_name, disease_name, expected_genes)
    else:
        output_base_dir = ""

    client_output_name = "{}_client_output.txt".format(ALGO_NAME)
    output_modules(os.path.join(constants.OUTPUT_DIR, client_output_name),
                   modules, score_file_name, output_base_dir)
def main(dataset_name=constants.DATASET_NAME,
         disease_name=None,
         expected_genes=None):
    """Extract modules one at a time with the INES/OPTIMAL configuration.

    The run script is executed 40 times.  Every iteration hands the genes of
    all previously returned modules (``omitted_genes``) to
    ``init_specific_params``; a module is kept only when it has more than
    three genes, but its genes are added to ``omitted_genes`` either way.

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
    """
    # NETWORK_NAME is only read here, so no ``global`` declaration is needed.
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    network_file_name, score_file_name, score_method, bg_genes = server.init_common_params(
        NETWORK_NAME)
    strategy = "INES"
    algorithm = "OPTIMAL"
    omitted_genes = []
    modules = []
    all_bg_genes = []
    for cur_i_module in range(40):
        binary_score_file_name, cur_network_file_name = init_specific_params(
            score_file_name, score_method, omitted_genes, network_file_name,
            str(cur_i_module))

        format_scripts(algo_name=ALGO_NAME,
                       score_file_name=binary_score_file_name,
                       network_name=cur_network_file_name,
                       STRATEGY=strategy,
                       algorithm=algorithm)
        # Single-argument call form: prints exactly like the Python 2
        # statement and also parses on Python 3.
        print(subprocess.Popen("bash {}/run_{}.sh".format(constants.SH_DIR,
                                                          ALGO_NAME),
                               shell=True,
                               stdout=subprocess.PIPE,
                               cwd=ALGO_DIR).stdout.read())
        module, all_bg_gene = extract_module_genes(bg_genes, strategy,
                                                   algorithm)
        # Only the first returned module is considered in each round.
        if len(module[0]) > 3:
            modules.append(module[0])
            all_bg_genes.append(all_bg_gene[0])
        # Small modules are dropped from the result but still omitted from
        # the following rounds.
        omitted_genes += list(module[0])

    output_base_dir = ""
    if constants.REPORTS:
        output_base_dir = build_all_reports(
            ALGO_NAME + "_" + strategy + "_" + algorithm, modules,
            all_bg_genes, score_file_name, network_file_name, disease_name,
            expected_genes)

    output_file_name = os.path.join(
        constants.OUTPUT_DIR,
        "{}_{}_{}_client_output.txt".format(ALGO_NAME, strategy, algorithm))
    output_modules(output_file_name, modules, score_file_name, output_base_dir)
示例#5
0
def main(dataset_name=constants.DATASET_NAME, disease_name=None, expected_genes=None):
    """Run the algorithm once with the GLONE strategy and emit its modules.

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
    """
    # NETWORK_NAME is only read here, so no ``global`` declaration is needed.
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    network_file_name, score_file_name, score_method, bg_genes = server.init_common_params(NETWORK_NAME)
    strategy = "GLONE"
    # NOTE(review): this is the module-level ``init_common_params``, not the
    # ``server`` one called above - confirm that is intended.
    binary_score_file_name = init_common_params(score_file_name, score_method)
    format_scripts(algo_name=ALGO_NAME,
                   score_file_name=binary_score_file_name,
                   network_name=NETWORK_NAME,
                   STRATEGY=strategy)
    # Single-argument call form: prints exactly like the Python 2 statement
    # and also parses on Python 3.
    print(subprocess.Popen("bash {}/run_{}.sh".format(constants.SH_DIR, ALGO_NAME),
                           shell=True,
                           stdout=subprocess.PIPE,
                           cwd=ALGO_DIR).stdout.read())
    modules, all_bg_genes = extract_module_genes(bg_genes, strategy)

    output_base_dir = ""
    if constants.REPORTS:
        output_base_dir = build_all_reports(
            ALGO_NAME + "_" + strategy, modules, all_bg_genes,
            score_file_name, network_file_name, disease_name, expected_genes)

    output_file_name = os.path.join(constants.OUTPUT_DIR,
                                    "{}_client_output.txt".format(ALGO_NAME))
    output_modules(output_file_name, modules, score_file_name, output_base_dir)
示例#6
0
def main(dataset_name=constants.DATASET_NAME, disease_name=None, expected_genes=None, score_method=constants.DEG_EDGER, network_file_name="dip.sif"):
    """Extract modules one at a time with the INES/GREEDY configuration.

    ``ALGO_DIR`` is copied to a randomly suffixed sibling directory and every
    run uses that copy as its working directory.  Up to 40 rounds are
    executed; each round passes the genes of all previously returned modules
    (``omitted_genes``) to ``init_specific_params``.  A module is kept only
    when it has more than three genes, and the loop stops early after more
    than three consecutive rounds that yielded a small module.

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
        score_method: score method handed to ``server.init_common_params``.
        network_file_name: network file handed to ``server.init_common_params``.
    """
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    network_file_name, score_file_name, score_method, bg_genes = server.init_common_params(network_file_name, score_method)
    strategy = "INES"
    algorithm = "GREEDY"
    omitted_genes = []
    modules = []
    all_bg_genes = []
    dest_algo_dir = "{}_{}".format(ALGO_DIR, random.random())
    shutil.copytree(ALGO_DIR, dest_algo_dir)
    try:
        empty_counter = 0
        for _ in range(40):
            binary_score_file_name, cur_network_file_name = init_specific_params(
                score_file_name, score_method, omitted_genes,
                network_file_name, str(random.random()), dest_algo_dir)

            script_file_name = format_scripts(
                score_file_name=binary_score_file_name,
                network_name=cur_network_file_name,
                STRATEGY=strategy, algorithm=algorithm,
                algo_dir=dest_algo_dir, dataset_name=dataset_name)
            try:
                # Single-argument call form: prints exactly like the Python 2
                # statement and also parses on Python 3.
                print(subprocess.Popen("bash {}".format(script_file_name), shell=True,
                                       stdout=subprocess.PIPE, cwd=dest_algo_dir).stdout.read())
                module, all_bg_gene = extract_module_genes(bg_genes, strategy, algorithm, dest_algo_dir)
            finally:
                # The per-round script is removed even when the round fails.
                os.remove(script_file_name)

            # Only the first returned module is considered in each round.
            if len(module[0]) > 3:
                empty_counter = 0
                modules.append(module[0])
                all_bg_genes.append(all_bg_gene[0])
            else:
                empty_counter += 1
            # Small modules are dropped from the result but still omitted
            # from the following rounds.
            omitted_genes += list(module[0])

            if empty_counter > 3:
                print("got more that 3 smalle modules in row. continue...")
                break
    finally:
        # Always discard the private copy of the algorithm directory, so a
        # failing round does not leave it on disk.
        shutil.rmtree(dest_algo_dir)

    output_base_dir = ""
    if constants.REPORTS:
        output_base_dir = build_all_reports("{}_{}_{}".format(ALGO_NAME, strategy, algorithm), dataset_name, modules, all_bg_genes, score_file_name, network_file_name, disease_name, expected_genes)
    output_file_name = os.path.join(constants.OUTPUT_DIR,
                                    "{}_client_output.txt".format("{}_{}_{}".format(ALGO_NAME, strategy, algorithm)))
    output_modules(output_file_name, modules, score_file_name, output_base_dir)
示例#7
0
def main(dataset_name=constants.DATASET_NAME,
         disease_name=None,
         expected_genes=None,
         score_method=constants.DEG_EDGER):
    """Run the algorithm on the dataset's ``ge.tsv`` file and emit its modules.

    The ``run_<ALGO_NAME>.sh`` template is rendered in ``constants.SH_DIR``
    and executed with bash from ``ALGO_DIR``; its stdout is read and
    discarded.  Modules are then extracted from the algorithm's output file.

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
        score_method: score method handed to ``server.init_common_params``.
    """
    global NETWORK_NAME
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    common_params = server.init_common_params(NETWORK_NAME, score_method)
    network_file_name, score_file_name, score_method, bg_genes = common_params

    ge_path = os.path.join(constants.DATA_DIR, "ge.tsv")
    sif_path = os.path.join(constants.NETWORKS_DIR, NETWORK_NAME + ".sif")
    ge_file_name, network_file_name, output_file_name = init_specific_params(
        ge_file_name=ge_path, network_file_name=sif_path)

    template_path = os.path.join(constants.SH_DIR,
                                 "run_{}.sh".format(ALGO_NAME))
    format_script(template_path,
                  ALGO_BASE_DIR=constants.ALGO_BASE_DIR,
                  GE_FILE_NAME=ge_file_name,
                  NETWORK_FILE_NAME=network_file_name,
                  BETA=0.95,
                  MINIMAL_MODULE_SIZE=4,
                  MAXIMAL_MODULE_SIZE=1000,
                  OUTPUT_FILE_NAME=output_file_name)

    command = "bash {}/run_{}.sh".format(constants.SH_DIR, ALGO_NAME)
    runner = subprocess.Popen(command,
                              shell=True,
                              stdout=subprocess.PIPE,
                              cwd=ALGO_DIR)
    # Drain stdout; the text itself is not used.
    runner.stdout.read()

    modules, all_bg_genes = extract_modules_and_bg(bg_genes, output_file_name)

    if constants.REPORTS:
        output_base_dir = build_all_reports(ALGO_NAME, modules, all_bg_genes,
                                            score_file_name, network_file_name,
                                            disease_name, expected_genes)
    else:
        output_base_dir = ""

    # From here on ``output_file_name`` is the client output, not the
    # algorithm's own output file.
    output_file_name = os.path.join(constants.OUTPUT_DIR,
                                    "{}_client_output.txt".format(ALGO_NAME))
    output_modules(output_file_name, modules, score_file_name, output_base_dir)
示例#8
0
def main(dataset_name=constants.DATASET_NAME, disease_name=None, expected_genes=None, score_method=constants.DEG_EDGER):
    """Run the algorithm's shell script and emit the modules it produced.

    Args:
        dataset_name: dataset to work on; forwarded to ``constants.update_dirs``.
        disease_name: forwarded unchanged to ``build_all_reports``.
        expected_genes: forwarded unchanged to ``build_all_reports``.
        score_method: score method handed to ``server.init_common_params``.

    Raises:
        Exception: when the resolved score method equals
            ``constants.PREDEFINED_SCORE``.
    """
    global NETWORK_NAME
    constants.update_dirs(DATASET_NAME_u=dataset_name)
    common_params = server.init_common_params(NETWORK_NAME, score_method)
    network_file_name, score_file_name, score_method, bg_genes = common_params
    if score_method == constants.PREDEFINED_SCORE:
        raise Exception("Cannot run this algo on scor-based metrics. please provide gene expression file")

    # The algorithm-specific setup replaces both the background genes and the
    # network file name obtained above.
    bg_genes, network_file_name = init_specific_params(NETWORK_NAME)

    template_path = os.path.join(constants.SH_DIR, "run_{}.sh".format(ALGO_NAME))
    format_script(template_path,
                  BASE_FOLDER=constants.BASE_PROFILE,
                  DATASET_DIR=constants.DATASET_DIR,
                  ALGO_DIR=ALGO_DIR,
                  NETWORK_NAME=NETWORK_NAME)

    command = "bash {}/run_{}.sh".format(constants.SH_DIR, ALGO_NAME)
    runner = subprocess.Popen(command, shell=True,
                              stdout=subprocess.PIPE, cwd=ALGO_DIR)
    # Drain stdout; the text itself is not used.
    runner.stdout.read()

    modules, all_bg_genes = extract_modules_and_bg(bg_genes)
    if constants.REPORTS:
        output_base_dir = build_all_reports(
            ALGO_NAME, modules, all_bg_genes, score_file_name,
            network_file_name, disease_name, expected_genes)
    else:
        output_base_dir = ""

    client_output_name = "{}_client_output.txt".format(ALGO_NAME)
    output_file_name = os.path.join(constants.OUTPUT_DIR, client_output_name)
    output_modules(output_file_name, modules, score_file_name, output_base_dir)