Exemplo n.º 1
0
def main():
    """Run the pipeline for the "Mapping the Human miRNA Interactome" paper.

    The first command-line argument, when present, is parsed with
    ``ast.literal_eval`` and used as the debug flag; when no argument is
    given the flag defaults to ``True``.  For every organism in the list the
    JSON log is configured, the paper metadata is recorded, and a
    ``Pipeline`` is built from the prepared paper data and executed.
    """
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        # No command-line argument was supplied.
        debug = True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/1-s2.0-S009286741300439X-mmc1.txt"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Human"]:
        log_name = "Mapping_the_Human_miRNA_" + organism + ".json"
        JsonLog.set_filename(
            utils.filename_date_append(Path(log_dir) / log_name))

        # Metadata entries are written in this fixed order.
        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Mapping the Human miRNA Interactome by CLASH Reveals Frequent Noncanonical Binding"),
            ('Organism', organism),
            ('paper_url',
             "https://www.sciencedirect.com/science/article/pii/S009286741300439X"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        raw_data = read_paper_data(interaction_file, debug)
        pipeline = Pipeline(paper_name="Mapping_the_Human_miRNA",
                            organism=organism,
                            in_df=df_prepare(raw_data),
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemplo n.º 2
0
def main():
    """Run the Darnell miRNA-target chimeras preparation for mouse and human.

    The first command-line argument, when present, is parsed with
    ``ast.literal_eval`` and used as the debug flag; when no argument is
    given the flag defaults to ``True``.  For each (organism, interaction
    file) pair the JSON log is configured, the paper-specific preparation
    object is run, and its output is handed to a ``Pipeline``.
    """
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        # No command-line argument was supplied.
        debug = True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    # (organism, interaction file) pairs, processed in this order.
    datasets = (
        ("Mouse", "Papers/ncomms9864-s2.xlsx"),
        ("Human", "Papers/ncomms9864-s4.xlsx"),
    )

    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)
    log_dir = "Datafiles_Prepare/Logs/"

    for organism, interaction_file in datasets:
        log_name = "Darnell_miRNA_target_chimeras_" + organism + ".json"
        JsonLog.set_filename(
            utils.filename_date_append(Path(log_dir) / log_name))

        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "miRNA–target chimeras reveal miRNA 3-end pairing as a major determinant of Argonaute target specificity"),
            ('Organism', organism),
            ('paper_url', "https://www.nature.com/articles/ncomms9864"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        chimeras = Darnell_miRNA_target_chimeras(interaction_file,
                                                 tmp_dir,
                                                 organism,
                                                 debug=debug)
        chimeras.run()

        print("Pipeline start")
        # prepare_for_pipeline() is called only after the message above.
        prepared = chimeras.prepare_for_pipeline()
        pipeline = Pipeline(paper_name="Darnell_miRNA_target_chimeras",
                            organism=organism,
                            in_df=prepared,
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemplo n.º 3
0
def main():
    """Start one ``seed_interaction_add`` process per non-duplex CSV file.

    Recursively collects the ``*.csv`` files below ``Datafiles_Prepare/CSV``,
    drops those whose path matches ``*duplex*``, and for each remaining file
    launches a ``Process`` running
    ``seed_interaction_add(fin, fout, tmp_dir)``.  ``fout`` is derived from
    ``fin`` via ``utils.filename_suffix_append(fin, "_duplex")``.  The
    function returns once every started process has been joined.
    """
    csv_root = Path("Datafiles_Prepare/CSV")

    # Materialise the file list before any process starts.
    run_list = [
        csv_file
        for csv_file in csv_root.glob('**/*.csv')
        if not csv_file.match("*duplex*")
    ]

    process_list = []
    for fin in run_list:
        fout = utils.filename_suffix_append(fin, "_duplex")
        # A temp dir is requested separately for every input file.
        tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

        proc = Process(target=seed_interaction_add, args=(fin, fout, tmp_dir))
        proc.start()
        process_list.append(proc)
        print(f"start process {proc.name} {fin}")

    for proc in process_list:
        proc.join()
Exemplo n.º 4
0
def main():
    """Run the "Pairing beyond the Seed" preparation and pipeline.

    The first command-line argument, when present, is parsed with
    ``ast.literal_eval`` and used as the debug flag; when no argument is
    given the flag defaults to ``True``.  For every organism in the list the
    JSON log is configured, the paper-specific preparation object is run,
    and its output is handed to a ``Pipeline``.
    """
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        # No command-line argument was supplied.
        debug = True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/1-s2.0-S1097276516305214-mmc3.xlsx"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Celegans"]:
        log_name = "Pairing_Beyond_Seed_" + organism + ".json"
        JsonLog.set_filename(
            utils.filename_date_append(Path(log_dir) / log_name))

        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Pairing beyond the Seed Supports MicroRNA Targeting Specificity"),
            ('Organism', organism),
            ('paper_url',
             "https://www.sciencedirect.com/science/article/pii/S1097276516305214#mmc3"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        preparer = Pairing_Beyond_Seed(input_file=interaction_file,
                                       organism=organism,
                                       tmp_dir=tmp_dir,
                                       debug=debug)
        preparer.run()

        prepared = preparer.prepare_for_pipeline()
        pipeline = Pipeline(paper_name="Pairing_Beyond_Seed",
                            organism=organism,
                            in_df=prepared,
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemplo n.º 5
0
def main():
    """Run the "Global mapping ... in cattle" preparation and pipeline.

    The first command-line argument, when present, is parsed with
    ``ast.literal_eval`` and used as the debug flag; when no argument is
    given the flag defaults to ``True``.  For every organism in the list the
    JSON log is configured, the paper-specific preparation object is run,
    and its output is handed to a ``Pipeline``.
    """
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        # No command-line argument was supplied.
        debug = True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/41598_2017_7880_MOESM4_ESM.csv"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Cow"]:
        log_name = "Global_Mapping_Cattle_" + organism + ".json"
        JsonLog.set_filename(
            utils.filename_date_append(Path(log_dir) / log_name))

        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Global mapping of miRNA-target interactions in cattle (Bos taurus)"),
            ('Organism', organism),
            ('paper_url',
             "https://www.nature.com/articles/s41598-017-07880-8#MOESM1"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        # This preparation class takes no organism argument.
        preparer = Global_Mapping_Cattle(input_file=interaction_file,
                                         tmp_dir=tmp_dir,
                                         debug=debug)
        preparer.run()

        prepared = preparer.prepare_for_pipeline()
        pipeline = Pipeline(paper_name="Global_Mapping_Cattle",
                            organism=organism,
                            in_df=prepared,
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemplo n.º 6
0
def main():
    """Run the "Unambiguous Identification" pipeline for three organisms.

    The first command-line argument, when present, is parsed with
    ``ast.literal_eval`` and used as the debug flag; when no argument is
    given the flag defaults to ``True``.  For each organism the JSON log is
    configured, the paper data is read for that organism and prepared, and
    the result is handed to a ``Pipeline``.
    """
    try:
        debug = ast.literal_eval(sys.argv[1])
    except IndexError:
        # No command-line argument was supplied.
        debug = True

    if debug:
        banner = ("***************************************\n"
                  "\t\t\t DEBUG \n"
                  "***************************************\n")
        print(banner)

    interaction_file = str(Path("Papers/1-s2.0-S1097276514003566-mmc3.xls"))
    log_dir = "Datafiles_Prepare/Logs/"
    tmp_dir = utils.make_tmp_dir("Datafiles_Prepare/tmp_dir", parents=True)

    for organism in ["Celegans", "Human", "Mouse"]:
        log_name = "Unambiguous_Identification_" + organism + ".json"
        JsonLog.set_filename(
            utils.filename_date_append(Path(log_dir) / log_name))

        log_entries = (
            ('file name', interaction_file),
            ('paper',
             "Unambiguous Identification of miRNA:Target Site Interactions by Different Types of Ligation Reactions"),
            ('Organism', organism),
            ('paper_url',
             "https://www.sciencedirect.com/science/article/pii/S1097276514003566#app3"),
        )
        for key, value in log_entries:
            JsonLog.add_to_json(key, value)

        # The same input file is read once per organism.
        raw_data = read_paper_data(interaction_file, organism, debug)
        pipeline = Pipeline(paper_name="Unambiguous_Identification",
                            organism=organism,
                            in_df=df_prepare(raw_data),
                            tmp_dir=tmp_dir)
        pipeline.run()
Exemplo n.º 7
0
def main():
    """Start one ``worker`` process per duplex CSV lacking a feature file.

    Collects the ``*_duplex_*.csv`` files directly inside
    ``Datafiles_Prepare/CSV``.  For each input whose
    ``Features/CSV/<stem>_feature.csv`` output does not exist yet, a
    ``Process`` running ``worker(fin, fout, tmp_dir)`` is started.  Inputs
    whose output already exists are skipped.  The function returns once
    every started process has been joined.
    """
    input_dir = Path("Datafiles_Prepare/CSV")
    output_dir = Path("Features/CSV")
    tmp_base = "Features/tmp_dir"

    # Materialise the file list before any process starts.
    files = list(input_dir.glob("*_duplex_*.csv"))

    process_list = []
    for fin in files:
        fout = output_dir / f"{fin.stem}_feature.csv"
        if fout.exists():
            # Skip before requesting a temp dir, so skipped inputs do not
            # trigger utils.make_tmp_dir at all.
            continue

        tmp_dir = utils.make_tmp_dir(tmp_base, parents=True)

        proc = Process(target=worker, args=(fin, fout, tmp_dir))
        proc.start()
        process_list.append(proc)
        print(f"start process {proc.name} {fin}")

    for proc in process_list:
        proc.join()
Exemplo n.º 8
0
def main():
    """Start one ``worker`` process per non-negative duplex CSV file.

    Collects the ``*duplex*.csv`` files directly inside
    ``Datafiles_Prepare/CSV``, excluding paths that match ``*negative*``,
    and prints the resulting list.  For each file the organism is taken as
    the part of the file stem before the first underscore, and a ``Process``
    running ``worker(organism, fin, tmp_dir)`` is started.  The function
    returns once every started process has been joined.
    """
    input_dir = Path("Datafiles_Prepare/CSV")
    tmp_base = "Features/tmp_dir"

    files = [
        candidate
        for candidate in input_dir.glob("*duplex*.csv")
        if not candidate.match("*negative*")
    ]
    print(files)

    started = []
    for fin in files:
        tmp_dir = utils.make_tmp_dir(tmp_base, parents=True)

        # Text before the first "_" of the stem (whole stem if none).
        organism, _, _ = fin.stem.partition("_")
        print(organism)

        proc = Process(target=worker, args=(organism, fin, tmp_dir))
        proc.start()
        started.append(proc)
        print(f"start process {proc.name} {fin}")

    for proc in started:
        proc.join()