Exemplo n.º 1
0
 def MINT_homo_tenTrial_to_csvData():
     """Export the ten-trial MINT_homo predictions as tab-separated files.

     Reads ./resultData/h**o/trimmed_predPPIs_homo.json (one JSON object per
     line, all merged into a single dict), and for every base method tag and
     every trial stored under "<method>_tenTrial_MINT_homo" writes
     ./GoSemSimPrepData/<tag>_<trial index>.csv with a "nodeA<TAB>nodeB"
     header followed by one mapped pair per line.

     Only the first N pairs of each trial are considered, where N is 10% of
     half the row count of the parsed MINT PPI frame. Pairs whose members are
     not both keys of helper.uniprot_map() are dropped *after* that cut, so a
     file may hold fewer than N rows.
     """
     # NOTE(review): the "h**o" path segment looks like a masked "homo";
     # confirm against the on-disk layout before changing it.
     _, ppi_frame = MINT.parse_MINT(
         ppiFile='./data/MINT/species human',
         uniProtMap="./data/UniProt/uniprot-taxonomy_9606.tab",
         wFile_GGI='./data/parsed/MINT_homo_GGI.pkl',
         wFile_PPI='./data/parsed/MINT_homo_PPI.pkl',
         root='../')
     top_n = int(int(len(ppi_frame.index) * 0.5) * 0.1)

     predictions = {}
     with open('./resultData/h**o/trimmed_predPPIs_homo.json', 'r') as src:
         for record in src:
             predictions.update(json.loads(record))

     # presumably gene name -> UniProt entry; verify against helper.uniprot_map
     entry_of = helper.uniprot_map()
     methods = ('commonNeighbor', 'L3uvJoin', 'xyContrib_dualCN_uvJoin',
                'CRA', 'CH2_L3', 'Sim')

     for dataset in ("MINT_homo",):
         for method in methods:
             tag = "{}_tenTrial_{}".format(method, dataset)

             # Translate every trial of this tag before any file is written.
             trials = []
             for ranked in predictions[tag]:
                 kept = []
                 for pair in ranked[:top_n]:
                     if pair[0] in entry_of and pair[1] in entry_of:
                         kept.append([entry_of[pair[0]], entry_of[pair[1]]])
                 trials.append(kept)

             for trial_no, kept in enumerate(trials):
                 target = './GoSemSimPrepData/{}_{}.csv'.format(tag, trial_no)
                 with open(target, 'w') as out:
                     rows = ['nodeA\tnodeB']
                     rows.extend("\t".join(ppi) for ppi in kept)
                     out.write("\n".join(rows) + "\n")
Exemplo n.º 2
0
def run_for_a_trajectory(PARMS, filename, nucleotides, charges):
    """Process one trajectory file with a pool of worker processes.

    Opens `filename` through MDAnalysis.Universe (PARMS["file_name"] is the
    first Universe argument), stores the frame count in PARMS["last_frame"],
    obtains the per-worker chunks from MINT.divide_trajectory and runs
    for_a_sub_traj in PARMS["threads"] separate processes, one chunk each.

    Returns:
        The multiprocessing.Manager list shared with the workers, returned
        once every worker process has been joined.
    """
    universe = MDAnalysis.Universe(PARMS["file_name"], filename)
    time_table = MINT.ReadInTrajectory(nucleotides, universe, PARMS)
    frame_total = len(universe.trajectory)
    PARMS["last_frame"] = frame_total
    manager = multiprocessing.Manager()
    # The third returned value is not used by this function.
    chunks, num_of_frames, _ = MINT.divide_trajectory(PARMS)

    # Single call that emits the same line on Python 2 and Python 3.
    print(" ".join(("Trajectory ", filename, "has ", str(frame_total),
                    " frames, running for ", str(num_of_frames))))

    # Workers derive their pickle names from the path without ".dcd".
    name = filename.replace(".dcd", "")
    que = manager.list()

    workers = []
    for idx in range(PARMS["threads"]):
        worker = multiprocessing.Process(
            target=for_a_sub_traj,
            args=(nucleotides, charges, PARMS, time_table, chunks[idx], que,
                  name))
        workers.append(worker)

    # Start everything first, then wait, so the chunks run concurrently.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    return que
Exemplo n.º 3
0
def run():
    """Entry point: read the MINT parameters and analyse the listed inputs.

    Steps, in order:
      1. Read the parameters with MINT.inside_read_in_parms() and open
         "<out_name>_hbonds_log.txt" in the working directory, storing the
         handle in PARMS["OUT_FILE"].
      2. Load the nucleotides and their charges through MINT.
      3. If PARMS["nucleotides"] is non-empty, keep only nucleotides whose
         get_id()[1] falls in one of the ";"-separated "(first-last)" ranges.
      4. If the first entry of PARMS["files_dcd"] contains
         "out_dictionaries_MINT", hand it to read_in_mint_pickles; otherwise
         run every listed trajectory and merge the results with put_together.

    All paths inside the working directory are built with one helper so they
    are correct whether or not PARMS["working_dir"] ends with "/". Previously
    the trajectory and CSV paths were plain concatenations and only worked
    with a trailing slash, while the log path always inserted one.
    """
    import os  # function-scope import keeps this block self-contained

    PARMS = MINT.inside_read_in_parms()

    def in_working_dir(relative):
        # Join under the working directory regardless of stray "/" on either side.
        return os.path.join(PARMS["working_dir"], relative.lstrip("/"))

    # NOTE(review): this handle is never closed in this function; presumably
    # the MINT helpers write to it during the run - confirm who owns it.
    PARMS["OUT_FILE"] = open(
        in_working_dir(PARMS["out_name"] + "_hbonds_log.txt"), "w")
    nucleotides = MINT.get_nucleic_from_pdb(PARMS)
    charges = MINT.read_in_charges(nucleotides, PARMS)
    pickles = {}

    if PARMS["nucleotides"] != "":
        # A set gives constant-time membership tests in the filter below.
        nuc_nums = set()
        for spec in PARMS["nucleotides"].split(";"):
            if not spec:
                continue
            bounds = [
                part
                for part in spec.replace("(", "").replace(")", "").split('-')
                if part
            ]
            nuc_nums.update(range(int(bounds[0]), int(bounds[1]) + 1))
        nucleotides = [
            nucleotide for nucleotide in nucleotides
            if nucleotide.get_id()[1] in nuc_nums
        ]

    csv_path = in_working_dir(PARMS["out_name"] + ".csv")
    if "out_dictionaries_MINT" in PARMS["files_dcd"][0]:
        read_in_mint_pickles(in_working_dir(PARMS["files_dcd"][0]), csv_path)
    else:
        for filename in PARMS["files_dcd"]:
            pickles[filename] = run_for_a_trajectory(
                PARMS, in_working_dir(filename), nucleotides, charges)
        put_together(pickles, csv_path, PARMS["files_dcd"],
                     PARMS["working_dir"])
        # Same output on Python 2 and Python 3.
        print(" ".join(("Written to file", csv_path)))
Exemplo n.º 4
0
def for_a_sub_traj(nucleotides, charges, PARMS, TimeTable, ran, que, name):
    """Process one chunk of frames and record its pickle path in `que`.

    The pickle path is "<name>_<min(ran)>_<max(ran)>.pkl". When that file
    already exists and PARMS["only_analysis"] is truthy, nothing is computed
    and the existing path is reported. Otherwise every frame N in `ran` is
    measured with MINT.measure_for_all, and the list from sum_of_hbonds
    extended with sum_of_stacking is stored under key N; the resulting dict
    is pickled to the path.

    Args:
        nucleotides, charges, PARMS, TimeTable: passed through unchanged to
            MINT.measure_for_all.
        ran: non-empty iterable of frame indices for this chunk.
        que: list-like object; the pickle path is appended to it.
        name: path prefix used to build the pickle file name.

    Raises:
        ValueError: if `ran` is empty (min()/max() of an empty sequence).
    """
    ppkl = name + "_" + str(min(ran)) + "_" + str(max(ran)) + ".pkl"

    if os.path.isfile(ppkl) and PARMS["only_analysis"]:
        que.append(ppkl)
        # Joined into one string so the output is the same on Python 2 and 3.
        print(" ".join(("Not running for ", ppkl)))
        return

    print(" ".join(("    running for ", ppkl)))
    out = {}
    for N in ran:
        dd = MINT.measure_for_all(nucleotides, charges, PARMS, TimeTable, N)
        per_frame = sum_of_hbonds(dd)
        per_frame.extend(sum_of_stacking(dd))
        out[N] = per_frame

    # Close the file before announcing the path, so a reader of `que` never
    # sees a pickle that is still buffered in this process.
    with open(ppkl, "wb") as handle:
        pickle.dump(out, handle)
    que.append(ppkl)
Exemplo n.º 5
0
    def append_precRecMap_multiCore(fNames,
                                    predPPI,
                                    samplePPI,
                                    datasetClass,
                                    coreNo,
                                    isGGI=False,
                                    logging=False):
        """Merge freshly computed precision/recall data into PRCurveMap.json.

        For each name in datasetClass the matching dataset is parsed and its
        'nodeA'/'nodeB' columns are turned into a list of two-item lists.
        Those lists, in datasetClass order, are passed with the other
        arguments to ppiLPred.precRecMap_multiCore; the returned dict is
        merged into ./resultData/PRCurveMap.json (created as "{}" if missing).

        Only datasets actually named in datasetClass are parsed, each at most
        once. Previously all four were parsed and converted on every call.

        Args:
            fNames, predPPI, samplePPI, coreNo, logging: forwarded unchanged
                to ppiLPred.precRecMap_multiCore.
            datasetClass: dataset names; each one of 'bioGRID', 'STRING',
                'MINT' or 'IntAct_spoke'.
            isGGI: when true, element 0 of each parser's result is used
                instead of element 1.

        Raises:
            KeyError: if datasetClass holds an unknown dataset name.
        """
        frameIdx = 0 if isGGI else 1

        # Wrapped in callables so that nothing is parsed until it is needed.
        parsers = {
            'bioGRID': lambda: bg.parse_bioGRID(root='../'),
            'STRING': lambda: string.parse_STRING(root='../'),
            'MINT': lambda: MINT.parse_MINT(root='../'),
            'IntAct_spoke':
            lambda: IntAct.parse_IntAct(root='../', spokeModel=True),
        }

        loadedSets = {}
        for dataset in datasetClass:
            if dataset in loadedSets:
                continue
            frame = list(parsers[dataset]())[frameIdx]
            loadedSets[dataset] = [
                list(ppi) for ppi in np.asarray(frame[['nodeA', 'nodeB']])
            ]
        fullPPISets = [loadedSets[dataset] for dataset in datasetClass]

        # Make sure the map file exists before the (possibly long) computation.
        if not os.path.exists("./resultData/PRCurveMap.json"):
            with open("./resultData/PRCurveMap.json", "w") as f:
                f.write(json.dumps({}))

        precRecMap = ppiLPred.precRecMap_multiCore(fNames, predPPI, samplePPI,
                                                   fullPPISets, coreNo,
                                                   logging)

        # Re-read after computing so entries written meanwhile are kept.
        with open('./resultData/PRCurveMap.json', 'r') as f:
            fullPrecRecMap = json.loads(f.read())
        fullPrecRecMap.update(precRecMap)
        with open('./resultData/PRCurveMap.json', 'w') as f:
            f.write(json.dumps(fullPrecRecMap))
Exemplo n.º 6
0
    def trim_ppi_result(fNames, datasetClass):
        """Keep only the top-ranked predictions and scores of each result.

        For every name in fNames, ./resultData/<name>_PPI.json and
        ./resultData/<name>_score.json are read line by line (one JSON list
        per line). Each list is cut to the trim size of the dataset named at
        the same position in datasetClass, and the result is appended, as one
        JSON object per line keyed by the name, to
        ./resultData/trimmed_predPPIs.json and trimmed_predScores.json.

        The trim size of a dataset is half the row count of element 1 of its
        parser's result, rounded down. Sizes are computed on first use and
        cached, so datasets that are not requested are never parsed.

        If trimmed_predPPIs.json does not exist yet, both output files are
        created (or emptied) before anything is appended.

        Raises:
            KeyError: if datasetClass holds an unknown dataset name.
            IndexError: if datasetClass is shorter than fNames.
        """
        ppiParsers = {
            'bioGRID': lambda: bg.parse_bioGRID(root='../'),
            'STRING': lambda: string.parse_STRING(root='../'),
            'MINT': lambda: MINT.parse_MINT(root='../'),
            'IntAct_spoke':
            lambda: IntAct.parse_IntAct(root='../', spokeModel=True),
        }
        trimNum = {}

        def trim_size(dataset):
            # Parse a dataset only when first requested, then reuse the size.
            if dataset not in trimNum:
                ppiFrame = list(ppiParsers[dataset]())[1]
                trimNum[dataset] = int(len(ppiFrame.index) * 0.5)
            return trimNum[dataset]

        if not os.path.exists('./resultData/trimmed_predPPIs.json'):
            with open('./resultData/trimmed_predPPIs.json', 'w') as f:
                pass
            with open('./resultData/trimmed_predScores.json', 'w') as f:
                pass

        for i, fName in enumerate(fNames):
            limit = trim_size(datasetClass[i])
            with open("./resultData/{}_PPI.json".format(fName), 'r') as f:
                predPPI = [json.loads(line)[0:limit] for line in f]
            with open("./resultData/{}_score.json".format(fName), 'r') as f:
                predScore = [json.loads(line)[0:limit] for line in f]
            with open('./resultData/trimmed_predPPIs.json', 'a+') as f:
                f.write(json.dumps({fName: predPPI}) + "\n")
            with open('./resultData/trimmed_predScores.json', 'a+') as f:
                f.write(json.dumps({fName: predScore}) + "\n")
Exemplo n.º 7
0
    def trim_multiple_ppi_result(fNames, datasetClass, trialSize):
        """Trim the per-trial prediction and score files of each result name.

        For every name in fNames and every trial j in range(trialSize),
        ./resultData/<name>_<j>_PPI.json and ./resultData/<name>_<j>_score.json
        are each loaded as a single JSON list and cut to the trim size of the
        dataset named at the same position in datasetClass. The per-trial
        lists are collected and appended, as one JSON object per line keyed
        by the name, to ./resultData/trimmed_predPPIs.json and
        ./resultData/trimmed_predScores.json.

        The trim size of a dataset is half the row count of element 1 of its
        parser's result, rounded down. Sizes are computed on first use and
        cached, so datasets that are not requested are never parsed.

        If trimmed_predPPIs.json does not exist yet, both output files are
        created (or emptied) before anything is appended.

        Raises:
            KeyError: if datasetClass holds an unknown dataset name.
            IndexError: if datasetClass is shorter than fNames.
        """
        ppiParsers = {
            'bioGRID': lambda: bg.parse_bioGRID(root='../'),
            'STRING': lambda: string.parse_STRING(root='../'),
            'MINT': lambda: MINT.parse_MINT(root='../'),
            'IntAct_spoke':
            lambda: IntAct.parse_IntAct(root='../', spokeModel=True),
        }
        trimNum = {}

        def trim_size(dataset):
            # Parse a dataset only when first requested, then reuse the size.
            if dataset not in trimNum:
                ppiFrame = list(ppiParsers[dataset]())[1]
                trimNum[dataset] = int(len(ppiFrame.index) * 0.5)
            return trimNum[dataset]

        if not os.path.exists('./resultData/trimmed_predPPIs.json'):
            with open('./resultData/trimmed_predPPIs.json', 'w') as f:
                pass
            with open('./resultData/trimmed_predScores.json', 'w') as f:
                pass

        for i, fName in enumerate(fNames):
            predPPI, predScore = [], []
            for j in range(trialSize):
                limit = trim_size(datasetClass[i])
                with open("./resultData/{}_{}_PPI.json".format(fName, j),
                          'r') as f:
                    predPPI.append(json.loads(f.read())[0:limit])
                with open("./resultData/{}_{}_score.json".format(fName, j),
                          'r') as f:
                    predScore.append(json.loads(f.read())[0:limit])
            with open('./resultData/trimmed_predPPIs.json', 'a+') as f:
                f.write(json.dumps({fName: predPPI}) + "\n")
            with open('./resultData/trimmed_predScores.json', 'a+') as f:
                f.write(json.dumps({fName: predScore}) + "\n")
Exemplo n.º 8
0
    def trim_multiple_ppi_result(fNames, datasetClass, trialSize):
        """Trim the per-trial result files of the human ("homo") datasets.

        For every name in fNames and every trial j in range(trialSize),
        ./resultData/h**o/<name>_<j>_PPI.json and <name>_<j>_score.json are
        each loaded as a single JSON list and cut to the trim size of the
        dataset named at the same position in datasetClass. The per-trial
        lists are appended, as one JSON object per line keyed by the name, to
        ./resultData/h**o/trimmed_predPPIs.json and trimmed_predScores.json.

        Known dataset names are 'HuRI', 'bioGRID_homo', 'STRING_homo' and
        'MINT_homo'. The trim size is half the row count of the dataset's PPI
        frame, rounded down. Sizes are computed on first use and cached, so
        datasets that are not requested are never parsed.

        If trimmed_predPPIs.json does not exist yet, both output files are
        created (or emptied) before anything is appended.

        Raises:
            KeyError: if datasetClass holds an unknown dataset name.
            IndexError: if datasetClass is shorter than fNames.
        """

        # NOTE(review): the "h**o" path segment looks like a masked "homo";
        # confirm against the on-disk layout before changing it.

        def load_bioGRID_homo():
            return list(
                bg.parse_bioGRID(
                    filename=
                    './data/BioGRID/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.tab2.txt',
                    wFile_GGI='./data/parsed/BioGRID_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/BioGRID_homo_PPI.pkl',
                    root="../"))[1]

        def load_STRING_homo():
            return list(
                string.parse_STRING(
                    ppiFile='./data/STRING/9606.protein.links.v11.0.txt',
                    typeFile='./data/STRING/9606.protein.actions.v11.0.txt',
                    uniProtMap=
                    './data/UniProt/uniprot-taxonomy_9606_STRING.tab',
                    root='../',
                    wFile_GGI='./data/parsed/STRING_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/STRING_homo_PPI.pkl'))[1]

        def load_MINT_homo():
            return list(
                MINT.parse_MINT(
                    ppiFile='./data/MINT/species human',
                    uniProtMap="./data/UniProt/uniprot-taxonomy_9606.tab",
                    wFile_GGI='./data/parsed/MINT_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/MINT_homo_PPI.pkl',
                    root="../"))[1]

        # Each loader returns the PPI frame of one dataset; HuRI's parser
        # result is used directly as the frame.
        ppiFrameLoaders = {
            'HuRI': lambda: HuRI.parse_HuRI(root='../'),
            "bioGRID_homo": load_bioGRID_homo,
            "STRING_homo": load_STRING_homo,
            "MINT_homo": load_MINT_homo,
        }
        trimNum = {}

        def trim_size(dataset):
            # Parse a dataset only when first requested, then reuse the size.
            if dataset not in trimNum:
                ppiFrame = ppiFrameLoaders[dataset]()
                trimNum[dataset] = int(len(ppiFrame.index) * 0.5)
            return trimNum[dataset]

        if not os.path.exists('./resultData/h**o/trimmed_predPPIs.json'):
            with open('./resultData/h**o/trimmed_predPPIs.json', 'w') as f:
                pass
            with open('./resultData/h**o/trimmed_predScores.json', 'w') as f:
                pass

        for i, fName in enumerate(fNames):
            predPPI, predScore = [], []
            for j in range(trialSize):
                limit = trim_size(datasetClass[i])
                with open(
                        "./resultData/h**o/{}_{}_PPI.json".format(fName, j),
                        'r') as f:
                    predPPI.append(json.loads(f.read())[0:limit])
                with open(
                        "./resultData/h**o/{}_{}_score.json".format(fName, j),
                        'r') as f:
                    predScore.append(json.loads(f.read())[0:limit])
            with open('./resultData/h**o/trimmed_predPPIs.json', 'a+') as f:
                f.write(json.dumps({fName: predPPI}) + "\n")
            with open('./resultData/h**o/trimmed_predScores.json', 'a+') as f:
                f.write(json.dumps({fName: predScore}) + "\n")
Exemplo n.º 9
0
    def append_precRecMap_multiCore(fNames,
                                    predPPI,
                                    samplePPI,
                                    datasetClass,
                                    coreNo,
                                    isGGI=False,
                                    logging=False):
        """Merge freshly computed precision/recall data into PRCurveMap_homo.json.

        For each name in datasetClass the matching human dataset is parsed
        and its 'nodeA'/'nodeB' columns are turned into a list of two-item
        lists. Those lists, in datasetClass order, are passed with the other
        arguments to ppiLPred.precRecMap_multiCore; the returned dict is
        merged into ./resultData/PRCurveMap_homo.json (created as "{}" if
        missing).

        Only datasets actually named in datasetClass are parsed, each at most
        once. Previously all four were parsed and converted on every call.

        Args:
            fNames, predPPI, samplePPI, coreNo, logging: forwarded unchanged
                to ppiLPred.precRecMap_multiCore.
            datasetClass: dataset names; each one of 'HuRI', 'bioGRID_homo',
                'STRING_homo' or 'MINT_homo'.
            isGGI: when true, element 0 of the bioGRID/STRING/MINT parser
                result is used instead of element 1. It has no effect on
                'HuRI', whose parser result is used directly.

        Raises:
            KeyError: if datasetClass holds an unknown dataset name.
        """
        frameIdx = 0 if isGGI else 1

        def load_bioGRID_homo():
            return list(
                bg.parse_bioGRID(
                    filename=
                    './data/BioGRID/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.tab2.txt',
                    wFile_GGI='./data/parsed/BioGRID_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/BioGRID_homo_PPI.pkl',
                    root="../"))[frameIdx]

        def load_STRING_homo():
            return list(
                string.parse_STRING(
                    ppiFile='./data/STRING/9606.protein.links.v11.0.txt',
                    typeFile='./data/STRING/9606.protein.actions.v11.0.txt',
                    uniProtMap=
                    './data/UniProt/uniprot-taxonomy_9606_STRING.tab',
                    root='../',
                    wFile_GGI='./data/parsed/STRING_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/STRING_homo_PPI.pkl'))[frameIdx]

        def load_MINT_homo():
            return list(
                MINT.parse_MINT(
                    ppiFile='./data/MINT/species human',
                    uniProtMap="./data/UniProt/uniprot-taxonomy_9606.tab",
                    wFile_GGI='./data/parsed/MINT_homo_GGI.pkl',
                    wFile_PPI='./data/parsed/MINT_homo_PPI.pkl',
                    root="../"))[frameIdx]

        # Callables, so that nothing is parsed until a dataset is requested.
        frameLoaders = {
            'HuRI': lambda: HuRI.parse_HuRI(root='../'),
            'bioGRID_homo': load_bioGRID_homo,
            'STRING_homo': load_STRING_homo,
            'MINT_homo': load_MINT_homo,
        }

        loadedSets = {}
        for dataset in datasetClass:
            if dataset in loadedSets:
                continue
            frame = frameLoaders[dataset]()
            loadedSets[dataset] = [
                list(ppi) for ppi in np.asarray(frame[['nodeA', 'nodeB']])
            ]
        fullPPISets = [loadedSets[dataset] for dataset in datasetClass]

        # Make sure the map file exists before the (possibly long) computation.
        if not os.path.exists("./resultData/PRCurveMap_homo.json"):
            with open("./resultData/PRCurveMap_homo.json", "w") as f:
                f.write(json.dumps({}))

        precRecMap = ppiLPred.precRecMap_multiCore(fNames, predPPI, samplePPI,
                                                   fullPPISets, coreNo,
                                                   logging)

        # Re-read after computing so entries written meanwhile are kept.
        with open('./resultData/PRCurveMap_homo.json', 'r') as f:
            fullPrecRecMap = json.loads(f.read())
        fullPrecRecMap.update(precRecMap)
        with open('./resultData/PRCurveMap_homo.json', 'w') as f:
            f.write(json.dumps(fullPrecRecMap))