Exemplo n.º 1
0
def test_ARC2016_16(with_small, overwrite):
    """Call ``run`` once for each ARC2016-16 signal table (CNV, then methyl).

    The package's bundled ``test_input`` / ``test_output`` directories are
    registered through ``pydream2015.initdatapath`` before any data constant
    is read.  ``with_small`` and ``overwrite`` are accepted but not used here.
    """
    import pydream2015
    from os.path import split, join

    package_root, _ = split(pydream2015.__file__)
    input_dir = join(package_root, 'test_input')
    output_dir = join(package_root, 'test_output')
    pydream2015.initdatapath(input_dir, output_dir)

    # Settings shared by both invocations.
    ppiscore = 950
    alpha = 0.95
    ppifile = pydream2015.MYDATA_PPI_STRING_TRANSLATED
    gexfile = pydream2015.MYDATA_GENEEXPR_FILLED

    # (signal table relative to input_dir, result file name), in call order.
    jobs = (
        ('team/mschoi/arc2016_16_table(a)cnv.csv', 'smooth_cnv_str.csv'),
        ('team/mschoi/arc2016_16_table(b)methyl.csv', 'smooth_methyl_str.csv'),
    )
    for relative_signal, resfile in jobs:
        signalfile = join(input_dir, relative_signal)
        run(gexfile, ppifile, signalfile, resfile, ppiscore, alpha)
Exemplo n.º 2
0
def test_divide_train_leaderboard():
    """Call ``pydream2015.util.divide_train_leaderboard`` on the bundled data.

    The call must return exactly two values (train ids and test ids);
    nothing else about the result is checked.
    """
    sys.path.append('..')
    import pydream2015

    # Both fixture directories live next to the package's __init__ file.
    package_root = pathsplit(pydream2015.__file__)[0]
    indir = os.path.abspath(join(package_root, 'test_input'))
    outdir = os.path.abspath(join(package_root, 'test_output'))
    pydream2015.initdatapath(indir, outdir)

    trainids, testids = pydream2015.util.divide_train_leaderboard()
Exemplo n.º 3
0
def test_calc_dd3d():
    """Run ``feature.calc_dd3d`` unless its output file already exists.

    Data paths are initialised relative to the current working directory
    (``../test_input`` and ``../test_output``).  When the converted file is
    already present the computation is skipped and a notice is printed.
    """
    sys.path.append('../..')
    import pydream2015
    from pydream2015 import feature
    pydream2015.initdatapath('../test_input', '../test_output')

    in_dd3d = pydream2015.DATA_DRUG_DESC_3D
    out_dd3d_conv = pydream2015.MYDATA_DRUG_DESC_3D_CONV

    if not exists(out_dd3d_conv):
        feature.calc_dd3d(in_dd3d, out_dd3d_conv)
    else:
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function; the
        # former ``print a, b, c`` statement is a SyntaxError on Python 3.
        print('>> %s - skipped' % (out_dd3d_conv,))
Exemplo n.º 4
0
def test_fill_missinggex():
    """Run ``fill_missinggex`` and assert that its output file was created.

    Data paths are initialised relative to the current working directory
    (``../test_input`` and ``../test_output``).
    """
    sys.path.append('../..')
    import pydream2015

    pydream2015.initdatapath('../test_input', '../test_output')

    # Inputs first, then the file the call is expected to produce.
    therapy_path = pydream2015.DATA_COMBITHERAPY
    gex_path = pydream2015.DATA_GENEEXPR
    cellinfo_path = pydream2015.DATA_CELLINFO
    filled_path = pydream2015.MYDATA_GENEEXPR_FILLED

    fill_missinggex(therapy_path, gex_path, cellinfo_path, filled_path)

    assert exists(filled_path)
Exemplo n.º 5
0
def test_divide_combi():
    """Split the combination-therapy table with ``ratio=0.3`` and print sizes.

    Prints the number of train ids followed by the number of test ids
    returned by ``pydream2015.util.divide_combi``; no assertion is made on
    the values.
    """
    import pydream2015

    # Both fixture directories live next to the package's __init__ file.
    package_root = pathsplit(pydream2015.__file__)[0]
    indir = os.path.abspath(join(package_root, 'test_input'))
    outdir = os.path.abspath(join(package_root, 'test_output'))
    pydream2015.initdatapath(indir, outdir)

    therapy_traindata = pd.read_csv(pydream2015.DATA_COMBITHERAPY)
    trainids, testids = pydream2015.util.divide_combi(therapy_traindata,
                                                      ratio=0.3)

    for id_group in (trainids, testids):
        print(len(id_group))
Exemplo n.º 6
0
def test_make_biomartsdict():
    """Build the dictionary file if it is missing, then assert it exists.

    Data paths are initialised relative to the current working directory
    (``../test_input`` and ``../test_output``).  ``make_biomartsdict`` is
    only invoked when the output file is not already on disk.
    """
    sys.path.append('../..')
    import pydream2015

    pydream2015.initdatapath('../test_input', '../test_output')

    ensembl_source = pydream2015.DATA_ENSEMBLID
    dict_path = pydream2015.MYDATA_DICT

    print('>>', dict_path)

    if exists(dict_path):
        # Output already present: do not regenerate it.
        print('- skipped')
    else:
        make_biomartsdict(ensembl_source, dict_path, test=False)

    assert exists(dict_path)
Exemplo n.º 7
0
def test_translate_STRING():
    """Run ``translate_STRING`` and assert that its three outputs exist.

    Data paths are initialised relative to the current working directory
    (``../test_input`` and ``../test_output``).
    """
    sys.path.append('../..')
    import pydream2015

    pydream2015.initdatapath('../test_input', '../test_output')

    ppi_source = pydream2015.DATA_PPI_STRING
    dict_source = pydream2015.MYDATA_DICT

    # Output paths, in the positional order translate_STRING receives them.
    expected_outputs = (
        pydream2015.MYDATA_PPI_STRING_PROCESSED,
        pydream2015.MYDATA_PPI_STRING_TRANSLATED,
        pydream2015.MYDATA_PPI_STRING_HIST,
    )
    processed_path, translated_path, hist_path = expected_outputs

    translate_STRING(ppi_source, dict_source, processed_path,
                     translated_path, hist_path, test=False)

    for produced in expected_outputs:
        assert exists(produced)
Exemplo n.º 8
0
# This file is part of Pydream2015.
#*************************************************************************
import os, sys, cPickle as pickle, re, json, pandas as pd, numpy as np, pytest
import pydream2015

from sets import Set
from multiprocessing import Pool
from os.path import exists, split as pathsplit, join
from pdb import set_trace
from os.path import split,join

# Fixture directories sit next to the package's __init__ file.
indir = join(os.path.dirname(pydream2015.__file__), 'test_input')
outdir = join(os.path.dirname(pydream2015.__file__), 'test_output')

# Register the directories before any data-path constant below is read.
pydream2015.initdatapath(indir, outdir)

# Combination-therapy tables.
therapy = pydream2015.DATA_COMBITHERAPY
therapy_test = pydream2015.DATA_COMBITHERAPY_TEST
therapy_leader = pydream2015.DATA_COMBITHERAPY_LEADER

# Other data files, aliased under short module-level names.
gexdata = pydream2015.MYDATA_GENEEXPR_FILLED
mutdata = pydream2015.DATA_MUTATION
dd3dc = pydream2015.MYDATA_DRUG_DESC_3D_CONV
smoothed_CNV_STRING = pydream2015.MYDATA_SMOOTHED_CNV_STRING
Exemplo n.º 9
0
def test_reshape_therapy(with_small,
                         overwrite,
                         outputfile='therapy_expanded.csv'):
    """Flatten every raw dose-matrix CSV into one long-format CSV.

    Each ``*.csv`` file in ``pydream2015.DIR_TRAINING_COMBI_THERAPY`` is read
    with its ``'Unnamed: 0'`` column as the index.  Index labels preceding
    ``'(=Agent 1)'`` are taken as agent-1 doses and column labels preceding
    ``'(=Agent 2)'`` as agent-2 doses.  One output row is produced per
    (agent-1 dose, agent-2 dose) pair.

    Args:
        with_small: Accepted but not used by this function.
        overwrite: Accepted, but currently overridden to ``True`` (see the
            note in the body).
        outputfile: Path of the CSV to write.  It is also the path tested by
            the skip check.

    Returns:
        None.  The table is written to ``outputfile`` without the index.
    """
    # NOTE(review): this unconditional override makes the skip check below
    # inert, so the output is always regenerated.  Kept as-is because the
    # intent cannot be confirmed from this file.
    overwrite = True

    if exists(outputfile) and not overwrite:
        return

    import glob
    import sys
    sys.path.append('..')

    import pydream2015
    from pydream2015.util import update_progress

    package_root = pathsplit(pydream2015.__file__)[0]
    indir = os.path.abspath(join(package_root, 'test_input'))
    outdir = os.path.abspath(join(package_root, 'test_output'))
    pydream2015.initdatapath(indir, outdir)

    columns = ['CELL_LINE', 'COMBINATION_ID', 'COMPOUND_A', 'COMPOUND_B',
               'DOSE_A', 'DOSE_B', 'UNIT_A', 'UNIT_B', 'RESPONSE', 'METHOD',
               'FILE_SOURCE']

    # Fetch the directory contents.
    raw_data_dir = pydream2015.DIR_TRAINING_COMBI_THERAPY
    files = glob.glob(join(raw_data_dir, '*.csv'))

    # Rows are accumulated in a list and turned into a DataFrame once at the
    # end; growing a DataFrame one cell at a time re-allocates on every row.
    records = []

    for kk, thisfile in enumerate(files):
        update_progress(kk, len(files))
        filedata = pd.read_csv(thisfile, index_col='Unnamed: 0')

        row_labels = filedata.index.values.tolist()
        col_labels = filedata.columns.values.tolist()
        agent1_doses = row_labels[:row_labels.index('(=Agent 1)')]
        agent2_doses = col_labels[:col_labels.index('(=Agent 2)')]

        # Per-file metadata is read from the first agent-2 dose column.
        meta_col = agent2_doses[0]
        lbl_agent_1 = filedata.loc['Agent1', meta_col]
        lbl_agent_2 = filedata.loc['Agent2', meta_col]
        unit_1 = filedata.loc['Unit1', meta_col]
        unit_2 = filedata.loc['Unit2', meta_col]
        title = filedata.loc['Title', meta_col]
        source_name = os.path.split(thisfile)[1]

        for agent1 in agent1_doses:
            for agent2 in agent2_doses:
                # Dose labels are compared as the string "0".
                if (agent1 == "0") and (agent2 == "0"):
                    method = 'CONTROL'
                elif (agent1 == "0") or (agent2 == "0"):
                    method = 'MONO'
                else:
                    method = 'COMBI'

                records.append({
                    'CELL_LINE': title,
                    'COMBINATION_ID': lbl_agent_1 + '.' + lbl_agent_2,
                    'COMPOUND_A': lbl_agent_1,
                    'COMPOUND_B': lbl_agent_2,
                    'DOSE_A': agent1,
                    'DOSE_B': agent2,
                    'UNIT_A': unit_1,
                    'UNIT_B': unit_2,
                    'RESPONSE': filedata.loc[agent1, agent2],
                    'METHOD': method,
                    'FILE_SOURCE': source_name,
                })

    # dtype=object stores each value exactly as read, with no type inference.
    output_df = pd.DataFrame(records, columns=columns, dtype=object)

    # Write to the requested path; the skip check above tests this same path.
    output_df.to_csv(outputfile, index=False)