Пример #1
0
from HMF.code.cross_validation.cross_validation_hmf import CrossValidation
from HMF.drug_sensitivity.load_dataset import load_data_without_empty, load_data_filter

import numpy


''' Load datasets '''
# Every input folder is built from the "overlap" data directory of the project.
location = project_location + "HMF/drug_sensitivity/data/overlap/"
location_data = location + "data_row_01/"
location_features_drugs = location + "features_drugs/"
location_features_cell_lines = location + "features_cell_lines/"
location_kernels = location + "kernels_features/"

# CCLE EC50 is loaded first; the cell lines and drugs it returns are passed to
# the three filtered loads below (presumably to restrict them to the same rows
# and columns -- confirm against load_data_filter).
R_ccle_ec, M_ccle_ec, cell_lines, drugs = load_data_without_empty(
    location_data + "ccle_ec50_row_01.txt")
R_ctrp, M_ctrp = load_data_filter(
    location_data + "ctrp_ec50_row_01.txt", cell_lines, drugs)
R_gdsc, M_gdsc = load_data_filter(
    location_data + "gdsc_ic50_row_01.txt", cell_lines, drugs)
R_ccle_ic, M_ccle_ic = load_data_filter(
    location_data + "ccle_ic50_row_01.txt", cell_lines, drugs)


''' Settings HMF '''
# The author's note next to these values lists 500, 400, 2 as another setting.
iterations = 200
burn_in = 150
thinning = 2
no_folds = 10

hyperparameters = {
    'alphatau' : 1.,
    'betatau'  : 1.,
    'alpha0'   : 0.001,
    'beta0'    : 0.001,
    'lambdaF'  : 0.1,
    'lambdaG'  : 0.1,
Пример #2
0
from HMF.code.models.nmf_np import nmf_np
from HMF.code.cross_validation.multiple_nmf_nested_matrix_cross_validation import MultipleNMFNestedCrossValidation
from HMF.drug_sensitivity.load_dataset import load_data_without_empty, load_data_filter

import numpy, random
''' Load datasets '''
# Every input folder is built from the "overlap" data directory of the project.
location = project_location + "HMF/drug_sensitivity/data/overlap/"
location_data = location + "data_row_01/"
location_features_drugs = location + "features_drugs/"
location_features_cell_lines = location + "features_cell_lines/"
location_kernels = location + "kernels_features/"
''' Concatenate the datasets by COLUMNS. The other datasets are loaded with the rows (cell lines) of CCLE EC50 '''
# CCLE EC50 is loaded first; the cell lines it returns are passed as `rows` to
# the other three loads, with columns=None (presumably keeping all of their
# columns -- confirm against load_data_filter).
R_ccle_ec, M_ccle_ec, cell_lines, drugs = load_data_without_empty(
    location_data + "ccle_ec50_row_01.txt")
R_ctrp, M_ctrp = load_data_filter(location_data + "ctrp_ec50_row_01.txt",
                                  rows=cell_lines,
                                  columns=None)
R_gdsc, M_gdsc = load_data_filter(location_data + "gdsc_ic50_row_01.txt",
                                  rows=cell_lines,
                                  columns=None)
R_ccle_ic, M_ccle_ic = load_data_filter(location_data + "ccle_ic50_row_01.txt",
                                        rows=cell_lines,
                                        columns=None)

# axis=1 places the datasets side by side, so the CCLE EC50 columns come first.
R_concat = numpy.concatenate((R_ccle_ec, R_gdsc, R_ctrp, R_ccle_ic),
                             axis=1)  #columns
M_concat = numpy.concatenate((M_ccle_ec, M_gdsc, M_ctrp, M_ccle_ic),
                             axis=1)  #columns
# Number of columns of the first (CCLE EC50) dataset inside the concatenation.
_, no_columns = R_ccle_ec.shape
''' Remove entirely empty columns, due to the other three datasets that we concatenate '''
Пример #3
0
from HMF.code.models.nmf_np import nmf_np
from HMF.code.cross_validation.multiple_nmf_nested_matrix_cross_validation import MultipleNMFNestedCrossValidation
from HMF.drug_sensitivity.load_dataset import load_data_without_empty, load_data_filter

import numpy, random

''' Load datasets '''
# Every input folder is built from the "overlap" data directory of the project.
location = project_location + "HMF/drug_sensitivity/data/overlap/"
location_data = location + "data_row_01/"
location_features_drugs = location + "features_drugs/"
location_features_cell_lines = location + "features_cell_lines/"
location_kernels = location + "kernels_features/"

''' Concatenate the datasets by ROWS. We remove the columns of the other datasets '''
# GDSC IC50 is loaded first; the drugs it returns are passed as `columns` to
# the other three loads, with rows=None.
R_gdsc, M_gdsc, cell_lines, drugs = load_data_without_empty(
    location_data + "gdsc_ic50_row_01.txt")
R_ctrp, M_ctrp = load_data_filter(
    location_data + "ctrp_ec50_row_01.txt", rows=None, columns=drugs)
R_ccle_ec, M_ccle_ec = load_data_filter(
    location_data + "ccle_ec50_row_01.txt", rows=None, columns=drugs)
R_ccle_ic, M_ccle_ic = load_data_filter(
    location_data + "ccle_ic50_row_01.txt", rows=None, columns=drugs)

# axis=0 stacks the datasets on top of each other, so the GDSC rows come first.
R_concat = numpy.concatenate(
    (R_gdsc, R_ctrp, R_ccle_ec, R_ccle_ic), axis=0)  # rows
M_concat = numpy.concatenate(
    (M_gdsc, M_ctrp, M_ccle_ec, M_ccle_ic), axis=0)  # rows
# Number of rows of the first (GDSC) dataset inside the concatenation.
no_rows, _ = R_gdsc.shape

''' Remove entirely empty rows, due to the other three datasets that we concatenate '''
def remove_empty_rows(R,M):
    """Return copies of R and M without the rows whose mask sums to zero.

    A row is kept when the sum of its entries in M is positive. The same row
    indices are then selected from both R and M, preserving their order.

    Args:
        R: 2D array-like of values with (at least) one row per row of M.
        M: 2D array-like mask (presumably 1 for observed and 0 for missing
            entries -- confirm against the loader).

    Returns:
        Tuple (new_R, new_M) of numpy arrays holding only the kept rows. When
        no row is kept, both results have shape (0, no_columns), so the column
        dimension is not lost.
    """
    R, M = numpy.asarray(R), numpy.asarray(M)
    # Integer row indices (rather than a boolean mask) so that R is only
    # indexed at the rows of M, exactly as the row-by-row loop used to do.
    kept_rows = numpy.flatnonzero(M.sum(axis=1) > 0)
    return R[kept_rows], M[kept_rows]
Пример #4
0
''' Model settings '''
# Number of trees.
n_estimators = 100
# Until what depth of feature splits we go.
max_depth = None


''' Load datasets '''
# Every input folder is built from the "overlap" data directory of the project.
location = project_location + "HMF/drug_sensitivity/data/overlap/"
location_data = location + "data_row_01/"
location_features_drugs = location + "features_drugs/"
location_features_cell_lines = location + "features_cell_lines/"
location_kernels = location + "kernels_features/"

# Main dataset (GDSC IC50); the cell lines and drugs it returns are passed to
# the feature loads below.
R_main, M_main, cell_lines, drugs = load_data_without_empty(
    location_data + "gdsc_ic50_row_01.txt")

# Cell line features. The commented-out loads are not used in this setup.
R_cnv, M_cnv = load_data_filter(
    location_features_cell_lines + "cnv.txt", cell_lines)
#R_cnv_std, M_cnv_std = load_data_filter(location_features_cell_lines+"cnv_std.txt", cell_lines)
R_mutation, M_mutation = load_data_filter(
    location_features_cell_lines + "mutation.txt", cell_lines)
#R_ge, M_ge = load_data_filter(location_features_cell_lines+"gene_expression.txt", cell_lines)
#R_ge_std, M_ge_std = load_data_filter(location_features_cell_lines+"gene_expression_std.txt", cell_lines)

# Drug features. The commented-out load is not used in this setup.
R_fp, M_fp = load_data_filter(
    location_features_drugs + "drug_fingerprints.txt", drugs)
R_targets, M_targets = load_data_filter(
    location_features_drugs + "drug_targets.txt", drugs)
R_1d2d, M_1d2d = load_data_filter(
    location_features_drugs + "drug_1d2d.txt", drugs)
#R_1d2d_std, M_1d2d_std = load_data_filter(location_features_drugs+"drug_1d2d_std.txt", drugs)

# Only the value matrices (not the masks) are collected as features.
features_drugs = [R_fp, R_targets, R_1d2d]
features_cell_lines = [R_cnv, R_mutation]


''' Split the mask M into folds '''
Пример #5
0
from HMF.code.cross_validation.cross_validation_hmf import CrossValidation
from HMF.drug_sensitivity.load_dataset import load_data_without_empty, load_data_filter

import numpy, random
''' Load datasets '''
# Every input folder is built from the "overlap" data directory of the project.
location = project_location + "HMF/drug_sensitivity/data/overlap/"
location_data = location + "data_row_01/"
location_features_drugs = location + "features_drugs/"
location_features_cell_lines = location + "features_cell_lines/"
location_kernels = location + "kernels_features/"

# Main dataset (GDSC IC50); the cell lines and drugs it returns are passed to
# the kernel loads below.
R_gdsc, M_gdsc, cell_lines, drugs = load_data_without_empty(
    location_data + "gdsc_ic50_row_01.txt"
)

# Cell line kernels: cell_lines is passed for both the rows and the columns.
C_cnv_std, M_cnv_std = load_data_filter(
    location_kernels + "cnv_std.txt", cell_lines, cell_lines
)
C_ge_std, M_ge_std = load_data_filter(
    location_kernels + "gene_expression_std.txt", cell_lines, cell_lines
)
C_mutation, M_mutation = load_data_filter(
    location_kernels + "mutation.txt", cell_lines, cell_lines
)

# Drug kernels: drugs is passed for both the rows and the columns.
C_1d2d_std, M_1d2d_std = load_data_filter(
    location_kernels + "drug_1d2d_std.txt", drugs, drugs
)
C_fp, M_fp = load_data_filter(
    location_kernels + "drug_fingerprints.txt", drugs, drugs
)
C_targets, M_targets = load_data_filter(
    location_kernels + "drug_targets.txt", drugs, drugs
)
''' Settings HMF '''
iterations = 100
burn_in = 80
thinning = 2
no_folds = 10