def load_data(params, seed):
    """Fetch the train/test files under ``url_p1b1`` and load them.

    The actual loading is delegated to ``p1_common.load_csv_data``; this
    function only decides which columns to request and forwards the
    relevant entries of ``params``.

    Args:
        params: Mapping read for the keys ``'use_landmark_genes'``,
            ``'feature_subsample'``, ``'shuffle'``, ``'scaling'``,
            ``'datatype'`` and ``'validation_split'``.
        seed: Forwarded unchanged as ``seed`` to the loader.

    Returns:
        Whatever ``p1_common.load_csv_data`` returns when called with
        ``return_dataframe=False`` and ``return_header=True``.
    """
    label_cols = ['cancer_type']
    encoded_cols = ['cancer_type']

    # Defaults: no explicit feature list, and 'case_id' is dropped.
    feature_cols = None
    excluded_cols = ['case_id']

    if params['use_landmark_genes']:
        # Restrict features to the names in the 'gdc' column of the
        # lincs1000 table; with an explicit feature list no drop list
        # is passed.
        landmark_path = p1_common.get_p1_file(url_p1b1 + 'lincs1000.tsv')
        landmark_table = pd.read_csv(landmark_path, sep='\t')
        feature_cols = landmark_table['gdc'].tolist()
        excluded_cols = None

    train_file = p1_common.get_p1_file(url_p1b1 + file_train)
    test_file = p1_common.get_p1_file(url_p1b1 + file_test)

    return p1_common.load_csv_data(
        train_file,
        test_file,
        x_cols=feature_cols,
        y_cols=label_cols,
        drop_cols=excluded_cols,
        onehot_cols=encoded_cols,
        n_cols=params['feature_subsample'],
        shuffle=params['shuffle'],
        scaling=params['scaling'],
        dtype=params['datatype'],
        validation_split=params['validation_split'],
        return_dataframe=False,
        return_header=True,
        seed=seed,
    )
def load_data_orig(params, seed):
    """Load the train/test data through ``p1_common.load_X_data``.

    Args:
        params: Mapping read for the keys ``'with_type'``,
            ``'use_landmark_genes'``, ``'feature_subsample'``,
            ``'shuffle'``, ``'scaling'``, ``'validation_split'`` and
            ``'datatype'``.
        seed: Forwarded unchanged as ``seed`` to the loader.

    Returns:
        Whatever ``p1_common.load_X_data`` returns.
    """
    # Without the type column: drop it together with 'case_id' and
    # request no one-hot encoding.
    excluded_cols = ['case_id', 'cancer_type']
    encoded_cols = None
    if params['with_type']:
        # Keep 'cancer_type' and ask for it to be one-hot encoded.
        excluded_cols = ['case_id']
        encoded_cols = ['cancer_type']

    selected_cols = None
    if params['use_landmark_genes']:
        landmark_path = p1_common.get_p1_file(url_p1b1 + 'lincs1000.tsv')
        landmark_table = pd.read_csv(landmark_path, sep='\t')
        # NOTE(review): this passes the 'gdc' column as a pandas Series,
        # whereas load_data converts it with .tolist() -- confirm that
        # load_X_data accepts a Series here.
        selected_cols = landmark_table['gdc']
        # With an explicit column selection no drop list is passed.
        excluded_cols = None

    return p1_common.load_X_data(
        url_p1b1,
        file_train,
        file_test,
        drop_cols=excluded_cols,
        onehot_cols=encoded_cols,
        usecols=selected_cols,
        n_cols=params['feature_subsample'],
        shuffle=params['shuffle'],
        scaling=params['scaling'],
        validation_split=params['validation_split'],
        dtype=params['datatype'],
        seed=seed,
    )
def stage_data():
    """Resolve the nine P1B3 data files via ``p1_common.get_p1_file``.

    Each file name is appended to the P1B3 server URL and passed to
    ``p1_common.get_p1_file``, one call per file, in the order listed
    below.

    Returns:
        A 9-tuple of the values returned by ``p1_common.get_p1_file``,
        in this order: cell-line expressions, cell-line miRNA, cell-line
        proteome, cell-line kinome, drug descriptors, drug latent
        representation, dose response, test cell lines, test drugs.
    """
    server = 'http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B3/'

    # Order matters: callers unpack the returned tuple positionally.
    filenames = (
        'P1B3_cellline_expressions.tsv',
        'P1B3_cellline_mirna.tsv',
        'P1B3_cellline_proteome.tsv',
        'P1B3_cellline_kinome.tsv',
        'P1B3_drug_descriptors.tsv',
        'P1B3_drug_latent.csv',
        'P1B3_dose_response.csv',
        'P1B3_test_celllines.txt',
        'P1B3_test_drugs.txt',
    )
    return tuple(p1_common.get_p1_file(server + name) for name in filenames)
def get_file(url):
    """Return the result of ``p1_common.get_p1_file`` for ``url``.

    Args:
        url: Passed unchanged to ``p1_common.get_p1_file``.

    Returns:
        Whatever ``p1_common.get_p1_file(url)`` returns.
    """
    fetched = p1_common.get_p1_file(url)
    return fetched