def network_lasso(data, response_type='level', ground_truth=None, inhib_targets=None, perfect=True, group_stimuli=False):
    '''
    Infer one weighted adjacency matrix per training set using lasso.

    For every (X, Y) pair returned by prepare_markov_data, each column of Y
    is regressed on X with sklearn's LassoCV, which selects alpha by
    cross-validation. The absolute coefficients, divided by the largest
    absolute coefficient, are stored as that column of the adjacency matrix.

    input:
        data          : handed to prepare_markov_data (via introduce_inhibs
                        first when inhib_targets is supplied)
        response_type : (level, rate); forwarded to prepare_markov_data
        ground_truth  : adjacency matrix to score against; None skips scoring
        inhib_targets : dict forwarded to introduce_inhibs; None/empty means
                        no interventions are modelled
        perfect       : forwarded to introduce_inhibs
        group_stimuli : binary; forwarded to prepare_markov_data

    output:
        A             when ground_truth is None
        (A, auc)      otherwise
        where A[key] is a DataFrame indexed by X.columns on both axes and
        auc[key] = score_network(A[key], ground_truth).
    '''
    # Only linear_model is needed; sklearn.cross_validation was removed in
    # scikit-learn 0.20, so importing it would make this function unusable.
    from sklearn import linear_model

    # model interventions if supplied an inhib_targets dict
    if inhib_targets:
        data = introduce_inhibs(data, inhib_targets=inhib_targets, perfect=perfect)
    training_dict = prepare_markov_data(data, response_type, group_stimuli)

    # fit lasso for each (X, Y) pair
    A = {}
    for key in training_dict:
        X, Y = training_dict[key]
        n_features = X.shape[1]

        # NOTE(review): the regression runs on X as provided. A
        # StandardScaler().fit_transform(X) call whose result is thrown away
        # does not scale X, so none is made here; if standardised predictors
        # are wanted, the transformed values must be passed to fit() --
        # confirm, since that would change the resulting edge weights.
        A[key] = pd.DataFrame(np.zeros((n_features, n_features)),
                              columns=X.columns, index=X.columns)

        for col in Y.columns:
            weights = np.zeros((n_features,))

            # a response that never varies carries no signal: keep zeros
            if len(set(Y[col])) > 1:
                rgn = linear_model.LassoCV(verbose=False).fit(X, Y[col])
                magnitudes = np.abs(rgn.coef_)
                peak = magnitudes.max()
                # Test the largest *magnitude*, not the signed maximum:
                # coefficients such as [-0.7, 0, 0] have max() == 0 yet
                # still describe an edge. peak == 0 means lasso shrank
                # everything away (and guards the division).
                if peak != 0:
                    weights = magnitudes / peak

            # label-based assignment (.ix no longer exists in pandas >= 1.0)
            A[key].loc[:, col] = weights

    # `is None` rather than truthiness: a DataFrame/ndarray adjacency matrix
    # raises ValueError when evaluated as a boolean.
    if ground_truth is None:
        return A

    auc = {}
    for key in training_dict:
        auc[key] = score_network(A[key], ground_truth)
    return A, auc
'TestInhib3' : ['mTOR_pS2448'], 'TestInhib4' : ['EGFR_pY1068', 'EGFR_pY1173', 'EGFR_pY992'], 'TestInhib5' : []} print '----------- ' + cell_line + ' ------------' data = pd.read_csv('data/{0}_main.csv'.format(cell_line), header=0) inhibs = set(data['Inhibitor']) stims = set(data['Stimulus']) node_list = data.columns[4:] inhib_targets = {'GSK690693' : ['AKT_pT308','AKT_pS473'], 'GSK690693_GSK1120212' : ['AKT_pT308','AKT_pS473','MEK1_pS217_S221']} regGBR= {} scalar = {} td = utilities.prepare_markov_data(utilities.introduce_inhibs(data, inhib_targets=inhib_targets, perfect=True), 'level', group_stimuli=False) for stim in stims: X, Y = td[stim] scalar[stim] = preprocessing.StandardScaler() scalar[stim].fit_transform(X) X.ix[X.ix[:,'Inhib_GSK690693']>0,'Inhib_GSK690693'] = 1 X.ix[X.ix[:,'Inhib_GSK690693_GSK1120212']>0,'Inhib_GSK690693_GSK1120212'] = 1 X.ix[X.ix[:,'Inhib_GSK690693']<0,'Inhib_GSK690693'] = 0 X.ix[X.ix[:,'Inhib_GSK690693_GSK1120212']<0,'Inhib_GSK690693_GSK1120212'] = 0 n_estimators = 100 max_depth = 3 regGBR[stim] = do_gbr(X, Y, n_estimators=n_estimators, max_depth=max_depth) # predict
uacc812 = pd.read_csv('data/UACC812_main.csv') # load in the network prior curated from the literature prior = pd.read_csv('data/experimental_prior.csv', index_col=0, header=0) ## BT20 ############################################## print '----------- ' + 'BT20' + ' ------------' inhibs = set(bt20['Inhibitor']) stims = set(bt20['Stimulus']) node_list = bt20.columns[4:] inhib_targets = {'GSK690693' : ['AKT_pT308','AKT_pS473'], 'GSK690693_GSK1120212' : ['AKT_pT308','AKT_pS473','MEK1_pS217_S221']} regGBR= {} scalar = {} td_bt20 = utilities.prepare_markov_data(utilities.introduce_inhibs(bt20, inhib_targets=inhib_targets, perfect=True), 'level', group_stimuli=False) for stim in td_bt20: X, Y = td_bt20[stim] scalar[stim] = preprocessing.StandardScaler() scalar[stim].fit_transform(X) X.ix[X.ix[:,'Inhib_GSK690693']>0,'Inhib_GSK690693'] = 1 X.ix[X.ix[:,'Inhib_GSK690693_GSK1120212']>0,'Inhib_GSK690693_GSK1120212'] = 1 X.ix[X.ix[:,'Inhib_GSK690693']<0,'Inhib_GSK690693'] = 0 X.ix[X.ix[:,'Inhib_GSK690693_GSK1120212']<0,'Inhib_GSK690693_GSK1120212'] = 0 n_estimators = 100 max_depth = 3 regGBR[stim] = do_gbr(X, Y, n_estimators=n_estimators, max_depth=max_depth) adj = build_adj_matrix(regGBR, node_list, stims) A_true_bt20 = prior for stim in adj: