Example #1
class MultiLasso_model(Lasso_model):
    def __init__(self, train_path, test_path, pred_path):
        super().__init__(train_path, test_path, pred_path)
        self.multiLasso_model = MultiTaskLassoCV(
            alphas=[float(i) * 0.05 for i in range(1, 100)],
            cv=8,
            max_iter=1000000)

    def train(self, X_train, Y_train):
        self.multiLasso_model.fit(X_train, Y_train)

    def pred(self, X_test):
        return self.multiLasso_model.predict(X_test)

    def run(self):
        X_train_PMNF, X_test_PMNF, y_trains, y_tests = super().get_train_test()
        # MultiTaskLassoCV expects Y with shape (n_samples, n_tasks),
        # so stack the per-task targets and transpose before fitting.
        self.train(X_train_PMNF, np.asarray(y_trains).T)
        y_preds = self.pred(X_test_PMNF).T

        print(y_preds.shape, np.asarray(y_tests).shape)

        with open(self.pred_path, "w", newline='') as f:
            csv_writer = csv.writer(f)
            for i in range(len(y_trains)):
                # Write the original training rows for task i unchanged.
                for row in self.data_train_split[i]:
                    csv_writer.writerow(row)

                # Append the predicted value to each held-out test row.
                group = self.data_test_split[i][self.split_train_len:, :]
                for j in range(len(group)):
                    row = np.append(group[j, :], y_preds[i][j])
                    csv_writer.writerow(row)
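MultiTaskLassoCV expects the target as a single 2D array of shape (n_samples, n_tasks), which is why the example above stacks the per-task targets and transposes them before fitting, then transposes the predictions back. A minimal self-contained sketch with synthetic data (all names below are hypothetical, not from the example):

import numpy as np
from sklearn.linear_model import MultiTaskLassoCV

rng = np.random.RandomState(0)
X = rng.randn(100, 20)                    # 100 samples, 20 features
W = rng.randn(20, 3)                      # 3 related tasks
Y = X @ W + 0.1 * rng.randn(100, 3)       # target shape: (n_samples, n_tasks)

model = MultiTaskLassoCV(cv=5, max_iter=10000).fit(X, Y)
print(model.alpha_)                # alpha chosen by cross-validation
print(model.coef_.shape)           # (n_tasks, n_features) -> (3, 20)
print(model.predict(X[:2]).shape)  # (2, 3)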
Example #2
    def lassoCV(self, name):
        '''
        Multi-task Lasso regression with cross-validated alpha selection.
        '''
        # normalize= was deprecated in scikit-learn 1.0 and removed in 1.2;
        # it defaulted to False, so it is simply dropped here.
        sciLasso = MultiTaskLassoCV(
            fit_intercept=True,
            cv=12,
            tol=0.001)
        sciLasso.fit(self.X_train, self.Y_train)
        predict_test = sciLasso.predict(self.X_test)
        MSE = mean_squared_error(self.Y_test, predict_test)
        print("Sci LassoCV            (MSE: %f)" % MSE)
        # print(sciLasso.score(self.X_test, self.Y_test))
        print(sciLasso.coef_)
        print(np.nonzero(sciLasso.coef_))
        predict_final = sciLasso.predict(self.X_final)
        genCSV(name + '_MSE' + str(MSE), self.index_final, predict_final)
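The normalize argument used in the original version of this method is gone from recent scikit-learn releases. If the feature scaling it used to provide is actually wanted, the current approach is an explicit preprocessing step; a minimal sketch (StandardScaler is the usual replacement, though not numerically identical to the old normalize=True behaviour):

from sklearn.linear_model import MultiTaskLassoCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Scale features as an explicit pipeline step, then fit the cross-validated
# multi-task Lasso on the scaled data.
model = make_pipeline(StandardScaler(), MultiTaskLassoCV(cv=12, tol=0.001))
# model.fit(self.X_train, self.Y_train)
# predictions = model.predict(self.X_test)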
Example #3
class MultiTaskLassoCVImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        # Op is the wrapped scikit-learn estimator (here MultiTaskLassoCV).
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Example #4
# print('R2:    ', r2_score(y_test, y_pred_logreg))
# print('MAE:   ', metrics.mean_absolute_error(y_test, y_pred_logreg))
# print('MSE:   ', metrics.mean_squared_error(y_test, y_pred_logreg))
# print('RMSE:  ', np.sqrt(np.absolute(metrics.mean_squared_error(y_test, y_pred_logreg))))
# print('variance score:', explained_variance_score(y_test, y_pred_logreg, multioutput='uniform_average'))

# -----------------------------------------------------------------------------
# Method 3: MultiTaskLassoCV regression with 10-fold CV
# -----------------------------------------------------------------------------
print(' ')
print('## 2. Lasso Regression Results ##')
lasso = MultiTaskLassoCV(cv=10, eps=0.01, max_iter=1000)
t = time.time()
lasso.fit(X_train, y_train)
t_lasso = time.time() - t
y_pred_lasso = lasso.predict(X_test)
print('R2:    ', r2_score(y_test, y_pred_lasso))
print('MAE:   ', metrics.mean_absolute_error(y_test, y_pred_lasso))
print('MSE:   ', metrics.mean_squared_error(y_test, y_pred_lasso))
print('RMSE:  ',
      np.sqrt(np.absolute(metrics.mean_squared_error(y_test, y_pred_lasso))))
print(
    'variance score: ',
    explained_variance_score(y_test,
                             y_pred_lasso,
                             multioutput='uniform_average'))
print('training time:   ', t_lasso)
#
# # -----------------------------------------------------------------------------
# # Method 4: Ridge regression with 10-fold CV
# # -----------------------------------------------------------------------------
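When alphas is not passed explicitly, as in the example above, MultiTaskLassoCV builds its own grid of n_alphas values (100 by default) on a log scale, with eps setting the ratio alpha_min / alpha_max. A short sketch inspecting the generated grid, reusing X_train and y_train from the example:

from sklearn.linear_model import MultiTaskLassoCV

# Assumes X_train / y_train as defined in the example above.
lasso = MultiTaskLassoCV(cv=10, eps=0.01, n_alphas=50, max_iter=1000)
lasso.fit(X_train, y_train)
print(lasso.alphas_.shape)    # (50,)  -- the automatically generated alpha grid
print(lasso.alpha_)           # grid value with the lowest mean CV error
print(lasso.mse_path_.shape)  # (n_alphas, n_folds) = (50, 10)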
Example #5
## Lasso CV for parameter optimization
t1 = time.time()
alps = np.linspace(.1,.625,15)
model = MultiTaskLassoCV(cv=5, alphas=alps).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
print('time to train', t_lasso_cv)

# Display results
m_log_alphas = -np.log10(model.alphas_)

plt.figure()
plt.plot(m_log_alphas, model.mse_path_, ':')
plt.plot(m_log_alphas, model.mse_path_.mean(axis=-1), 'k',
         label='Average across the folds', linewidth=2)
plt.axvline(-np.log10(model.alpha_), linestyle='--', color='k',
            label='alpha: CV estimate')

plt.legend()

plt.xlabel('-log(alpha)')
plt.ylabel('Mean square error')
plt.title('Mean square error on each fold: coordinate descent '
          '(train time: %.2fs)' % t_lasso_cv)
plt.axis('tight')

plt.show()

Y_predicted = model.predict(X_test_reduced)

## Save results to csv
np.savetxt('prediction.csv', Y_predicted, fmt='%.5f',delimiter=',')
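The plot works because mse_path_ has shape (n_alphas, n_folds) and is aligned with alphas_; alpha_ is the grid value whose mean error across the folds is smallest. A quick sanity check one can run after fitting, reusing model from the example above:

import numpy as np

mean_mse = model.mse_path_.mean(axis=-1)   # average CV error for each alpha
best_idx = int(np.argmin(mean_mse))
# alpha_ should match the grid entry with the lowest mean CV error.
print('best alpha:', model.alpha_, 'vs grid:', model.alphas_[best_idx])
print('mean CV MSE at best alpha:', mean_mse[best_idx])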
Example #6
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试MultiTaskLassoCV类**********"
    # 在初始化MultiTaskLassoCV类时, 提供一组备选的α值, MultiTaskLassoCV类会帮我们选择一个合适的α值.
    multiTaskLassoCV = MultiTaskLassoCV(
        alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
    # 拟合训练集
    multiTaskLassoCV.fit(train_X, train_Y)
    # 打印最优的α值
    print "最优的alpha值: ", multiTaskLassoCV.alpha_
    # 打印模型的系数
    print "系数:", multiTaskLassoCV.coef_
    print "截距:", multiTaskLassoCV.intercept_
    print '训练集R2: ', r2_score(train_Y, multiTaskLassoCV.predict(train_X))

    # 对于线性回归模型, 一般使用均方误差(Mean Squared Error,MSE)或者
    # 均方根误差(Root Mean Squared Error,RMSE)在测试集上的表现来评该价模型的好坏.
    test_Y_pred = multiTaskLassoCV.predict(test_X)
    print "测试集得分:", multiTaskLassoCV.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    tss, rss, ess, r2 = xss(Y, multiTaskLassoCV.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2
Example #7
    pd.DataFrame(index=y_test.columns, columns=['T-statistic', 'P-value']))
results = results.drop(["count", "unique", "top", "freq"], axis=1)

# Drop cell line id in x_train and y_train if one of the drug responses is NaN
# y_train = y_train.dropna() # Drop rows of y_train
# non_null_ids = y_train.index # Get cell line ids that don't have null drug responses
# x_train = x_train[x_train.index.isin(non_null_ids)]

# Create multitask lasso model
print("Fitting " + model_name + "...")
# regr = MultiTaskLasso(alpha=0.5).fit(x_train, y_train)
regr = MultiTaskLassoCV(cv=3, n_alphas=10).fit(x_train, y_train)

# Predict y_test
print("Predicting y test...")
y_test_prediction = pd.DataFrame(data=regr.predict(x_test),
                                 index=y_test.index,
                                 columns=y_test.columns)

# For each drug, execute a t-test and store the results
for drug in y_test_binary.columns:

    # Get the drug response vector for a single drug
    y_test_prediction_single = y_test_prediction[
        drug]  # assign column headers to y_test_prediction
    y_test_actual_single = y_test_binary[drug]

    # Get sample groups for category 0 and category 1
    drug_responses_0, drug_responses_1 = get_t_test_groups(
        y_test_actual_single.values, y_test_prediction_single)
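get_t_test_groups and y_test_binary come from the surrounding project, so the loop above is shown only up to the group split. A hedged sketch of how such a comparison is typically finished, using scipy's independent-samples t-test:

from scipy import stats

# Hypothetical continuation of the loop body for one drug: compare predicted
# responses between the two actual-response categories.
t_stat, p_value = stats.ttest_ind(drug_responses_0, drug_responses_1,
                                  nan_policy='omit')
print(drug, 'T-statistic:', t_stat, 'P-value:', p_value)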