Exemplo n.º 1
0
def fancy_text_model(x_train, y_train, x_test, x_valid, cache_name, use_cache=False):
    """Fit a PassiveAggressiveRegressor on text features and predict.

    Returns a ``(test_pred, valid_pred)`` tuple.  Predictions are pickled
    to ``cache_name``; with ``use_cache=True`` the cached predictions are
    returned without refitting.
    """
    if use_cache:
        # Pickle is a binary format: the original 'r'/'w' text modes break
        # under Python 3 and can corrupt data on Windows.  The context
        # manager also fixes the read handle that was never closed.
        with open(cache_name, 'rb') as fhand:
            data_dict = pickle.load(fhand)
        return data_dict['test_pred'], data_dict['valid_pred']
    np.random.seed(seed=123)
    model = PassiveAggressiveRegressor(n_iter=100, C=1, shuffle=True, random_state=123)
    model.fit(x_train, y_train)
    test_pred = model.predict(x_test)
    valid_pred = model.predict(x_valid)
    data_dict = {'test_pred': test_pred, 'valid_pred': valid_pred}
    with open(cache_name, 'wb') as fhand:
        pickle.dump(data_dict, fhand)
    return test_pred, valid_pred
Exemplo n.º 2
0
def mcFadden_R2(y_true, y_pred):
    """Return McFadden's pseudo R-squared: 1 - LL(model) / LL(null model)."""
    # Null model: regress the target on a single all-ones (intercept-only)
    # feature, mirroring an intercept-only fit.
    intercept_only = pd.DataFrame(np.full(len(y_true), 1))
    null_model = PassiveAggressiveRegressor()
    null_model.fit(intercept_only, y_true)
    null_pred = null_model.predict(intercept_only)
    print('avg log-likelihood null-model: {}'.format(
        log_likelihood(y_true, null_pred)))

    ll_model = log_likelihood(y_true, y_pred)
    ll_null = log_likelihood(y_true, null_pred)
    return 1 - ll_model / ll_null
Exemplo n.º 3
0
def test_regressor_mse():
    """A batch-fitted PA regressor reaches MSE below 1.7 on dense and sparse data."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for fit_intercept in (True, False):
            model = PassiveAggressiveRegressor(
                C=1.0, n_iter=50, fit_intercept=fit_intercept, random_state=0)
            model.fit(data, y_bin)
            mse = np.mean((model.predict(data) - y_bin) ** 2)
            assert_less(mse, 1.7)
def test_regressor_partial_fit():
    """Fifty partial_fit passes should drive the training MSE below 1.7."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        reg = PassiveAggressiveRegressor(C=1.0,
                                         fit_intercept=True,
                                         random_state=0)
        # range() instead of the Python-2-only xrange(); the other copies
        # of this test in this file already use range().
        for t in range(50):
            reg.partial_fit(data, y_bin)
        pred = reg.predict(data)
        assert_less(np.mean((pred - y_bin)**2), 1.7)
def test_regressor_mse():
    """Batch fit keeps mean squared error under 1.7 with and without intercept."""
    target = y.copy()
    target[y != 1] = -1

    for features in (X, X_csr):
        for use_intercept in (True, False):
            estimator = PassiveAggressiveRegressor(C=1.0, n_iter=50,
                                                   fit_intercept=use_intercept,
                                                   random_state=0)
            estimator.fit(features, target)
            residuals = estimator.predict(features) - target
            assert_less(np.mean(residuals ** 2), 1.7)
def test_regressor_partial_fit():
    """Incremental training via partial_fit reaches MSE below 1.7."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    # (The original carried one spurious extra indentation level inside
    # this loop; normalized here — control flow is unchanged.)
    for data in (X, X_csr):
        model = PassiveAggressiveRegressor(C=1.0, fit_intercept=True,
                                           random_state=0)
        for _ in range(50):
            model.partial_fit(data, y_bin)
        assert_less(np.mean((model.predict(data) - y_bin) ** 2), 1.7)
class _PassiveAggressiveRegressorImpl:
    """Adapter that forwards fit/predict to a wrapped ``Op`` estimator."""

    def __init__(self, **hyperparams):
        # Keep the raw hyperparameters and build the underlying model once.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when provided."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self._wrapped_model.predict(X)
Exemplo n.º 8
0
def test_regressor_partial_fit():
    """partial_fit training, with and without averaging, reaches MSE < 1.7."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    # Attributes that only exist after an averaged fit.
    averaged_attrs = ('average_coef_', 'average_intercept_',
                      'standard_intercept_', 'standard_coef_')
    for data in (X, X_csr):
        for average in (False, True):
            reg = PassiveAggressiveRegressor(random_state=0,
                                             average=average, max_iter=100)
            for _ in range(50):
                reg.partial_fit(data, y_bin)
            assert np.mean((reg.predict(data) - y_bin) ** 2) < 1.7
            if average:
                for attr in averaged_attrs:
                    assert hasattr(reg, attr)
def test_regressor_mse():
    """Every intercept/averaging combination keeps the MSE under 1.7."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                reg = PassiveAggressiveRegressor(
                    C=1.0, fit_intercept=fit_intercept,
                    random_state=0, average=average, max_iter=5)
                reg.fit(data, y_bin)
                pred = reg.predict(data)
                assert_less(np.mean((pred - y_bin) ** 2), 1.7)
                if average:
                    # Plain asserts instead of the nose-style assert_true,
                    # which was deprecated and removed from sklearn's test
                    # utilities; matches the bare-assert copies of this
                    # test elsewhere in this file.
                    assert hasattr(reg, 'average_coef_')
                    assert hasattr(reg, 'average_intercept_')
                    assert hasattr(reg, 'standard_intercept_')
                    assert hasattr(reg, 'standard_coef_')
def test_regressor_partial_fit():
    """Averaged and non-averaged incremental fits both reach MSE < 1.7."""
    labels = y.copy()
    labels[y != 1] = -1

    for matrix in (X, X_csr):
        for use_average in (False, True):
            model = PassiveAggressiveRegressor(
                C=1.0, fit_intercept=True, random_state=0,
                average=use_average, max_iter=100)
            for _ in range(50):
                model.partial_fit(matrix, labels)
            error = np.mean((model.predict(matrix) - labels) ** 2)
            assert_less(error, 1.7)
            if use_average:
                # Averaging exposes both the averaged and the standard
                # coefficient/intercept attributes.
                assert hasattr(model, 'average_coef_')
                assert hasattr(model, 'average_intercept_')
                assert hasattr(model, 'standard_intercept_')
                assert hasattr(model, 'standard_coef_')
Exemplo n.º 11
0
def test_regressor_mse():
    """MSE stays below 1.7 for every intercept/averaging combination."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                reg = PassiveAggressiveRegressor(
                    C=1.0, fit_intercept=fit_intercept,
                    random_state=0, average=average, max_iter=5)
                reg.fit(data, y_bin)
                squared_error = (reg.predict(data) - y_bin) ** 2
                assert np.mean(squared_error) < 1.7
                if average:
                    # Averaged fits must expose all four coef/intercept attrs.
                    for name in ('average_coef_', 'average_intercept_',
                                 'standard_intercept_', 'standard_coef_'):
                        assert hasattr(reg, name)
Exemplo n.º 12
0
def test_regressor_partial_fit():
    """partial_fit with averaging exposes the averaged/standard coef attributes."""
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for average in (False, True):
            reg = PassiveAggressiveRegressor(C=1.0,
                                             fit_intercept=True,
                                             random_state=0,
                                             average=average)
            for t in range(50):
                reg.partial_fit(data, y_bin)
            pred = reg.predict(data)
            assert_less(np.mean((pred - y_bin) ** 2), 1.7)
            if average:
                # assert_true was deprecated and removed from sklearn's
                # testing utilities; plain assert matches the other copies
                # of this test in this file.
                assert hasattr(reg, 'average_coef_')
                assert hasattr(reg, 'average_intercept_')
                assert hasattr(reg, 'standard_intercept_')
                assert hasattr(reg, 'standard_coef_')
Exemplo n.º 13
0
                           n_features=5,
                           random_state=1000)

    # Create the model
    par = PassiveAggressiveRegressor(C=0.01,
                                     loss='squared_epsilon_insensitive',
                                     epsilon=0.001,
                                     max_iter=2000,
                                     random_state=1000)

    # Fit the model incrementally and collect the squared errors
    squared_errors = []

    for (x, y) in zip(X, Y):
        par.partial_fit(x.reshape(1, -1), y.ravel())
        y_pred = par.predict(x.reshape(1, -1))
        squared_errors.append(np.power(y_pred - y, 2))

    # Show the error plot
    fig, ax = plt.subplots(figsize=(18, 8))

    ax.plot(squared_errors)
    ax.set_xlabel('Sample')
    ax.set_ylabel('Squared error')
    ax.grid()

    plt.show()

    # Repeat the example with a discontinuous dataset
    X1, Y1 = make_regression(n_samples=nb_samples_2,
                             n_features=5,
Exemplo n.º 14
0
def main():
    """Benchmark custom Passive-Aggressive regressors against scikit-learn's.

    Fits a custom PARegressor, linear and RBF KernelPARegressor variants,
    and sklearn's PassiveAggressiveRegressor (trained sample-by-sample via
    partial_fit) on one synthetic regression problem, printing the MAE of
    each.  NOTE(review): Python 2 syntax (print statements, xrange) — this
    block only runs under Python 2.
    """
    # Synthetic problem: 1000 samples, 200 features (10 informative).
    X, y, coef = make_regression(1000, 200, 10, 1, noise=0.05, coef=True,
                                 random_state=42)

    # X = np.column_stack((X, np.ones(X.shape[0])))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=42)

    # sca = StandardScaler()
    # sca.fit(X_train)
    # X_train = sca.transform(X_train)
    # X_test = sca.transform(X_test)

    # print X.shape
    # print y.shape
    # print coef.shape

    # Grids kept for the (currently commented-out) GridSearchCV runs below.
    param_grid = {
        "C": [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
              100, 1000],
        "epsilon": [0.0001, 0.001, 0.01, 0.1]}

    param_grid_kern = {
        "C": [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
              100, 1000],
        "epsilon": [0.0001, 0.001, 0.01, 0.1],
        "gamma": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100]}
    # "loss": ["pa", "pai", "paii"]}}

    # Custom (project-local) PA regressor with the PA-II loss.
    my_pa = PARegressor(loss="paii", C=1, epsilon=0.001, n_iter=1,
                        fit_intercept=False)
    #
    # search = GridSearchCV(my_pa, param_grid,
    #                       scoring='mean_absolute_error', n_jobs=8, iid=True, refit=True, cv=5,
    #                       verbose=1)
    # search.fit(X_train, y_train)
    # print search.best_params_

    my_pa.fit(X_train, y_train)
    print my_pa.coef_

    # y_preds = search.predict(X_test)
    y_preds = my_pa.predict(X_test)

    mae_my_pa = mean_absolute_error(y_test, y_preds)
    print "My PA MAE = %2.4f" % mae_my_pa

    # Kernelized variant with a linear kernel (should match the linear PA).
    my_kpa_linear = KernelPARegressor(kernel="linear", loss="paii", C=1, epsilon=0.001, n_iter=1, fit_intercept=False)
    my_kpa_linear.fit(X_train, y_train)
    print "alphas", len(my_kpa_linear.alphas_), my_kpa_linear.alphas_
    y_preds = my_kpa_linear.predict(X_test)
    mae_kpa_linear = mean_absolute_error(y_test, y_preds)
    print "My KPA linear MAE = %2.4f" % mae_kpa_linear

    # RBF-kernel variant.
    my_kpa_rbf = KernelPARegressor(kernel="rbf", loss="paii", gamma=0.001, C=1, epsilon=0.001, n_iter=1, fit_intercept=False)
    # search = GridSearchCV(my_kpa_rbf, param_grid_kern,
    #                       scoring='mean_absolute_error', n_jobs=8, iid=True, refit=True, cv=5,
    #                       verbose=1)
    # search.fit(X_train, y_train)

    my_kpa_rbf.fit(X_train, y_train)
    print "alphas", len(my_kpa_rbf.alphas_), my_kpa_rbf.alphas_
    print "support", len(my_kpa_rbf.support_)
    # print "alphas", len(search.best_estimator_.alphas_)  # , my_kpa_rbf.alphas_
    # print "support", len(search.best_estimator_.support_)
    # print search.best_params_
    y_preds = my_kpa_rbf.predict(X_test)
    # y_preds = search.predict(X_test)
    mae_my_kpa = mean_absolute_error(y_test, y_preds)
    print "My Kernel PA MAE = %2.4f" % mae_my_kpa

    # print search.best_estimator_
    # print np.corrcoef(search.best_estimator_.coef_, coef)

    # param_grid = {
    # "C": [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 10,
    #           100, 1000, 10000],
    #     "epsilon": [0.0001, 0.001, 0.01, 0.1],
    #     # "loss": ["epsilon_insensitive", "squared_epsilon_insensitive"]}
    #     "loss": ["squared_epsilon_insensitive"]}


    # search = GridSearchCV(PassiveAggressiveRegressor(fit_intercept=True),
    # param_grid, scoring='mean_absolute_error', n_jobs=8, iid=True,
    # refit=True, cv=5, verbose=1)
    # search.fit(X_train, y_train)

    # sklearn baseline, trained one sample at a time via partial_fit;
    # warm_start keeps the weights between the per-sample calls.
    sk_pa = PassiveAggressiveRegressor(loss="squared_epsilon_insensitive", C=1,
                                       epsilon=0.001, n_iter=1,
                                       fit_intercept=False,
                                       warm_start=True)
    for i in xrange(X_train.shape[0]):
        # for x_i, y_i in zip(X_train, y_train):
        # Reshape the i-th sample to the 2-D / 1-D shapes partial_fit expects.
        x = np.array(X_train[i], ndmin=2)
        y = np.array(y_train[i], ndmin=1)
        # print x.shape
        # print y
        sk_pa.partial_fit(x, y)

    # sk_pa.fit(X_train, y_train)

    # y_preds = search.predict(X_test)
    y_preds = sk_pa.predict(X_test)
    mae_sk_pa = mean_absolute_error(y_preds, y_test)
    print "Sklearn PA MAE = %2.4f" % mae_sk_pa
Exemplo n.º 15
0
            暂时未知
        对于算法的具体过程还不是很清楚,所以暂时作为一个黑箱吧

'''
# Usage demo: fit, incrementally update, predict and score a
# PassiveAggressiveRegressor with its (older-API) default-style parameters.
rg = PassiveAggressiveRegressor(C=1.0,
                                fit_intercept=True,
                                n_iter=5,
                                shuffle=True,
                                verbose=0,
                                loss='epsilon_insensitive',
                                epsilon=0.1,
                                random_state=None,
                                warm_start=False)
rg.fit(X_train, Y_train)
rg.partial_fit(X_train, Y_train)  # incremental learning
Y_pre = rg.predict(X_test)
rg.score(X_test, Y_test)
# Fitted attributes (accessed for demonstration only).
rg.coef_
rg.intercept_
'''
    C                           正则化项系数 
    fit_intercept               是否计算截距
    n_iter                      迭代次数
    shuffle                     是否洗牌
    verbose                     哈
    loss                        损失函数
    epsilon                     阈值
    random_state                随机器
    warm_start=False            新的迭代开始后,是否用上一次的最后结果作为初始化
'''
Exemplo n.º 16
0
# Fit several sklearn regressors on the same (x, y) data and predict on xt.
# NOTE(review): Python 2 print statements — this fragment is Python-2-only.
br.fit(x, y)
br_sts_scores = br.predict(xt)

# Elastic Net
print 'elastic net'
enr = ElasticNet()
#enr.fit(x[:, np.newaxis], y)
#enr_sts_scores = enr.predict(xt[:, np.newaxis])
enr.fit(x, y)
enr_sts_scores = enr.predict(xt)

# Passive Aggressive Regression
print 'passive aggressive'
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
#par.fit(x[:, np.newaxis], y)
#par_sts_scores = par.predict(xt[:, np.newaxis])

# RANSAC Regression
print 'ransac'
ransac = RANSACRegressor()
#ransac.fit(x[:, np.newaxis], y)
#ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
ransac.fit(x, y)
ransac_sts_scores = ransac.predict(xt)

# Logistic Regression
print 'logistic'
lgr = LogisticRegression()
#lgr.fit(x[:, np.newaxis], y)
Exemplo n.º 17
0
                                                  axis='columns')

            #Convert features to sparse matrices
            validation_features = csr_matrix(validation_features.values)

            #Combine sparse arrays
            validation = hstack([
                validation_summary_dtm, validation_text_dtm,
                validation_features
            ])

            #Scale
            validation = scaler.transform(validation)

            #Predict
            validation_pred_chunk = lm.predict(validation)

            validation_pred.extend(validation_pred_chunk)
            validation_score.extend(list(validation_score_chunk))

        #Evaluate and save validation score
        fold_scores.append(
            mean_squared_error(validation_score, validation_pred))

    ###Train on entire evaluation set

    #Instantiate hashing vectorizers
    text_vectorizer = HashingVectorizer(ngram_range=(1, 3), n_features=2**20)
    summary_vectorizer = HashingVectorizer(ngram_range=(1, 3),
                                           n_features=2**20)
Exemplo n.º 18
0
# Per-gene essentiality prediction: variance filter, then per-gene
# univariate feature selection and a PA-regressor fit.
# NOTE(review): Python 2 prints and DataFrame.ix (removed in pandas 1.0) —
# this fragment needs Python 2 and an old pandas.
# Filter by coeficient variation
var_thres = VarianceThreshold(best_var).fit(X_train_pre)
X_train_pre = var_thres.transform(X_train_pre)
X_test_pre = var_thres.transform(X_test_pre)

for gene in genes:
    # Assemble prediction variables
    X_train = X_train_pre
    y_train = train_ess.ix[:, gene]
    X_test = X_test_pre

    # Feature selection
    fs = SelectKBest(f_regression, k=best_k).fit(X_train, y_train)
    X_train = fs.transform(X_train)
    X_test = fs.transform(X_test)

    # Estimation
    clf = PassiveAggressiveRegressor(epsilon=best_epsilon, n_iter=best_n_iter).fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Store results
    predictions.ix[gene] = y_pred

    print gene

# Persist the predictions and submit them to the challenge scoring service.
filename = save_gct_data(predictions, submission_filename_prefix)
print '[DONE]: Saved to file ' + filename

submit_solution(filename, filename.split('/')[1], ev_code_sc1)
print '[SUBMITED]'
Exemplo n.º 19
0
	# Load the pre-split train/test CSVs, fit a PA regressor, and write
	# timing and error metrics per folder.  (Interior of an out-of-view
	# loop/function over `folder`.)
	name_folder = folder.split("/")[6]
	train_data = np.array(pd.read_csv('train_data.csv', sep= ';'))
	test_data = np.array(pd.read_csv('test_data.csv', sep= ';'))
	train_labels = np.array(pd.read_csv('train_labels.csv', sep= ';'))
	test_labels = np.array(pd.read_csv('test_labels.csv', sep= ';'))

	inicio = time.time()

	# import the regression model
	from sklearn.linear_model import PassiveAggressiveRegressor

	# train the model on the data set
	regression = PassiveAggressiveRegressor().fit(train_data, train_labels)

	# predict
	predictions_labels = regression.predict(test_data)

	fim = time.time()
	# Wall-clock training+prediction time for this folder.
	df_time = pd.DataFrame({'Execution Time:' : [fim-inicio]})

	output_path = os.path.join('/home/isadorasalles/Documents/Regressao/passive_aggressive', 'time_'+name_folder)
	df_time.to_csv(output_path, sep=';')

	from sklearn import metrics

	# MAE, MSE, RMSE and R2 for this folder's predictions.
	df_metrics = pd.DataFrame({'Mean Absolute Error' : [metrics.mean_absolute_error(test_labels, predictions_labels)], 'Mean Squared Error' : [metrics.mean_squared_error(test_labels, predictions_labels)],  
		'Root Mean Squared Error': [np.sqrt(metrics.mean_squared_error(test_labels, predictions_labels))], 'R2 Score': [metrics.r2_score(test_labels, predictions_labels)]})

	output_path = os.path.join('/home/isadorasalles/Documents/Regressao/passive_aggressive', 'metrics_'+name_folder)
	df_metrics.to_csv(output_path, sep=';')
Exemplo n.º 20
0
#y = [0.5, 2.5]
#clf = MLPRegressor(hidden_layer_sizes=(1000,), random_state=1, max_iter=1, warm_start=True)
# Train a warm-started PA regressor, timing each partial_fit call, then
# time per-sample prediction and plot predictions vs. ground truth.
clf = PassiveAggressiveRegressor(random_state=1, warm_start=True, max_iter=100)
st = time.time()
ttList = []
for i in range(len(Xtrain)):
    tt = time.time()
    # NOTE(review): this refits on the ENTIRE training set every
    # iteration; a per-sample pass was presumably intended
    # (e.g. Xtrain[i:i+1], Ytrain[i:i+1]) — confirm before reusing.
    clf = clf.partial_fit(Xtrain, Ytrain)
    print(i / len(Xtrain))
    ttList.append(time.time() - tt)
trainTime = time.time() - st
joblib.dump(clf, 'currentModel.mod')
pred = []
st = time.time()
for x in range(len(Xtest)):
    pred.append(clf.predict([Xtest[x]]))
score = metrics.mean_absolute_error(Ytest, clf.predict(Xtest))
predictTime = time.time() - st
print("Time to import: " + str(importTime))
print("Time to Read File: " + str(readFileTime))
print("Time to split and Scale Data: " + str(scaleTime))
print("Time to train: " + str(trainTime))
print("Time to predict: " + str(time.time() - st))
print("Mean ABS error: %f" % score)

fig, ax = plt.subplots()
ax.plot(pred)
ax.plot(Ytest)

plt.show()
Exemplo n.º 21
0
ARDRegression
BayesianRidge
ElasticNet
ElasticNetCV
Hinge
Huber
Lars
LarsCV
Lasso
LassoCV
LassoLars
LassoLarsCV
LassoLarsIC
PassiveAggressiveRegressor
Ridge
SGDRegressor
LinearRegression
ModifiedHuber
MultiTaskElasticNet
"""

# Fit a PA regressor on the sparse question features and clamp negative
# predictions to zero.  NOTE(review): Python 2 print statement.
print "training using PassiveAggressiveRegressor"
par = PassiveAggressiveRegressor()
par.fit(quesparse,y)
pred = par.predict(tquesparse)
pred[pred<0] = 0
#for i in range(q):
#    temp = dict()
#    temp['__ans__'] = pred[i]
#    temp['question_key'] = tquestion_key[i]
#    print """{"__ans__": %s, "question_key":"%s"}""" % (temp['__ans__'], temp["question_key"])
Exemplo n.º 22
0
# Vectorize/scale the question and topic features, fit a PA regressor on
# the topic features, and plot histograms of train vs. predicted targets.
cfscaler = preprocessing.StandardScaler().fit(contextfollowers)
tfscaler = preprocessing.StandardScaler().fit(topicsfollowers)

quesparse = quevectorizer.fit_transform(question)
topsparse = topvectorizer.fit_transform(topics)
cfscaled = cfscaler.transform(contextfollowers)
tfscaled = tfscaler.transform(topicsfollowers)

# Transform the test-side inputs with the already-fitted transformers.
tquesparse = quevectorizer.transform(tquestion)
ttopsparse = topvectorizer.transform(ttopics)
tcfscaled = cfscaler.transform(tcontextfollowers)
ttfscaled = tfscaler.transform(ttopicsfollowers)

par = PassiveAggressiveRegressor()
par.fit(topsparse, y)
pred = par.predict(ttopsparse)
# Clamp negative predictions to zero.
pred[pred < 0] = 0

temp = pl.figure("train y")
temp = pl.subplot(2, 1, 1)
temp = pl.hist(y, 1000)
temp = pl.subplot(2, 1, 2)
yy = y.copy()
# Replace zeros so log10 below is defined.
yy[yy == 0] = 1
temp = pl.hist(np.log10(yy), 1000)

temp = pl.figure("test y")
temp = pl.subplot(4, 1, 1)
temp = pl.hist(pred, 1000)
temp = pl.subplot(4, 1, 2)
yy = pred.copy()
Exemplo n.º 23
0
# Per-gene prediction with cross-validated PA-regressor fits; the final
# prediction averages the CV models' test predictions.
# NOTE(review): Python 2 prints and DataFrame.ix (removed in pandas 1.0).
# Assemble prediction variables
X_train = X_train_pre.loc[:, important_features_top_100]
X_test = X_test_pre.loc[:, important_features_top_100]

for gene in prioritized_genes:
    y_train = train_ess.ix[:, gene]

    y_preds_test = []
    y_preds_scores = []

    # Training
    cv = ShuffleSplit(len(y_train), n_iter=5)
    for train_i, test_i in cv:
        clf = PassiveAggressiveRegressor(epsilon=0.01, n_iter=7).fit(X_train.ix[train_i, :], y_train[train_i])
        # Score each fold by Spearman correlation on its held-out split.
        y_preds_scores.append(spearm_cor_func(clf.predict(X_train.ix[test_i, :]), y_train[test_i]))
        y_preds_test.append(clf.predict(X_test))

    y_preds_scores = Series(y_preds_scores)
    y_preds_test = DataFrame(y_preds_test)

    # Predict
    # Average only the folds whose score is not NaN.
    y_pred = np.mean(y_preds_test[y_preds_scores.notnull()], axis=0).values

    print gene, X_train.shape

    # Store results
    predictions.ix[gene] = y_pred

filename_gct = save_gct_data(predictions, submission_filename_prefix)
print '[DONE]: Saved to file ' + filename_gct
from sklearn.model_selection import train_test_split


# Fit a PA regressor on CSV data, then threshold its continuous output at
# 0.1 to produce a binary label and mark each test case 'O' (hit) / 'X'.
setX, setY = importcsv()
X_train, X_test, y_train, y_test = train_test_split(setX, setY, test_size=0.01, random_state=42)

regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,tol=1e-3)
regr.fit(X_train, y_train)

#PassiveAggressiveRegressor(C=1.0, average=False, early_stopping=False,epsilon=0.1, fit_intercept=True, loss='epsilon_insensitive', max_iter=100, n_iter_no_change=5, random_state=0,shuffle=True, tol=0.001, validation_fraction=0.1,verbose=0, warm_start=False)


print(regr.score(X_test, y_test))

# densify() is a no-op unless sparsify() was called earlier.
regr.densify()
pred = regr.predict(X_test)


result=[]

for k in range(len(pred)):
    # Threshold at 0.1: below -> class 0, otherwise class 1.
    if pred[k]<0.1:
        value = 0
    else:
        value =1
    if value == y_test[k]:
        result.append('O')
    else:
        result.append('X')

print(pred)
        # Interior of an out-of-view loop: compare PA vs. SGD regressors on
        # mini-batches of drifting data, recording RMSE/MAE every 50 batches.
        train, test = shuffle(train).astype(np.float32), shuffle(test).astype(np.float32)

        # Column 0 is the target; the rest are features.
        train_x, train_y = train.iloc[:, 1:], train.iloc[:, 0]
        test_x, test_y = test.iloc[:, 1:], test.iloc[:, 0]

        # presumably injects concept drift into the data generator — confirm
        # against dg's definition.
        dg.add_drift(0.5, True)

        n, d = train_x.shape
        _n += n
        for i in range(0, n, batch_size):
            count += 1
            pa.partial_fit(train_x[i:i + batch_size], train_y[i:i + batch_size])
            sgd.partial_fit(train_x[i:i + batch_size], train_y[i:i + batch_size])

            if i % 50 == 0 or i == n - 1:
                pred1 = pa.predict(test_x)
                pred2 = sgd.predict(test_x)

                # NOTE(review): mse1/mse2 actually hold RMSE (sqrt applied).
                mse1, mae1 = np.sqrt(mean_squared_error(test_y, pred1)), mean_absolute_error(test_y, pred1)
                mse2, mae2 = np.sqrt(mean_squared_error(test_y, pred2)), mean_absolute_error(test_y, pred2)

                pa_mse_arr.append(mse1)
                pa_mae_arr.append(mae1)
                sgd_mse_arr.append(mse2)
                sgd_mae_arr.append(mae2)
                count_arr.append(count)

                print("pa %f %f, sgd %f %f" % (mse1, mae1, mse2, mae2))
        print("writing stats to directory : %s" % directory1)

        err_df = pd.DataFrame({"count": count_arr, "mse": pa_mse_arr, "mae": pa_mae_arr})
Exemplo n.º 26
0
# Same pipeline as the earlier example: vectorize/scale features, fit a PA
# regressor on topic features, clamp negatives, and plot histograms.
quesparse = quevectorizer.fit_transform(question)
topsparse = topvectorizer.fit_transform(topics)
cfscaled = cfscaler.transform(contextfollowers)
tfscaled = tfscaler.transform(topicsfollowers)

# Test-side transforms with the already-fitted vectorizers/scalers.
tquesparse = quevectorizer.transform(tquestion)
ttopsparse = topvectorizer.transform(ttopics)
tcfscaled = cfscaler.transform(tcontextfollowers)
ttfscaled = tfscaler.transform(ttopicsfollowers)



par = PassiveAggressiveRegressor()
par.fit(topsparse,y)
pred = par.predict(ttopsparse)
# Clamp negative predictions to zero.
pred[pred<0] = 0


temp = pl.figure("train y")
temp = pl.subplot(2,1,1)
temp = pl.hist(y,1000)
temp = pl.subplot(2,1,2)
yy = y.copy()
# Replace zeros so log10 below is defined.
yy[yy==0] = 1
temp = pl.hist(np.log10(yy),1000)

temp = pl.figure("test y")
temp = pl.subplot(4,1,1)
temp = pl.hist(pred,1000)
temp = pl.subplot(4,1,2)
Exemplo n.º 27
0

# Fit several sklearn regressors on the same (x, y) data and predict on xt.
# NOTE(review): Python 2 print statements — this fragment is Python-2-only.
# Elastic Net
print 'elastic net'
enr = ElasticNet()
#enr.fit(x[:, np.newaxis], y)
#enr_sts_scores = enr.predict(xt[:, np.newaxis])
enr.fit(x, y)
enr_sts_scores = enr.predict(xt)


# Passive Aggressive Regression
print 'passive aggressive'
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
#par.fit(x[:, np.newaxis], y)
#par_sts_scores = par.predict(xt[:, np.newaxis])

# RANSAC Regression
print 'ransac'
ransac = RANSACRegressor()
#ransac.fit(x[:, np.newaxis], y)
#ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
ransac.fit(x, y)
ransac_sts_scores = ransac.predict(xt)


# Logistic Regression
print 'logistic'
lgr = LogisticRegression()
Exemplo n.º 28
0
                                  C=1.0,
                                  average=False,
                                  epsilon=0.1,
                                  fit_intercept=True,
                                  loss='epsilon_insensitive',
                                  max_iter=None,
                                  n_iter=None,
                                  shuffle=True,
                                  tol=None,
                                  verbose=0,
                                  warm_start=False)
# Fit the PA regressor built above and report train score plus per-sample
# predictions.  NOTE(review): mixes print() with a Python 2 print
# statement below — runs only under Python 2.
regr.fit(X, y)
print(regr.score(x_train, y_train))  #Train Error: 32.86
#PassiveAggressiveRegressor()

predictions = regr.predict(x_test)
for i, prediction in enumerate(predictions):
    print 'Predicted: %s' % (prediction)

############################################################################################################
#Support Vector Machine Regression
from sklearn import svm
clf1 = svm.SVR(C=1.0,
               cache_size=200,
               coef0=0.0,
               degree=8,
               epsilon=0.1,
               gamma='auto',
               kernel='linear',
               max_iter=-1,
               shrinking=True,
# Jupyter-notebook export: fit models 2 and 3, predict with models 1-3 on
# the test split, and score model 1 by R^2.  The bare trailing expressions
# (pred1, acc1 * 100) only display output in a notebook cell.
# In[56]:

model2.fit(x_train, y_train)

# In[57]:

model3.fit(x_train, y_train)

# In[58]:

pred1 = model1.predict(x_test)
pred1

# In[59]:

pred2 = model2.predict(x_test)
pred2

# In[60]:

pred3 = model3.predict(x_test)
pred3

# In[61]:

from sklearn.metrics import r2_score

# In[62]:

acc1 = r2_score(y_test, pred1)
acc1 * 100