def mcFadden_R2(y_true, y_pred):
    # Fit an intercept-only null model by regressing on a constant feature.
    constant_feature = pd.DataFrame(np.full(len(y_true), 1))
    null_model = PassiveAggressiveRegressor()
    null_model.fit(constant_feature, y_true)
    null_model_prediction = null_model.predict(constant_feature)
    print('avg log-likelihood null-model: {}'.format(
        log_likelihood(y_true, null_model_prediction)))
    # McFadden's pseudo R^2: 1 - L / L_null
    L = log_likelihood(y_true, y_pred)
    L_null = log_likelihood(y_true, null_model_prediction)
    return 1 - L / L_null
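# mcFadden_R2 above relies on a log_likelihood helper that is not defined in
# this snippet. A minimal sketch of such a helper, assuming an average
# Gaussian log-likelihood with unit variance (the original definition is not
# shown, so this is only a plausible stand-in):
import numpy as np

def log_likelihood(y_true, y_pred):
    # Mean per-sample log-density of the residuals under N(0, 1).
    residuals = np.asarray(y_true) - np.asarray(y_pred)
    return np.mean(-0.5 * np.log(2 * np.pi) - 0.5 * residuals ** 2)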
def test_regressor_mse():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for fit_intercept in (True, False):
            reg = PassiveAggressiveRegressor(C=1.0, n_iter=50,
                                             fit_intercept=fit_intercept,
                                             random_state=0)
            reg.fit(data, y_bin)
            pred = reg.predict(data)
            assert_less(np.mean((pred - y_bin) ** 2), 1.7)
def test_regressor_correctness(loss):
    y_bin = y.copy()
    y_bin[y != 1] = -1

    reg1 = MyPassiveAggressive(loss=loss, n_iter=2)
    reg1.fit(X, y_bin)

    for data in (X, X_csr):
        reg2 = PassiveAggressiveRegressor(tol=None, loss=loss, max_iter=2,
                                          shuffle=False)
        reg2.fit(data, y_bin)
        assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2)
class _PassiveAggressiveRegressorImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        # Op is the underlying scikit-learn estimator class bound elsewhere.
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
def fancy_text_model(x_train, y_train, x_test, x_valid, cache_name, use_cache=False):
    if use_cache:
        # Pickle files must be opened in binary mode.
        with open(cache_name, 'rb') as fhand:
            data_dict = pickle.load(fhand)
        return data_dict['test_pred'], data_dict['valid_pred']
    np.random.seed(seed=123)
    model = PassiveAggressiveRegressor(n_iter=100, C=1, shuffle=True,
                                       random_state=123)
    model.fit(x_train, y_train)
    test_pred = model.predict(x_test)
    valid_pred = model.predict(x_valid)
    data_dict = {'test_pred': test_pred, 'valid_pred': valid_pred}
    with open(cache_name, 'wb') as fhand:
        pickle.dump(data_dict, fhand)
    return test_pred, valid_pred
def test_regressor_correctness(loss):
    y_bin = y.copy()
    y_bin[y != 1] = -1

    reg1 = MyPassiveAggressive(C=1.0, loss=loss, fit_intercept=True, n_iter=2)
    reg1.fit(X, y_bin)

    for data in (X, X_csr):
        reg2 = PassiveAggressiveRegressor(C=1.0, tol=None, loss=loss,
                                          fit_intercept=True, max_iter=2,
                                          shuffle=False)
        reg2.fit(data, y_bin)
        assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2)
def test_regressor_mse():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for data in (X, X_csr):
        for fit_intercept in (True, False):
            for average in (False, True):
                reg = PassiveAggressiveRegressor(C=1.0,
                                                 fit_intercept=fit_intercept,
                                                 random_state=0,
                                                 average=average, max_iter=5)
                reg.fit(data, y_bin)
                pred = reg.predict(data)
                assert np.mean((pred - y_bin) ** 2) < 1.7
                if average:
                    assert hasattr(reg, 'average_coef_')
                    assert hasattr(reg, 'average_intercept_')
                    assert hasattr(reg, 'standard_intercept_')
                    assert hasattr(reg, 'standard_coef_')
def test_regressor_correctness():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    for loss in ("epsilon_insensitive", "squared_epsilon_insensitive"):
        reg1 = MyPassiveAggressive(C=1.0, loss=loss, fit_intercept=True,
                                   n_iter=2)
        reg1.fit(X, y_bin)
        reg2 = PassiveAggressiveRegressor(C=1.0, loss=loss, fit_intercept=True,
                                          n_iter=2)
        reg2.fit(X, y_bin)
        assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2)
def train(params=[1.0, 0.001]):
    global data_products, model_products
    _C, _epsilon = params
    for product_id in data_products:
        data = data_products[product_id].dropna()
        if len(data.index) <= 0:
            continue  # skip products with no data
        X = data[['amount_of_all_competitors',
                  'average_price_on_market',
                  'distance_to_cheapest_competitor',
                  'price_rank',
                  'quality_rank']]
        y = data['sold'].copy()
        y[y > 1] = 1
        model = PassiveAggressiveRegressor(n_iter=1000)
        model.set_params(C=_C, epsilon=_epsilon)
        model.fit(X, y)
        model_products[product_id] = model
# svm = svm2 = None
del svm, svm2

print('SVM Results:')
print('BOW Results: ' + per(svm_bow_train) + ' training accuracy, '
      + per(svm_bow_test) + ' testing accuracy')
print('Bigram Results: ' + per(svm_bigram_train) + ' training accuracy, '
      + per(svm_bigram_test) + ' testing accuracy')

# Now let's try using the passive aggressive classifier:
from sklearn.linear_model import PassiveAggressiveClassifier, PassiveAggressiveRegressor

pac = PassiveAggressiveClassifier()
pac2 = PassiveAggressiveClassifier()
par = PassiveAggressiveRegressor()
par2 = PassiveAggressiveRegressor()

# Now fit
pac.fit(train_bow, train_ratings)
par.fit(train_bow, train_ratings)
pac2.fit(train_bigram, train_ratings)
par2.fit(train_bigram, train_ratings)

# Record and display results
pac_bow_train = pac.score(train_bow, train_ratings)
pac_bow_test = pac.score(test_bow, test_ratings)
pac_bigram_train = pac2.score(train_bigram, train_ratings)
pac_bigram_test = pac2.score(test_bigram, test_ratings)
par_bow_train = par.score(train_bow, train_ratings)
par_bow_test = par.score(test_bow, test_ratings)
par_bigram_train = par2.score(train_bigram, train_ratings)
par_bigram_test = par2.score(test_bigram, test_ratings)

# pac = par = pac2 = par2 = 1
del pac, par, pac2, par2

# Results
print('Passive Aggressive Classifier')
print("Training Models") m1 = Ridge(normalize=True, alpha=0.001, solver='auto') m2 = Lasso(normalize=False, alpha=0.0001, selection='cyclic',positive=False) m3 = ElasticNet(normalize=False, alpha=0.0001,positive=False, l1_ratio = 0.2) m4 = PassiveAggressiveRegressor(epsilon=0.001, C=100, shuffle=True) m5 = LinearRegression() m1.fit(Xtrain, Ytrain) print("Model 1 Finished") m2.fit(Xtrain, Ytrain) print("Model 2 Finished") m3.fit(Xtrain, Ytrain) print("Model 3 Finished") m4.fit(Xtrain, Ytrain) print("Model 4 Finished") m5.fit(Xtrain, Ytrain) print("Model 5 Finished") models = [m1, m2, m3, m4, m5] X = np.zeros((Xtest.shape[0], 5)) Xt = np.zeros((Xtr.shape[0], 5)) for i in range(len(models)): y = models[i].predict(Xtest) X[:,i] = np.ravel(y) Xt[:,i] = models[i].predict(Xtr) submit = pd.DataFrame(data={'id': ids, 'quality': Yhat}) submit.to_csv('./submissions/ensemble_m_'+str(i)+'.csv', index = False)
#br_sts_scores = br.predict(xt[:, np.newaxis])
br.fit(x, y)
br_sts_scores = br.predict(xt)

# Elastic Net
print('elastic net')
enr = ElasticNet()
#enr.fit(x[:, np.newaxis], y)
#enr_sts_scores = enr.predict(xt[:, np.newaxis])
enr.fit(x, y)
enr_sts_scores = enr.predict(xt)

# Passive Aggressive Regression
print('passive aggressive')
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
#par.fit(x[:, np.newaxis], y)
#par_sts_scores = par.predict(xt[:, np.newaxis])

# RANSAC Regression
print('ransac')
ransac = RANSACRegressor()
#ransac.fit(x[:, np.newaxis], y)
#ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
ransac.fit(x, y)
ransac_sts_scores = ransac.predict(xt)

# Logistic Regression
print('logistic')
lgr = LogisticRegression()
###
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                    random_state=12)
np_Xtrain = np.array(X_train.iloc[:, -len(X_train.columns) + 1:-1])
np_Xtest = np.array(X_test.iloc[:, -len(X_test.columns) + 1:-1])
np_ytest = np.array(y_test.iloc[:, 1])
np_ytrain = np.array(y_train.iloc[:, 1])

# Performing PAR
from sklearn.linear_model import PassiveAggressiveRegressor

tolerance = 0.5
# If the difference between the current prediction and the correct label is
# below this threshold, the model is not updated.
e = 0.1
par = PassiveAggressiveRegressor(tol=tolerance, epsilon=e)
par_fitted = par.fit(np_Xtrain, np_ytrain)
y2_pred = par_fitted.predict(np_Xtest)

from sklearn.metrics import r2_score

r2_score_model = r2_score(np_ytest, y2_pred)
print(par_fitted)
print("r^2 on test data : %f" % r2_score_model)

pd_ypred = pd.DataFrame(y2_pred)
pd_ypred.reset_index(drop=True, inplace=True)
y_test.reset_index(drop=True, inplace=True)
for_plotting = pd.concat([y_test, pd_ypred], axis=1)  # subsetting for plotting
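# The epsilon note above says updates only happen when the absolute error
# exceeds the threshold. A standalone sketch of that effect (the data and
# epsilon values below are illustrative assumptions, not from the original):
from sklearn.datasets import make_regression
from sklearn.linear_model import PassiveAggressiveRegressor

Xd, yd = make_regression(n_samples=200, n_features=5, noise=10.0,
                         random_state=0)
for eps in (0.01, 1.0, 100.0):
    m = PassiveAggressiveRegressor(epsilon=eps, random_state=0).fit(Xd, yd)
    # A very large epsilon leaves most samples inside the insensitive band,
    # so few updates happen and the training score tends to drop.
    print(eps, m.score(Xd, yd))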
cfscaler = preprocessing.StandardScaler().fit(contextfollowers)
tfscaler = preprocessing.StandardScaler().fit(topicsfollowers)

quesparse = quevectorizer.fit_transform(question)
topsparse = topvectorizer.fit_transform(topics)
cfscaled = cfscaler.transform(contextfollowers)
tfscaled = tfscaler.transform(topicsfollowers)

tquesparse = quevectorizer.transform(tquestion)
ttopsparse = topvectorizer.transform(ttopics)
tcfscaled = cfscaler.transform(tcontextfollowers)
ttfscaled = tfscaler.transform(ttopicsfollowers)

par = PassiveAggressiveRegressor()
par.fit(topsparse, y)
pred = par.predict(ttopsparse)
pred[pred < 0] = 0  # clip negative predictions to zero

temp = pl.figure("train y")
temp = pl.subplot(2, 1, 1)
temp = pl.hist(y, 1000)
temp = pl.subplot(2, 1, 2)
yy = y.copy()
yy[yy == 0] = 1  # avoid log10(0)
temp = pl.hist(np.log10(yy), 1000)

temp = pl.figure("test y")
temp = pl.subplot(4, 1, 1)
temp = pl.hist(pred, 1000)
temp = pl.subplot(4, 1, 2)
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.datasets import make_regression
from importation_pandas import importcsv
from sklearn.model_selection import train_test_split

setX, setY = importcsv()
X_train, X_test, y_train, y_test = train_test_split(setX, setY,
                                                    test_size=0.01,
                                                    random_state=42)

regr = PassiveAggressiveRegressor(max_iter=100, random_state=0, tol=1e-3)
regr.fit(X_train, y_train)
# PassiveAggressiveRegressor(C=1.0, average=False, early_stopping=False,
#                            epsilon=0.1, fit_intercept=True,
#                            loss='epsilon_insensitive', max_iter=100,
#                            n_iter_no_change=5, random_state=0, shuffle=True,
#                            tol=0.001, validation_fraction=0.1, verbose=0,
#                            warm_start=False)
print(regr.score(X_test, y_test))

regr.densify()
pred = regr.predict(X_test)

# Threshold the regression output into binary labels and compare.
result = []
for k in range(len(pred)):
    if pred[k] < 0.1:
        value = 0
    else:
        value = 1
    if value == y_test[k]:
        result.append('O')
    else:
        result.append('X')  # assumed counterpart to 'O'; the original snippet breaks off here
Another regressor that supports incremental learning is SGDRegressor.
Drawbacks:
unknown for now
The exact workings of the algorithm are still not clear to me, so treat it as a black box for the time being.
'''
rg = PassiveAggressiveRegressor(C=1.0, fit_intercept=True, n_iter=5,
                                shuffle=True, verbose=0,
                                loss='epsilon_insensitive', epsilon=0.1,
                                random_state=None, warm_start=False)
rg.fit(X_train, Y_train)
rg.partial_fit(X_train, Y_train)  # incremental learning
Y_pre = rg.predict(X_test)
rg.score(X_test, Y_test)
rg.coef_
rg.intercept_
'''
C                regularization strength
fit_intercept    whether to fit an intercept
n_iter           number of iterations
shuffle          whether to shuffle the training data
verbose          verbosity level, heh
loss             loss function
epsilon          threshold
random_state     random number generator
warm_start=False whether a new fit is initialized from the previous solution
'''
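# The docstring above singles out partial_fit as the incremental-learning
# entry point. A self-contained sketch of out-of-core training on synthetic
# data (the chunk size and data here are illustrative assumptions):
import numpy as np
from sklearn.linear_model import PassiveAggressiveRegressor

rng = np.random.RandomState(0)
X_stream = rng.randn(1000, 5)
y_stream = X_stream @ rng.randn(5) + 0.1 * rng.randn(1000)

pa = PassiveAggressiveRegressor(C=1.0, random_state=0)
for start in range(0, len(X_stream), 100):
    batch = slice(start, start + 100)
    # Each call updates the model in place with one batch of the stream.
    pa.partial_fit(X_stream[batch], y_stream[batch])
print(pa.score(X_stream, y_stream))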
if Y is None:
    Y = np.array([successRate])
else:
    addY = np.array([successRate])
    Y = np.r_[Y, addY]
# done assembling the model inputs

print("Perceptron input X")
#print(X)
print("Perceptron input Y")
print(Y)

#gradientDescent = gradientDescent()
#trustValue = gradientDescent.gradientDescent(X, Y)
regr = PassiveAggressiveRegressor(max_iter=100, random_state=0, tol=1e-3)
regr.fit(X, Y)
w = regr.coef_
dim = w.shape[0]
trustValue = []
for i in range(0, dim):
    value = math.exp(w[i])  # exp() returns the exponential of x, e**x
    trustValue.append(value)

'''FTRLs = FTRLs(10000)
xTrans = X.transpose()  # matrix transpose
trustValue = FTRLs.train(xTrans, Y, 100000)  # retrieve the trust values'''
#FTRL = FTRL(n)
#trustValue = FTRL.ftrl(X, Y)  # retrieve the trust values
#ogd = OGD(X.shape[1], alpha=0.01)
#trustValue = ogd.OGD_(X, Y)
print("Trust values")
print(trustValue)
from sklearn.linear_model import SGDRegressor, PassiveAggressiveRegressor
from sklearn.neural_network import MLPRegressor

# In[54]:
model1 = SGDRegressor()
model2 = PassiveAggressiveRegressor()
model3 = MLPRegressor()

# In[55]:
model1.fit(x_train, y_train)

# In[56]:
model2.fit(x_train, y_train)

# In[57]:
model3.fit(x_train, y_train)

# In[58]:
pred1 = model1.predict(x_test)
pred1

# In[59]:
pred2 = model2.predict(x_test)
pred2
ARDRegression
BayesianRidge
ElasticNet
ElasticNetCV
Hinge
Huber
Lars
LarsCV
Lasso
LassoCV
LassoLars
LassoLarsCV
LassoLarsIC
PassiveAggressiveRegressor
Ridge
SGDRegressor
LinearRegression
ModifiedHuber
MultiTaskElasticNet
"""
print("training using PassiveAggressiveRegressor")
par = PassiveAggressiveRegressor()
par.fit(quesparse, y)
pred = par.predict(tquesparse)
pred[pred < 0] = 0  # clip negative predictions to zero
#for i in range(q):
#    temp = dict()
#    temp['__ans__'] = pred[i]
#    temp['question_key'] = tquestion_key[i]
#    print("""{"__ans__": %s, "question_key":"%s"}""" % (temp['__ans__'], temp["question_key"]))
def passive_aggressive_regressor(self):
    x_train, x_test, y_train, y_test = self.preprocessing()
    model = PassiveAggressiveRegressor()
    y_pred = model.fit(x_train, y_train).predict(x_test)
    self.printing(y_test, y_pred, 'Passive Aggressive')
########################################################################################################################
# Passive Aggressive Regressor model
from sklearn.linear_model import PassiveAggressiveRegressor

# n_iter was removed from scikit-learn; stopping is controlled by max_iter/tol.
regr = PassiveAggressiveRegressor(random_state=0, C=1.0, average=False,
                                  epsilon=0.1, fit_intercept=True,
                                  loss='epsilon_insensitive', shuffle=True,
                                  tol=None, verbose=0, warm_start=False)
regr.fit(X, y)
print(regr.score(x_train, y_train))
# Train Error: 32.86

#PassiveAggressiveRegressor()
predictions = regr.predict(x_test)
for i, prediction in enumerate(predictions):
    print('Predicted: %s' % prediction)

############################################################################################################
# Support Vector Machine Regression
from sklearn import svm

clf1 = svm.SVR(C=1.0, cache_size=200, coef0=0.0, degree=8, epsilon=0.1,