def make_mf_regression(X, y, qid, X_test, n_round=5, batch_size=1024 * 6, nb_epoch=10):
    '''
    Fit metafeature by @clf and get prediction for test.
    Assumed that @clf -- regressor
    '''
    u, i = X
    n = u.shape[0]
    mf_tr = np.zeros(u.shape[0])
    mf_te = np.zeros(X_test[:, 0].shape[0])
    for rnd in range(n_round):  # renamed from `i`, which shadowed the item array above
        skf = KFold(n, n_folds=2, shuffle=True, random_state=42 + rnd * 1000)
        for ind_tr, ind_te in skf:
            clf = build_model()
            u_tr, u_te = u[ind_tr], u[ind_te]
            i_tr, i_te = i[ind_tr], i[ind_te]
            y_tr, y_te = y[ind_tr], y[ind_te]
            clf.fit([u_tr, i_tr], y_tr,  # was [u_t, i_tr]: u_t is undefined
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, shuffle=True,
                    validation_data=([u_te, i_te], y_te))
            mf_tr[ind_te] += clf.predict([u_te, i_te]).ravel()  # was clf.predict(X_te): X_te is undefined
            mf_te += clf.predict(X_test).ravel() * 0.5  # 0.5 because there are two folds per round
            y_pred = np.clip(clf.predict([u_te, i_te]).ravel(), 1, 3)
            score = rmse(y_te, y_pred)
        print('round', rnd, 'finished')
        # print 'pred[{}] score:{}'.format(rnd, score)
    return (mf_tr / n_round, mf_te / n_round)
def run(self, hyper_classifier, training_data, training_target, testing_data, testing_target):
    '''
    TODO DOCUMENTATION
    '''
    results = {'name': self.name,
               'parameterization': self.parameterization,
               'exception': None}
    try:
        self.classifier = hyper_classifier.make_classifier(training_data, training_target,
                                                           **self.parameterization)
        self.classifier.fit(training_data, training_target)
        results['predicted'] = self.classifier.predict(testing_data)
    except MemoryError as e:
        raise e
    except Exception as e:
        print(repr(e))
        results['exception'] = e
    else:
        # attempt to save memory
        del self.classifier
        self.classifier = None
        results['ml_metric_ce'] = ml_metrics.ce(testing_target, results['predicted'])
        results['ml_metric_rmse'] = ml_metrics.rmse(testing_target, results['predicted'])
        results['sklearn_metric_accuracy'] = sklearn.metrics.accuracy_score(testing_target, results['predicted'])
        results['sklearn_metric_f1'] = sklearn.metrics.f1_score(testing_target, results['predicted'])
        results['sklearn_metric_precision'] = sklearn.metrics.precision_score(testing_target, results['predicted'])
        results['sklearn_metric_recall'] = sklearn.metrics.recall_score(testing_target, results['predicted'])
        results['ml_metric_auc'] = {}
        results['sklearn_metric_auc'] = {}
        for label in set(testing_target):
            # list comprehensions instead of bare map() so this also works on Python 3, where map() is lazy
            binary_testing_target = np.array([1 if x == label else 0 for x in testing_target])
            binary_predicted = np.array([1 if x == label else 0 for x in results['predicted']])
            results['ml_metric_auc'][label] = ml_metrics.auc(binary_testing_target, binary_predicted)
            # note: auc_score was renamed roc_auc_score in newer scikit-learn releases
            results['sklearn_metric_auc'][label] = sklearn.metrics.auc_score(binary_testing_target, binary_predicted)
    return results
def score(pred, y):
    '''
    Score the final test predictions. The metric depends on the task,
    so this function has to be adjusted each time.
    '''
    metric = rmse(y, pred)
    print(metric)
    return metric
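# For reference, a minimal NumPy sketch of what the rmse() helper used throughout these
# snippets computes (assumption: it is the usual root-mean-squared error, as in ml_metrics;
# rmse_sketch and its example arrays below are illustrative, not part of any snippet here).
import numpy as np

def rmse_sketch(actual, predicted):
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    return np.sqrt(np.mean((actual - predicted) ** 2))

# e.g. rmse_sketch([1, 2, 3, 4], [1, 2, 3, 5]) == 0.5, matching the test_rmse case further down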
def compare(a, b):
    m = pc.from_file(a).points
    n = pc.from_file(b).points
    m = [tuple(m.x), tuple(m.y), tuple(m.z)]
    m = m[0]
    n = [tuple(n.x), tuple(n.y), tuple(n.z)]
    n = n[0]
    v1, v2 = verify_rmse(m, n), rmse(m, n)
    print(v1, v2)
def custom_valid_scheme(model, train, valid, feats, target, agg_function,
                        early_stopping=5, val_at_num_epoch=5):
    def _train(X_train, model, y_train, iteration):
        # TODO: Train based on model from previous iteration instead of from scratch
        # (although it's not a real bottleneck)
        model.get_model().set_params(n_estimators=(iteration * val_at_num_epoch))
        model.fit(X_train, y_train)

    extract_test_func = lambda df: df[df['split'] == 'valid']
    X_train = train[feats]
    y_train = train[target]
    epochs_without_improvement = 0
    best_score = np.inf
    best_iter = 0
    iteration = 1  # renamed from `iter`, which shadows the builtin
    while epochs_without_improvement < early_stopping:
        _train(X_train, model, y_train, iteration)
        new_valid = predict_one_by_one(train=train, test=valid, feats=feats, model=model,
                                       agg_function=agg_function, extract_test_func=extract_test_func)
        score = rmse(valid[target].values, new_valid[target].values)
        print(f'RMSE on valid: {score}')
        if score < best_score:
            best_score = score
            best_iter = iteration
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        iteration += 1
    model.get_model().set_params(n_estimators=(best_iter * val_at_num_epoch))
    model.fit(X_train, y_train)
    print(f"score didn't improve for {epochs_without_improvement} epochs - "
          f"finished training with best score of {best_score}")
    return best_score
mod1)  # checking the data points which are influencing
e_new = e_toy.drop(e_toy.index[[80, 960, 221]], axis=0)  # Data points 80, 960 and 221 are influencing, hence we remove them.
mod1_new = smf.ols('Price ~ Age+KM+HP+cc+Dr+gr+Qt+Wt', data=e_new).fit()  # Applying model 1 to the newly created data set
mod1_new.summary()  # Looks good here as all the variables' p values are below 0.05
act1 = e_new.Price
#sm.graphics.plot_partregress_grid(mod1_new)

# Predicting prices using mod1
pred1 = mod1_new.predict(e_new)  # Predicting the price using model 1
rootmse = rmse(pred1, e_new.Price)  # calculating the root mean square error
rootmse  # = 1227.473986005888
df = pd.DataFrame(list(zip(pred1, act1)),
                  columns=['Predicted Prices', 'Actual Prices'])  # creating the data set of predicted and actual prices
df
'''
############################## Building Model 2 ##############################
mod2 = smf.ols('Price ~ np.log(Age)+KM+HP+cc+Dr+gr+Qt+Wt', data=e_new).fit()
mod2.summary()  # Since the p values for cc and Dr are above 0.05, let's check them for significance
mod_2d = smf.ols('Price~Dr', data=e_new).fit()  # applying model 2 for only Dr against price
mod_2d.summary()  # Shows that it is significant
#e_new2 = e_new2.drop(['Dr'], axis = 1)
# Discard the label row
# if sample[0] == 'gas [m3]': continue
label, x = sample[0], sample[1:]
#
# Insert the sample into the Dataset
#
tstdata.appendLinked(x, label)
#
# Write the output of the final network
#
predictedA, actualA = predict(n, tstdata['input'], tstdata['target'])
print "MAPE: ", mape(actualA, predictedA)
print "RMSE: ", metrics.rmse(actualA, predictedA)
print "MAE: ", metrics.mae(actualA, predictedA)
data = [["actual", "predicted"]]
data.extend(np.hstack([actualA, predictedA]))
with open('results/' + filename, 'w') as fp:
    a = csv.writer(fp, delimiter=',')
    a.writerows(data)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from ml_metrics import rmse

# loading the data
Computerdata = pd.read_csv("D:\\ExcelR Data\\Assignments\\Multi linear Regration\\Computer_Data.csv")
Le = preprocessing.LabelEncoder()  # LabelEncoder() converts levels of categorical features into numerical values
Computerdata['Cd'] = Le.fit_transform(Computerdata['cd'])
Computerdata = Computerdata.drop('cd', axis=1)
Computerdata['Multi'] = Le.fit_transform(Computerdata['multi'])
Computerdata = Computerdata.drop('multi', axis=1)
Computerdata['Premium'] = Le.fit_transform(Computerdata['premium'])
Computerdata = Computerdata.drop('premium', axis=1)
Computerdata.describe()
sns.pairplot(Computerdata)
Computerdata.columns
Computerdata.corr()  # correlation coefficients

import statsmodels.formula.api as smf

# Building a model
# To predict the price of computers, regress speed+hd+ram+screen+ads+trend+Cd+Multi+Premium against price
Model = smf.ols("price~speed+hd+ram+screen+ads+trend+Cd+Multi+Premium", data=Computerdata).fit()
Model.params
Model.summary()
# In my first model every variable is significant (P-value less than 0.05),
# so here I'm predicting the price of computers from my model
Pred = Model.predict(Computerdata)
Pred
rootmse = rmse(Pred, Computerdata.price)  # was Computerdata.Pr; the target column in this dataset is price
rootmse
print "Number of training patterns: ", len(DS) print "Input and output dimensions: ", DS.indim, DS.outdim print "number of units in hidden layer: ", nNeurons # # Build network with # n = buildNetwork(nFeatures, nNeurons, nOutput) trainer = BackpropTrainer( n, dataset=DS, verbose=True,momentum=0.01) # # Training graph # graph = [("training", "test")] a,b=predict(n, tstdata['input'], tstdata['target']) bestValError = metrics.rmse(a,b) epochsThreshold = 12 epochsCount = 0 bestweights = trainer.module.params.copy() i = 0 while True: trainer.trainEpochs(1) predictedA, actualA = predict(n, DS['input'], DS['target']) trainingError = metrics.rmse(actualA, predictedA) predictedA, actualA = predict(n, tstdata['input'], tstdata['target']) validationError = metrics.rmse(actualA, predictedA) if validationError > bestValError: epochsCount = epochsCount+1 else:
for i in tqdm(range(N)):
    logging.info('Starting batch {}'.format(i))
    data = features.make_train_batch(i)
    logging.info('Got data')
    X = data.drop(dropped_cols, 1)
    y = data.adjusted_demand
    logging.info('Training...')
    cls.fit(X, y)
    logging.info('Trained!')

ys = []
y_preds = []
for i in tqdm(range(N)):
    data = features.make_test_batch(i)
    X = data.drop(dropped_cols, 1)
    ys.append(data.adjusted_demand)
    y_pred = np.maximum(cls.predict(X), 1)
    y_preds.append(y_pred)

y = np.concatenate(ys)
y_pred = np.concatenate(y_preds)
del ys, y_preds
print(y_pred.shape)
print(y.shape)
print(y_pred[:10])
print(y[:10])
print(ml_metrics.rmse(y, y_pred))
print(ml_metrics.rmsle(y, y_pred))
print(pandas.Series(cls.coef_, index=X.columns).sort_values())
def test_rmse(self):
    self.assertAlmostEqual(metrics.rmse(range(0, 11), range(1, 12)), 1)
    self.assertAlmostEqual(metrics.rmse([0, .5, 1, 1.5, 2], [0, .5, 1, 1.5, 2]), 0)
    self.assertAlmostEqual(metrics.rmse(range(1, 5), [1, 2, 3, 5]), 0.5)
# predicting the price from model6
pred6 = Model6.predict(TC_new)
pred6
# In model6 every variable has a p-value below 0.05 and the R^2 value is 0.852,
# so the R^2 value hasn't improved.

# Here I'm building a table to see which model has the highest R^2 value
values = list([Model1.rsquared, Model2.rsquared, Model3.rsquared,
               Model4.rsquared, Model5.rsquared, Model6.rsquared])
coded_variables = list(['Model1.rsquared', 'Model2.rsquared', 'Model3.rsquared',
                        'Model4.rsquared', 'Model5.rsquared', 'Model6.rsquared'])
variables = list(['Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5', 'Model 6'])
Rsquared_model = pd.DataFrame(list(zip(variables, coded_variables, values)),
                              columns=['Models', 'Variables Named in the code', 'R^Squared Values'])
Rsquared_model
# From my analysis Model3 is the best model: it has the highest R^2 value (0.8789),
# and no variables had to be removed from it because all p-values are below 0.05.

# Finally, checking the root mean square error of pred3 against the actual data in TC_new
import statsmodels.api as sm
rootmse = rmse(pred3, TC_new.Price)
rootmse  # Out[215]: 1227.2689621781449
# Next I'm putting the predicted and actual data into a data frame called df
Actual = TC_new.Price
df = pd.DataFrame(list(zip(pred3, Actual)),
                  columns=['Predicted Prices', 'Actual Prices'])
def scores(actuals, predicteds):
    rmses = rmse(actual=actuals, predicted=predicteds)
    mses = mean_squared_error(actuals, predicteds)
    maes = mae(actual=actuals, predicted=predicteds)
    r2s = r2_score(actuals, predicteds)
    return rmses, maes, r2s, mses
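# A hypothetical call to scores() above (y_true and y_hat are made-up example values,
# not from any snippet here). Note the return order is (rmse, mae, r2, mse), which is
# not the order in which the metrics are computed inside the function.
y_true = [3.0, -0.5, 2.0, 7.0]
y_hat = [2.5, 0.0, 2.0, 8.0]
rmse_val, mae_val, r2_val, mse_val = scores(y_true, y_hat)
print(rmse_val, mae_val, r2_val, mse_val)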
def rmse_loss(self, y, y_pred):
    return rmse(y, y_pred)
rsq_Ad = smf.ols('Ad~MS+RD', data=e_stu).fit().rsquared  # RD and MS against Ad
vif_Ad = 1 / (1 - rsq_Ad)  # = 3.04709935040856, hence significant

d1 = {'Variables': ['RD', 'MS', 'Ad'], 'VIF': [vif_RD, vif_MS, vif_Ad]}  # Combining the VIF values with their variables
Vif_frame = pd.DataFrame(d1)  # into a data frame
Vif_frame

sm.graphics.plot_partregress_grid(mod1_new)  # Plotting partial regression plots to check which variables explain the most
fmod1_new = smf.ols('Pr~RD+MS', data=e_stu).fit()  # We remove Ad: even though it has a feasible VIF value,
                                                   # its p value in model 1 is not feasible, hence model 1 is
                                                   # rebuilt without Ad
fmod1_new.summary()  # The R squared value of the model and the p values of the variables look feasible
pred1 = fmod1_new.predict(e_stu)  # Predicting the price using model 1
rootmse = rmse(pred1, e_stu.Pr)  # calculating the root mean square error
rootmse  # = 7076.114277848526
act1 = e_stu.Pr
df = pd.DataFrame(list(zip(pred1, act1)), columns=['Predicted Prices', 'Actual Prices'])
df  # the data set of predicted and actual prices

# Creating a table of the R squared values of the different models that were built
# while correcting for influencing points in the data set.
values = list([mod1.rsquared, mod1_new.rsquared, fmod1_new.rsquared])
coded_variables = list(['mod1.rsquared', 'mod1_new.rsquared', 'fmod1_new.rsquared'])
variables = list(['Model 1', 'Model 1 New', 'Final Model 1'])
#R_Squared_value_Of_models = {'Variables':[],'R^2 Value':[]}
Rsquared_model = pd.DataFrame(list(zip(variables, coded_variables, values)),
                              columns=['Models', 'Variables Named in the code', 'R^Squared Values'])
Rsquared_model  # The table below shows how removing those outliers improved the R squared value.
'''
        Models  Variables Named in the code  R^Squared Values
0      Model 1                mod1.rsquared          0.950746
1  Model 1 New            mod1_new.rsquared          0.962343
#print(descriptionwithoutlinks[0])

# create matrix
indptr = [0]
indices = []
traindata = []
vocabulary = {}
for dec in descriptionwithoutlinks:
    for words in dec:
        index = vocabulary.setdefault(words, len(vocabulary))
        indices.append(index)
        traindata.append(1)
    indptr.append(len(indices))

matrix = csr_matrix((traindata, indices, indptr), dtype=float).toarray()
length = len(matrix)
X_train, X_test, y_train, y_test = train_test_split(matrix, views, test_size=0.33, random_state=42)

#params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2, 'learning_rate': 0.01, 'loss': 'ls'}
#clf = ensemble.GradientBoostingRegressor(**params)
#clf.fit(X_train, y_train)
#y_predicted = clf.predict(X_test)
#print(len(y_test))
#print(len(y_predicted))

rf = LinearRegression()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print(rmse(y_test, y_pred))
def rmse_est(estimator, x, y):
    # negated so that higher is better, as scikit-learn scoring callables expect
    pred = estimator.predict(x)
    return -rmse(pred, y)
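# A hedged usage sketch: because rmse_est above has the (estimator, X, y) signature and
# returns a negated error, it can be passed directly as a `scoring` callable. The ridge
# model and the toy arrays below are illustrative assumptions, not from the snippet above.
import numpy as np
from ml_metrics import rmse  # assumed to be the rmse used by rmse_est
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

X_toy = np.random.rand(100, 3)
y_toy = X_toy @ np.array([1.0, -2.0, 0.5]) + 0.1 * np.random.randn(100)
neg_rmse_scores = cross_val_score(Ridge(), X_toy, y_toy, cv=5, scoring=rmse_est)
print(-neg_rmse_scores.mean())  # average RMSE across folds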
r1 = sm.stats.DescrStatsW(otg1diff[m:])
r2 = sm.stats.DescrStatsW(otg1diff[:m])
print 'p-value: ', sm.stats.CompareMeans(r1, r2).ttest_ind()[1]

otg1diff.plot(figsize=(12, 6))
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(otg1diff.values.squeeze(), lags=25, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(otg1diff, lags=25, ax=ax2)

src_data_model = otg[:'2013-05-26']
model = sm.tsa.ARIMA(src_data_model, order=(1, 1, 1), freq='W').fit(full_output=False, disp=0)
print model.summary()

q_test = sm.tsa.stattools.acf(model.resid, qstat=True)  # resid holds the model residuals;
                                                        # qstat=True applies the Ljung-Box test to the coefficients
print DataFrame({'Q-stat': q_test[1], 'p-value': q_test[2]})

pred = model.predict('2013-05-26', '2014-12-31', typ='levels')
trn = otg['2013-05-26':]
r2 = r2_score(trn, pred[1:32])
print 'R^2: %1.2f' % r2
metrics.rmse(trn, pred[1:32])
metrics.mae(trn, pred[1:32])
otg.plot(figsize=(12, 6))
pred.plot(style='r--')
# Determine p and q
# fig = plt.figure(figsize=(12, 8))
# ax1 = fig.add_subplot(211)
# fig = sm.graphics.tsa.plot_acf(otg1diff.values.squeeze(), lags=25, ax=ax1)
# ax2 = fig.add_subplot(212)
# fig = sm.graphics.tsa.plot_pacf(otg1diff, lags=25, ax=ax2)
p = 1
q = 1
d = 1

# Forecast (building the ARIMA model)
dat = '2018-11-30'
src_data_model = otg[:dat]
model = sm.tsa.ARIMA(src_data_model, order=(p, d, q), freq='M').fit(disp=0)
print(model.summary())

# Model residuals and their ACF
q_test = sm.tsa.stattools.acf(model.resid, qstat=True)  # resid holds the model residuals;
                                                        # qstat=True applies the Ljung-Box test to the coefficients

# Computing the coefficient of determination R^2
pred = model.predict(dat, '2018-12-31', typ='levels')
trn = otg[dat:'2018-11-30']
l = len(trn) + 1
r2 = r2_score(trn, pred[1:l])
print('R^2: %1.2f' % r2)
print("Root mean squared error of our model:")
print(metrics.rmse(trn, pred[1:l]))
print("Mean absolute error of the forecast:")
print(metrics.mae(trn, pred[1:l]))
otg.plot()
pred.plot(style='r--')
plt.show()
print "number of units in hidden layer: ", nNeurons # # Build network with # n = buildNetwork(nFeatures, nNeurons, nOutput) trainer = BackpropTrainer( n, dataset=trainData, verbose=True,momentum=momentum) # # Training graph # graph = [("training", "test")] for i in range(0,nEpochs): trainer.trainEpochs(1) predictedA, actualA = predict(n, trainData['input'], trainData['target']) error = metrics.rmse(actualA, predictedA) predictedA, actualA = predict(n, tstdata['input'], tstdata['target']) error2 = metrics.rmse(actualA, predictedA) graph.append((i, error, error2)) with open('results/graphs/'+filename, 'w') as fp: a = csv.writer(fp, delimiter=',') a.writerows(graph) # # Write the output of the final network # predictedA, actualA = predict(n, tstdata['input'], tstdata['target'])
model_am = smf.ols("Pr~Ad+Ms", data=Startups).fit()
model_am.summary()

# Added variable plot
sm.graphics.plot_partregress_grid(Model2)

# Final Model
Model3 = smf.ols("Pr~Rd+Ms", data=Startups).fit()
Model3.params
Model3.summary()
pred3 = Model3.predict(Startups_new)  # was Model.predict(...), presumably a typo for the final model Model3
pred3

# Finally I'm using the root mean square error (RMSE) to check the average error in my data set
rootmse = rmse(pred3, Startups_new.Pr)
rootmse
Actual = Startups_new.Pr

# Creating a data frame of actual and predicted prices
df = pd.DataFrame(list(zip(pred3, Actual)), columns=['Predicted Prices', 'Actual Prices'])

# Next I'm creating an R^2 value table for my three models
values = list([Model1.rsquared, Model2.rsquared, Model3.rsquared])  # R^2 values
coded_variables = list(['Model1.rsquared', 'Model2.rsquared', 'Model3.rsquared'])
variables = list(['Model 1', 'Model 2', 'Model 3'])
Rsquared_model = pd.DataFrame(list(zip(variables, coded_variables, values)),
                              columns=['Models', 'Variables Named in the code', 'R^Squared Values'])
Rsquared_model
#     Models  Variables Named in the code  R^Squared Values
# 0  Model 1              Model1.rsquared          0.950746
# 1  Model 2              Model2.rsquared          0.950746
# 2  Model 3              Model3.rsquared          0.950450
    # determine test indices
    test_idxs = perms[i]
    train_ds = SupervisedDataSet(nFeatures, nOutput)
    train_ds.setField("input", inp[train_idxs])
    train_ds.setField("target", tar[train_idxs])
    #
    # Build network with
    #
    n = buildNetwork(nFeatures, nNeurons, nOutput)
    trainer = BackpropTrainer(n, dataset=train_ds, verbose=True, momentum=0.01)
    trainer.trainEpochs(nEpochs)
    predictedA, actualA = predict(n, inp[test_idxs], tar[test_idxs])
    performances += metrics.rmse(actualA, predictedA)

print "CROSSVALIDATOR: ", performances / n_folds
'''predictedA, actualA = predict(n, tstdata['input'], tstdata['target'])
print "MAPE: ", mape(actualA, predictedA)
print "RMSE: ", metrics.rmse(actualA, predictedA)
print "MAE: ", metrics.mae(actualA, predictedA)
data = [["actual", "predicted"]]
data.extend(np.hstack([actualA, predictedA]))
with open('results/'+filename, 'w') as fp:
tr_gen = X_train_generatetor(128, um, im, batch_size=batch_size, name='X_train.csv')
te_gen = X_train_generatetor(128, um, im, batch_size=batch_size, name='X_test.csv')
for X_tr, y_tr in tr_gen:
    p_tr = model.predict_on_batch(X_tr)
    y_tr_preds.append(p_tr)
for X_te, y_te in te_gen:
    p_te = model.predict_on_batch(X_te)
    y_te_preds.append(p_te)
y_tr_preds = np.concatenate(y_tr_preds).ravel()
y_te_preds = np.concatenate(y_te_preds).ravel()
train_score = rmse(y_train, y_tr_preds)
print('rmse train', train_score)
test_score = rmse(y_test, y_te_preds)
print('rmse test', test_score)

print('Start Training')
for epoch in range(nb_epoch):
    tr_gen = X_train_generatetor(128, um, im, batch_size=batch_size, name='X_train.csv')
    te_gen = X_train_generatetor(128, um, im, batch_size=batch_size, name='X_test.csv')
    # train
    # y_tr_preds = []
    # y_te_preds = []
    start_time = time.time()
    for X_tr, y_tr in tr_gen:
        model.train_on_batch(X_tr, y_tr)
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(otg1diff.values.squeeze(), lags=25, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(otg1diff, lags=25, ax=ax2)

print 'otg'
print otg

src_data_model = otg[:'2015-11-09 03:26:26']
model = sm.tsa.ARIMA(src_data_model, order=(4, 1, 0)).fit()  # trend='nc' if needed
print model.summary()

q_test = sm.tsa.stattools.acf(model.resid, qstat=True)
print DataFrame({'Q-stat': q_test[1], 'p-value': q_test[2]})

pred = model.predict('2015-11-09 03:26:16', '2015-11-09 03:29:06', typ='levels')
trn = otg['2015-11-09 03:26:26':]
r2 = r2_score(trn, pred)
print 'R^2: %1.2f' % r2

# root-mean-square error and mean absolute error
metrics.rmse(trn, pred)
metrics.mae(trn, pred)

fig, (ax1) = plt.subplots(nrows=1, sharex=True)
ax1.plot(otg.index, otg.values)
ax1.plot_date(pred.index, pred.values, 'r--')
plt.show()
#print pred.values
    model.resid, qstat=True)  # resid holds the model residuals; qstat=True applies the Ljung-Box test to the coefficients
print(DataFrame({'Q-stat': q_test[1], 'p-value': q_test[2]}))

# prediction
# pred = model.predict(start=src_data_model.shape[0], end=src_data_model.shape[0]+100)
pred = model.predict(start='2017-10-25 00:00:00', end='2017-10-30 00:00:00')
trn = p['2017-10-25 00:00:00':'2017-10-30 00:00:00']
print(pred)
# pred.plot(figsize=(12, 8), color='red')
plt.show()
# r2 = r2_score(trn, pred[1:32])
# print('R_2= %1.2f' % r2)

# RMSE for ARIMA
rmse = metrics.rmse(trn, pred)
print(rmse)
print(type(rmse))

# MAE for ARIMA
mae = metrics.mae(trn, pred)
print(mae)

scale = 1
deviation = float(rmse)
lower = pred - deviation * scale
lowerDF = pd.DataFrame({'Box': lower.values}, index=lower.index)
print(lowerDF)
# as_matrix() is deprecated in newer pandas; .values / .to_numpy() is the replacement
lower_arr = lowerDF.as_matrix().squeeze()
p_arr = p.loc[lowerDF.index].as_matrix().squeeze()
pred_arr = pred.loc[lowerDF.index].as_matrix().squeeze()
X_train, y_train = resample(X_train, y_train, n_samples=X_train.shape[0] // 10, random_state=seed)  # integer division so n_samples is an int
X_test, y_test = resample(X_test, y_test, n_samples=X_test.shape[0] // 10, random_state=seed)

X = np.concatenate([X_train, X_test, test])
user_le = LabelEncoder()
item_le = LabelEncoder()
user_le.fit(X[:, 0])
item_le.fit(X[:, 1])
X_train[:, 0] = user_le.transform(X_train[:, 0])
X_train[:, 1] = item_le.transform(X_train[:, 1])

if use_all:
    X = np.concatenate([X_train, X_test])
    y_train = np.concatenate([y_train, y_test])
    X_train[:, 0] = user_le.transform(X[:, 0])
    X_train[:, 1] = item_le.transform(X[:, 1])

X_test[:, 0] = user_le.transform(X_test[:, 0])
X_test[:, 1] = item_le.transform(X_test[:, 1])

rf = RandomForestRegressor(n_estimators=100, max_depth=12, n_jobs=7, random_state=seed)
rf.fit(X_train, y_train)
y_preds = rf.predict(X_test).ravel()
score = rmse(y_test, y_preds)
print('rf rmse score', score)
def rmse(self):
    actual = self.get_series()
    fitted = self.get_fitted_values()
    return ml_metrics.rmse(actual, fitted)
fig, axes = plt.subplots(ncols=1, figsize=(5, 4))
TestModels.R2_Y1.plot(kind='bar', title='R2 metrics for different models - Random Forest wins')
#TestModels.R2_Y2.plot(ax=axes[1], kind='bar', color='green', title='R2_Y2')

# random_forest is the best ->
model = models[1]
model.fit(Xtrn, Ytrn)
#model.summary()
inf = model.feature_importances_
print inf

# then predict with
#print trn
res = model.predict(trn)
print(res)
#print trg
print "rmse"
m = metrics.rmse(trg.sure.values, res)
err = metrics.mae(trg.sure.values, res)
print m
print "mean_abs_err"
print err
print "in percentage:"
print(100.0 * err / (trg.sure.values.max() - trg.sure.values.min()))
#plot.grid()
#plt.show()

fig = plt.figure()
from mpl_toolkits.mplot3d import Axes3D
ax = Axes3D(fig)
ys = t1.dpr.values