# select user/activity
from hmmlearn.hmm import GaussianHMM
import numpy as np

def find_optimal_HMM(activities, cluster_range):
    scores = []
    for k in cluster_range:
        model = GaussianHMM(n_components=k, covariance_type="diag",
                            n_iter=1000).fit(activities)
        log_likelihood = model.score(activities)
        # negated BIC-style penalty (higher is better); k serves as a rough
        # proxy for the model's parameter count
        bic = 2 * log_likelihood - k * np.log(len(activities))
        scores.append((k, log_likelihood, bic))
    return scores

### Print or save clusters
#for result in results_single_hmm:
#    draw_clusters(result)

### Plotting complete behavior
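# A minimal usage sketch (not from the original): `activities` is assumed to be
# an (n_samples, n_features) observation array, and the state range is hypothetical.
scores = find_optimal_HMM(activities, cluster_range=range(2, 11))
# the penalized score above is "higher is better", so take the max
best_k = max(scores, key=lambda s: s[2])[0]
print("best n_components:", best_k)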
# %%
df['mempool-size'].iloc[1000:].plot()

# %% sklearn linear regression
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

nlag = 10
cutoff = 1400
model = LinearRegression()

feature_names = [
    'mempool-size',
    'n-transactions-per-block',
    'n-transactions',
    'hash-rate'
]
target_name = ['transaction-fees']

# build nlag lagged copies of each feature column
features = []
for lag in range(nlag):
    features.append(df[feature_names].shift(lag))
    features[-1].columns = [name + str(lag) for name in feature_names]
features = pd.concat(features, axis=1)
#features.fillna(method='backfill', axis=0, inplace=True)

# drop the leading rows that the shifts filled with NaNs
features = features.loc[cutoff:, :]
target = df.loc[cutoff:, target_name]  # align target rows with the features
#target = target.loc[cutoff:, :]

X_train, X_test, y_train, y_test = train_test_split(features, target,
                                                    test_size=0.1)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
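# Side note (not in the original): train_test_split shuffles rows by default,
# which lets future observations leak into training for lagged time-series
# features. A chronological-split sketch under that assumption:
split = int(len(features) * 0.9)
X_train, X_test = features.iloc[:split], features.iloc[split:]
y_train, y_test = target.iloc[:split], target.iloc[split:]
model.fit(X_train, y_train)
print(model.score(X_test, y_test))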
# In[74]:

plt.figure(figsize=(15, 5))
plt.title("Linear Regression Model")
plt.xlabel("Date")
plt.ylabel("Open Price")
plt.plot(train.index, y_train, "blue", label='Training data')
plt.plot(test.index, y_test, "green", label='Testing data')
plt.plot(test.index, y_pred, "red", label='Predicted data')
plt.xticks(np.arange(0, 242, 25), dataset['Date'][0:242:25])
#plt.plot(test.index, regressor.predict(x_test), "yellow")
plt.legend()

# In[83]:

r_sq = model.score(x_train, y_train)
print('coefficient of determination:', r_sq)
print('Slope of model: ', model.coef_)
print('Intercept of model: ', model.intercept_)

# In[57]:

from sklearn import metrics

err = metrics.mean_squared_error(y_test, y_pred)
print("Mean squared error: ", err)

# In[85]:

# k nearest neighbour
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
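# The k-nearest-neighbour cell above shows only its imports; a hypothetical
# continuation (the estimator choice and its parameters are assumptions):
from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
print('k-NN R^2 on the test set:', knn.score(X_test, y_test))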
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
import math

# `series` and `dates` are assumed to be defined earlier in the notebook
X = series
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:len(X)]
dates_test = dates[size:len(dates)]

# walk-forward validation: refit the ARIMA on the growing history at each step
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order=(3, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f, error=%f' % (yhat, obs, obs - yhat))

# plot
pyplot.plot(dates_test, test, lw=1.5, color="blue", label="expected")
pyplot.plot(dates_test, predictions, color='red', label="predicted")
pyplot.legend()
pyplot.show()

mse = mean_squared_error(test, predictions)
print("Mean Squared Error:", mse)
rmse = math.sqrt(mse)
print("Root Mean Squared Error:", rmse)
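# The next cell calls difference() and inverse_difference(), which are not
# defined in this section; a minimal sketch of the usual seasonal-differencing
# helpers it presumably refers to (an assumption, not the notebook's code):
def difference(dataset, interval=1):
    # subtract the observation from one seasonal period earlier
    return [dataset[i] - dataset[i - interval] for i in range(interval, len(dataset))]

def inverse_difference(history, yhat, interval=1):
    # undo the differencing by adding back the value from one period earlier
    return yhat + history[-interval]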
# rolling forecasts; `history`, `y`, and `bias` are assumed to be defined
# earlier in the notebook
predictions = list()

# difference data
months_in_year = 12
diff = difference(history, months_in_year)
model = ARIMA(diff, order=(0, 0, 1))
model_fit = model.fit(trend='nc', disp=0)

# first forecast, mapped back to the original scale with the bias term
yhat = float(model_fit.forecast()[0])
yhat = bias + inverse_difference(history, yhat, months_in_year)
predictions.append(yhat)
history.append(y[0])
print('>Predicted=%.3f, Expected=%.3f' % (yhat, y[0]))

for i in range(1, len(y)):
    # difference data and refit on the growing history
    diff = difference(history, months_in_year)
    model = ARIMA(diff, order=(0, 0, 1))
    model_fit = model.fit(trend='nc', disp=0)
    # predict
    yhat = model_fit.forecast()[0]
    yhat = bias + inverse_difference(history, yhat, months_in_year)
    predictions.append(yhat)
    # observation
    obs = y[i]
    history.append(obs)
    print('>Predicted=%.3f, Expected=%.3f' % (yhat, obs))

# report performance
from math import sqrt
mse = mean_squared_error(y, predictions)
rmse = sqrt(mse)
print('RMSE: %.3f' % rmse)
pyplot.plot(y, color='green')
pyplot.plot(predictions, color='red')
pyplot.show()
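# Not from the original notebook: a small AIC-ranked grid search over ARIMA
# orders on the differenced series, as one way the (0, 0, 1) order above
# might have been chosen (the search ranges are arbitrary).
import itertools

best = None
for p, d, q in itertools.product(range(3), range(2), range(3)):
    try:
        fit = ARIMA(diff, order=(p, d, q)).fit(disp=0)
        if best is None or fit.aic < best[0]:
            best = (fit.aic, (p, d, q))
    except Exception:
        continue  # skip orders that fail to converge
print('best order by AIC:', best[1])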