Example #1
def fit_model(X, Y):
    # a function returning fits of several different models
    X = [[x] for x in X]
    regr = linear_model.LinearRegression().fit(X, Y)
    regr_ey = linear_model.LinearRegression().fit(np.exp(X), Y)
    regr_log = linear_model.LinearRegression().fit(np.log(X), Y)
    nb1NN = KNR(n_neighbors=1, algorithm='ball_tree').fit(X, Y)
    nb2NN = KNR(n_neighbors=2, algorithm='ball_tree').fit(X, Y)
    nb3NN = KNR(n_neighbors=3, algorithm='ball_tree').fit(X, Y)

    return regr, regr_ey, regr_log, nb1NN, nb2NN, nb3NN
Example #2
def fit_model(X, Y):
	#### function for generating a linear model for an individual ####
	X = [[x] for x in X]
	regr = linear_model.LinearRegression().fit(X, Y)
	regr_ey = linear_model.LinearRegression().fit(np.exp(X), Y)
	regr_log = linear_model.LinearRegression().fit(np.log(X), Y)
	nb1NN = KNR(n_neighbors=1, algorithm='ball_tree').fit(X, Y)
	nb2NN = KNR(n_neighbors=2, algorithm='ball_tree').fit(X, Y)
	nb3NN = KNR(n_neighbors=3, algorithm='ball_tree').fit(X, Y)

	return [regr, regr_ey, regr_log, nb1NN, nb2NN, nb3NN]
Example #3
def cross_validation(X, Y):

    # X - the data['stimulus'] list
    # Y - the data['converted'] list

    # local imports, so the sklearn cross_validation module stays visible
    # even though this function shares its name
    from sklearn import cross_validation, linear_model
    from sklearn.neighbors import KNeighborsRegressor as KNR
    import numpy as np

    N = len(X)  # LeaveOneOut needs the number of samples in this API

    resultsX = []
    resultsId = []
    resultsRegr = []
    resultsRegr_ey = []
    resultsRegr_log = []
    results1NN, results2NN, results3NN = [], [], []
    resultsY = []

    kf = cross_validation.LeaveOneOut(N)  # LeaveOneOut == KFold(n, n_folds=n)
    for train_index, test_index in kf:
        X_train, Y_train = X[train_index], Y[train_index]
        X_test, Y_test = X[test_index], Y[test_index]

        resultsX.append(X_test.values[0])
        resultsId.append(Y_test.values[0])
        resultsY.append(Y_test.values[0])

        X_train_list = [[x] for x in X_train]

        regr = linear_model.LinearRegression().fit(X_train_list, Y_train)
        resultsRegr += [[float(regr.predict(x)) for x in X_test]]

        regr_ey = linear_model.LinearRegression().fit(np.exp(X_train_list),
                                                      Y_train)
        resultsRegr_ey += [[float(regr_ey.predict(np.exp(x))) for x in X_test]]

        regr_log = linear_model.LinearRegression().fit(np.log(X_train_list),
                                                       Y_train)
        resultsRegr_log += [[
            float(regr_log.predict(np.log(x))) for x in X_test
        ]]

        nb1NN = KNR(n_neighbors=1,
                    algorithm='ball_tree').fit(X_train_list, Y_train)
        results1NN += [[float(nb1NN.predict(x)[0]) for x in X_test]]

        nb2NN = KNR(n_neighbors=2,
                    algorithm='ball_tree').fit(X_train_list, Y_train)
        results2NN += [[float(nb2NN.predict(x)[0]) for x in X_test]]

        nb3NN = KNR(n_neighbors=3,
                    algorithm='ball_tree').fit(X_train_list, Y_train)
        results3NN += [[float(nb3NN.predict(x)[0]) for x in X_test]]

    return resultsX, resultsId, resultsY, resultsRegr, resultsRegr_ey, resultsRegr_log, results1NN, results2NN, results3NN
Example #4
def model(X_train,
          y_train,
          X_test=np.array([]),
          y_test=np.array([]),
          method="LR"):
    # X_train - model inputs for training
    # X_test  - model inputs for testing
    # y_train - outputs for X_train
    # y_test  - outputs for X_test
    # method  - model type; the default is linear regression

    if method == "LR":
        lr = LR()
    elif method == "Ridge":
        lr = Ridge()
    elif method == "Lasso":
        lr = Lasso()
    elif method == "MLPRegressor":
        lr = MLPRegressor()
    elif method == "SVR":
        lr = SVR()
    elif method == "KNR":
        lr = KNR()
    elif method == "RFR":
        lr = RFR()
    elif method == "GBR":
        lr = GBR()
    else:
        print("unknown method")
        return False

#    alternative estimators kept from earlier experiments:
#    lr = MLPRegressor(hidden_layer_sizes=[5], activation="relu")
#    lr = SVR()
#    lr = KNR()
#    lr = Ridge(alpha=alpha.x)
#    lr = Lasso(alpha=0.001)
#    lr = RFR(n_estimators=5, max_features=2, max_depth=2, random_state=2)
#    lr = GBR()

    lr = lr.fit(X_train, y_train[:, 0])
    y_mod_train = lr.predict(X_train)
    c_train = CCC(y_train, y_mod_train[:, np.newaxis])

    c_test = -1
    if len(y_test) > 0:
        y_mod_test = lr.predict(X_test)
        c_test = CCC(y_test, y_mod_test[:, np.newaxis])

    return (lr, c_train, c_test)
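
Example #4 relies on a CCC helper that the excerpt never defines. Assuming it computes Lin's concordance correlation coefficient between the true and predicted columns (a common agreement metric for this kind of benchmark), a minimal stand-in could look like this:

import numpy as np

def CCC(y_true, y_pred):
    # hypothetical stand-in for the CCC helper used by model() above:
    # Lin's concordance correlation coefficient of two equal-length arrays
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    mean_t, mean_p = y_true.mean(), y_pred.mean()
    var_t, var_p = y_true.var(), y_pred.var()
    cov = np.mean((y_true - mean_t) * (y_pred - mean_p))
    return 2 * cov / (var_t + var_p + (mean_t - mean_p) ** 2)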
Example #5
class Algorithms(Enum):
    RandomForestRegressor = RFR()
    MLPRegressor = MLPR()
    KNeighborsRegressor = KNR()
    Ridge = RR()
    Lasso = LR()

    def __str__(self):
        return self.name
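
Because the enum values are estimator instances, callers can iterate over the members and fit each in turn. A usage sketch, assuming X_train/y_train/X_test/y_test splits exist and the aliases (RFR, MLPR, KNR, RR, LR) are the usual scikit-learn regressors:

from sklearn.base import clone

for algorithm in Algorithms:
    # clone() yields a fresh, unfitted copy of the estimator stored in the enum
    estimator = clone(algorithm.value)
    estimator.fit(X_train, y_train)
    print(algorithm, estimator.score(X_test, y_test))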
Example #6
def main():
    merged = pd.read_csv('./data/merge_copy.csv')
    merged = merged[merged['logerror'] <
                    merged['logerror'].quantile(0.995)]  # exclude outliers
    merged = merged[merged['logerror'] > merged['logerror'].quantile(0.005)]
    merged.drop([
        'garagecarcnt', 'heatingorsystemtypeid', 'propertylandusetypeid',
        'regionidcity', u'taxdelinquencyflag', u'taxdelinquencyyear'
    ],
                axis=1,
                inplace=True)

    logerror = merged['logerror']
    X_train, X_test, y_train, y_test = train_test_split(merged.drop(
        ['logerror', 'parcelid'], axis=1),
                                                        logerror,
                                                        test_size=0.2,
                                                        random_state=42)

    sc = StandardScaler()
    sc_train = sc.fit_transform(X_train)
    sc_test = sc.transform(X_test)

    # svr = svm.LinearSVR(tol=0.000001,max_iter = 50000)
    # svr_param_grid = {
    #     'C' : [0.005,0.01,0.05,0.1],
    #     # 'loss': ('epsilon_insensitive','squared_epsilon_insensitive')
    # }

    # svr_gcv = GridSearchCV(svr,svr_param_grid,scoring = make_scorer(mean_absolute_error,greater_is_better=False),n_jobs=6)

    # svr_gcv.fit(sc.fit_transform(merged.drop(['logerror','parcelid'],axis=1)),logerror)
    # print svr_gcv.best_estimator_
    # print svr_gcv.best_score_

    knr = KNR(n_jobs=4)
    knr_param_grid = {
        'n_neighbors': [1500, 1800, 2500],
        'leaf_size': [100, 200, 300],
        'algorithm': ('ball_tree', 'kd_tree', 'brute'),
    }
    knr_gcv = GridSearchCV(knr,
                           knr_param_grid,
                           scoring=make_scorer(mean_absolute_error,
                                               greater_is_better=False))

    knr_gcv.fit(merged.drop(['logerror', 'parcelid'], axis=1), logerror)

    knr_best = knr_gcv.best_estimator_
    print(knr_gcv.best_score_)
    print("For unscaled features, mean_absolute_error is",
          mean_absolute_error(y_test, knr_best.predict(X_test)))
    print(knr_best)
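
One detail worth noting in Example #6: with make_scorer(mean_absolute_error, greater_is_better=False), GridSearchCV maximizes the negated error, so best_score_ comes back negative. Recovering the actual MAE is just a sign flip:

# best_score_ is the negated MAE because greater_is_better=False flips the sign
best_mae = -knr_gcv.best_score_
print("best cross-validated MAE:", best_mae)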
Example #7
def create_growth_rate_mapping(lim_data, Z, **kwargs):
    """builds a model which maps the gene expression to growth rate
    includes no growth points outside the data"""
    # weights are not calculated here; that should be done above this level
    n_neighbors = kwargs.get('n_neighbors', 8)
    allgrdat = lim_data
    allgr = Z
    clf = KNR(n_neighbors=n_neighbors, weights='distance')
    clf.fit(allgrdat.values, allgr.values)
    return clf
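
Since lim_data and Z are accessed through .values, they are presumably pandas objects. A usage sketch with made-up expression data (the gene names and sizes are placeholders):

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor as KNR

# toy stand-ins: 20 samples x 3 genes, plus a growth rate per sample
lim_data = pd.DataFrame(np.random.rand(20, 3), columns=['geneA', 'geneB', 'geneC'])
Z = pd.Series(np.random.rand(20))

model = create_growth_rate_mapping(lim_data, Z, n_neighbors=4)
print(model.predict(lim_data.values[:2]))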
Example #8
def predict(k, X, Y, x, stock_symbols):
    # Makes predictions: creates a regressor, fits it on the training and
    # target data, and then predicts the next day's EODs.
    predictions = {}
    clf = KNR(n_neighbors=k)  # maybe activate weights='distance'
    #clf = MLPR(hidden_layer_sizes=(8,4), max_iter=10000)

    if len(X) > 0 and len(X) == len(Y):
        clf.fit(X,Y)
        X_width = len(X[0])
        i = 0
        for stock_symbol in stock_symbols:
            i += 1
            if stock_symbol in x:
                print("Predicting:     %20s %7.2f %%                  \r" % (stock_symbol, 100*i/len(stock_symbols)),end="")
                if len(x[stock_symbol]) == X_width:
                    predictions[stock_symbol] = clf.predict([x[stock_symbol]])[0]
        print()
    return predictions
Example #9
def train_model():
    data = get_data()
    data['co'] = data.state == 'colorado'
    data['fl'] = data.state == 'florida'
    data['nj'] = data.state == 'new jersey'
    data['ca'] = data.state == 'california'
    print(data.shape)

    X_train, X_test, y_train, y_test = split_data(data)
    X_train, y_train = remove_county_state(X_train, y_train)
    X_test, y_test = remove_county_state(X_test, y_test)

    # data preprocessing (removing mean and scaling to unit variance with StandardScaler)
    pipeline = make_pipeline(StandardScaler(), KNR())

    # set hyperparameters
    hyperparameters = {
        'kneighborsregressor__n_neighbors': [100, 50, 20, 15, 10, 5, 3, 2],
        'kneighborsregressor__weights': ['uniform', 'distance'],
    }

    # tune model via pipeline
    clf = GridSearchCV(pipeline, hyperparameters, cv=3)

    clf.fit(X_train, y_train)

    pred = clf.predict(X_test)
    # print('feature importances:', clf.feature_importances_)
    print('r2 score:', r2_score(y_test, pred))
    print('mse:', mean_squared_error(y_test, pred))
    print('*' * 20)
    print('best params:', clf.best_params_)
    print('best grid:', clf.best_estimator_)
    print('^' * 20)
    eval_model(clf.best_estimator_, X_train, y_train, X_test, y_test)
    print('#' * 20)
    print('score', clf.score(X_test, y_test))  # score is a method; call it
    return clf
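
eval_model is defined elsewhere in this project. A plausible minimal version, assuming it simply reports train and test metrics for the chosen estimator:

from sklearn.metrics import mean_squared_error, r2_score

def eval_model(model, X_train, y_train, X_test, y_test):
    # hypothetical helper: report fit quality on both splits
    for name, X, y in [('train', X_train, y_train), ('test', X_test, y_test)]:
        pred = model.predict(X)
        print('%s r2: %.4f  mse: %.4f' % (name, r2_score(y, pred),
                                          mean_squared_error(y, pred)))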
Example #10
File: bhp.py  Project: vishal-subedi/test
#support vector regression
svr = SVR(kernel = 'poly', degree = 3, C = 1)

#training
svr.fit(X_train, y_train)

#testing
y_pred_svr = svr.predict(X_test)

#r_square
r_2_svr = r2_score(y_test, y_pred_svr)



#KNeighbors Regression
knr = KNR(n_neighbors = 4, weights = 'distance', p = 4)

#training
knr.fit(X_train, y_train)

#testing
y_pred_knr = knr.predict(X_test)

#r_square
r_2_knr = r2_score(y_test, y_pred_knr)



#Random Forest Regression
rfr = RFR(n_estimators = 100, max_features = 'auto', random_state = 1)
Example #11
        j += 1
    dicts.append(dic)
    i += 1
#......

#......

# assignment skeleton (quoted out as a string in the original source):
# now get your testing error, store it for different K, and prepare to plot a figure
# terr = []
# for k in ks:
#     for dict in dict
terr = []
ks = [1,3,5,7,9]
for k in ks:
    knn = KNR(n_neighbors = k)
    knn.fit(sample_train,label_train)
    pred = knn.predict(sample_test)
    mse = (((pred - label_test) ** 2).sum()) / len(pred)
    terr.append(mse)

plt.plot(ks, terr, label="test MSE")
plt.title("KNN Testing error")
plt.xlabel("k")
plt.legend()
plt.show()

# ......

# ......
Example #12
#Standard Scaler to transform data.
sc_X = preprocessing.StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)


# Data is highly variable due to seasonality of weather parameters, traffic, wind speed, and many other parameters that can be associated with different pollutants. KNeighborsRegressor is tested first to capture this variation.

#import KNeighbors Regressor from sklearn
from sklearn.neighbors import KNeighborsRegressor as KNR
#initiate the KNR regressor with 3 neighbors as an arbitrary starting point
regressor = KNR(n_neighbors = 3, metric = 'minkowski', p = 2)
regressor.fit(X_train, Y_train)

#Predict values based on KNR model
Y_hat = regressor.predict(X_test)

#Root Mean Squared Error is a common measure of regression prediction error
from sklearn.metrics import mean_squared_error 
from math import sqrt

print('RMSE for n_neighbors = 3 is {}.'.format(sqrt(mean_squared_error(Y_test,Y_hat))))
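
Since n_neighbors = 3 was an arbitrary starting point, a natural next step is to sweep k and compare RMSEs. A short sketch reusing the scaled splits above:

# try a range of k values and report the RMSE for each
for k in [1, 3, 5, 10, 20, 50]:
    regressor_k = KNR(n_neighbors=k, metric='minkowski', p=2).fit(X_train, Y_train)
    rmse_k = sqrt(mean_squared_error(Y_test, regressor_k.predict(X_test)))
    print('RMSE for n_neighbors = {} is {:.3f}.'.format(k, rmse_k))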


Example #13
def run():
    df = pd.read_csv(e1.get())
    s = int(e2.get())
    if s != 0:
        df.drop(df.columns[s-1], axis=1, inplace=True)

    s = int(e3.get())
    if s != 0:
        df.drop(df.columns[s-1], axis=1, inplace=True)

    no_of_rows, no_of_col = df.shape

    s = e4.get()

    if s != '0':
        for column in df:
            df = df[~df[column].isin([s, 'NaN'])]

    df.reset_index(drop=True, inplace=True)
    no_of_rows, no_of_col = df.shape


    for column in df:
        if isinstance(df[column][0], str):
            try:
                df[column] = df[column].apply(lambda i: clean_float(i))
            except ValueError:
                encoder = le()
                encoder.fit(df[column])
                df[column] = encoder.transform(df[column])

    no_of_rows, no_of_col = df.shape
    p = e5.get()

    y = np.array((df[p]))
    x = np.array(df.drop(p, axis=1))


    x_train, x_test, y_train, y_test = tts(x, y, test_size=0.3)

    x_train = preprocessing.scale(x_train)
    x_test = preprocessing.scale(x_test)

    c1 = var1.get()
    c2 = var2.get()

    if c1 == 1:
        clf1 = svm.SVR(kernel='rbf')
        clf1.fit(x_train, y_train)
        model_name1 = e6.get()
        with open(model_name1, 'wb') as f:
            pickle.dump(clf1, f)
        result.delete('1.0', END)
        result.insert(END, 'R^2 score for SVR: ' + str(clf1.score(x_test, y_test)) + '\n')

    if c2 == 1:
        clf2 = KNR(n_neighbors=3)
        clf2.fit(x_train, y_train)
        model_name2 = e7.get()
        with open(model_name2, 'wb') as f:
            pickle.dump(clf2, f)
        if c1 != 1:
            result.delete('1.0', END)
        result.insert(END, 'R^2 score for KNN: ' + str(clf2.score(x_test, y_test)))
Example #14
File: down.py  Project: sansonaa/dvoa
# df_avg == 3 year rolling average + yr4 stats
X, y = df_avg[['all_avg']].values, df_avg['all_yr'].values
X, y = df_avg[['all_prev']].values, df_avg['all_yr'].values
X, y = df_avg[['all_avg', 'all_prev']].values, df_avg['all_yr'].values

X, y = df_avg[[
    '1D_avg', '2D_avg', '3D_avg', 'all_avg', '1D_prev', '2D_prev', '3D_prev',
    'all_prev'
]].values, df_avg['all_yr'].values
X_train, X_test, y_train, y_test = tts(X, y)

lin = LR(fit_intercept=False)
lin.fit(X, y)
lin.score(X, y)

knn = KNR(n_neighbors=5)
knn.fit(X_train, y_train)
print(knn.score(X_train, y_train))
print(knn.score(X_test, y_test))

ns = range(1, 30, 2)
scores = []
for n in ns:
    knn = KNR(n_neighbors=n)
    knn.fit(X_train, y_train)
    scores.append(knn.score(X_train, y_train))

rf = RFR(n_estimators=50)
rf.fit(X_train, y_train)
rf.score(X_train, y_train)
Example #15
def main():
    # ----------------------------
    # Training data
    # ----------------------------
    # Loading training data
    trainingDataFile = 'Training_set.csv'
    trainingData = pd.read_csv(trainingDataFile)

    # Obtaining unique cases of events (Note: This remains the same for both training and test data)
    myEventSet = []
    for x in trainingData.events:
        if x not in myEventSet:
            myEventSet.append(x)
    print('Unique events are as follows: \n', myEventSet,'\n')


    # Event string value reassignment based on unique event cases in 'myEventSet'
    newEvents = []
    for x in trainingData.events:
        for i in range(len(myEventSet)):
            if x == myEventSet[i]:
                newEvents.append(i)

    # Converting datetime to Seconds and saving day of the week
    day = []
    numDateTrainData = []
    for i in range(len(trainingData.date)):
        date_obj = datetime.strptime(str(trainingData.date[i]), '%Y-%m-%d')
        numDateTrainData.append(date_obj.timestamp())
        day.append(date_obj.weekday())

    #print(trainingData.date)
    dictReqCount = {}
    for i in range(len(trainingData.date)):
        if day[i] not in dictReqCount.keys():
            dictReqCount[day[i]] = []
        dictReqCount[day[i]].append(trainingData.request_count[i])
    #print(dictReqCount)

    dictAvgReqCount = {}
    for key,val in dictReqCount.items():
        dictAvgReqCount[key] = sum(val)/len(val)
    #print(dictAvgReqCount)

    maxValue = max(dictAvgReqCount.values())
    maxKey = [key for key,val in dictAvgReqCount.items() if val == maxValue]
    print('Day #{} of the week has the max mean request count'.format(maxKey[0]))

    minValue = min(dictAvgReqCount.values())
    minKey = [key for key, val in dictAvgReqCount.items() if val == minValue]
    print('Day #{} of the week has the min mean request count'.format(minKey[0]))


    # Assembling feature arrays
    features_trainingData = []
    for i in range(len(numDateTrainData)):
        row = [numDateTrainData[i], day[i], trainingData.calendar_code[i], trainingData.site_count[i], trainingData.max_temp[i], trainingData.min_temp[i], trainingData.precipitation[i], newEvents[i]]
        features_trainingData.append(row)

    #for i in range(len(features_trainingData)):
    #    print(len(features_trainingData[i]))

    #Y = list(trainingData.request_count)
    Y = trainingData.request_count
    X = features_trainingData

    #print('length of Y =', len(Y))
    #print(features_trainingData)

    # Models that work on both continuous and discrete data
    scoring = 'neg_mean_squared_error'
    models = [DTR(), GNB(), RFR(), KNR()]  # note: GNB is a classifier; it treats each distinct count as a class
    '''models = [[DTR(), DTR(max_depth=2), DTR(max_depth=5)],
              [GNB(), GNB(priors=None)],
              [RFR(), RFR(), RFR()],
              [KNR(), KNR(), KNR()]]
              '''
    seed = 7
    kfold = MS.KFold(n_splits=10, shuffle=True, random_state=seed)  # random_state requires shuffle=True
    mErr = []
    for model in models:
        results = MS.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        mErr.append(results.mean())
    #print(mErr)

    best_model_index = 0
    minAbsErr = math.fabs(mErr[0])
    for i in range(1, len(mErr)):
        if math.fabs(mErr[i]) < minAbsErr:
            best_model_index = i
            minAbsErr = math.fabs(mErr[i])
    print('\nModel #%d (i.e. %s) performed best' % (best_model_index, str(models[best_model_index]).split('(')[0]))

    # -------------------------------------------------------
    # Test Data
    # -------------------------------------------------------
    # Loading test data
    testDataFile = 'Test_set.csv'
    testData = pd.read_csv(testDataFile)

    # Event string reassignment using myEventSet from training data
    newEvents = []
    for x in testData.events:
        for i in range(len(myEventSet)):
            if x == myEventSet[i]:
                newEvents.append(i)

    # Converting datetime to Seconds and determining days of the week
    day = []
    numDateTestData = []
    for i in range(len(testData.date)):
        date_obj = datetime.strptime(str(testData.date[i]), '%Y-%m-%d')
        numDateTestData.append(date_obj.timestamp())
        day.append(date_obj.weekday())

    # Assembling feature arrays
    features_testData = []
    for i in range(len(numDateTestData)):
        row = [numDateTestData[i], day[i], testData.calendar_code[i], testData.site_count[i], testData.max_temp[i],
               testData.min_temp[i], testData.precipitation[i], newEvents[i]]
        features_testData.append(row)

    # Test data features
    X_test = features_testData

    # Test data prediction
    bestModel = models[best_model_index]
    Y_pred = bestModel.fit(X, Y).predict(X_test)
    Y_pred_train = bestModel.fit(X, Y).predict(X)
    print('\nThe predicted values for request count using the test data is as follows:\n',Y_pred)

    with open('predicted_request_count.csv', 'w') as output_file:
        for i in range(len(Y_pred)):
            output_file.write(str(Y_pred[i]) + '\n')

    # Plot the results
    plt.figure(1)
    plt.scatter(numDateTrainData, Y, c="darkorange", label="Training data")
    plt.scatter(numDateTestData, Y_pred, c="cornflowerblue", label="Test data model prediction")
    plt.scatter(numDateTrainData, Y_pred_train, c="red", label="Training data model prediction")
    plt.xlabel("Numerical Date")
    plt.ylabel("Page Count")
    plt.title("Best Model")
    plt.legend()
    plt.show()
Example #16
print('The R2 score of the Decision Tree Regressor model is: \t\t\t%s' %
      dtr_r2)

# DECISION TREE CLASSIFIER
dtc = DTC()
dtc.fit(X_train, y_train)
dtc_msq = msq(dtc.predict(X_test), y_test)
dtc_r2 = r2(y_test, dtc.predict(X_test))  # r2_score expects (y_true, y_pred)
print(
    '\nThe mean squared error of the Decision Tree Classifier model is: \t%s' %
    dtc_msq)
print('The R2 score of the Decision Tree Classifier model is: \t\t\t%s' %
      dtc_r2)

# K NEAREST NEIGHBORS REGRESSOR
knr = KNR()
knr.fit(X_train, y_train)
knr_msq = msq(knr.predict(X_test), y_test)
knr_r2 = r2(y_test, knr.predict(X_test))
print(
    '\nThe mean squared error of the K Nearest Neighbors Regressor model is: \t%s'
    % knr_msq)
print('The R2 score of the K Nearest Neighbors Regressor model is: \t\t%s' %
      knr_r2)

# K NEAREST NEIGHBORS CLASSIFIER
knc = KNC()
knc.fit(X_train, y_train)
knc_msq = msq(knc.predict(X_test), y_test)
knc_r2 = r2(y_test, knc.predict(X_test))
print(
Example #17
# N17-->N9,N4,N24,N8
# N19-->N6,N1
# N20-->N4,N8
# N21-->N14,N10,N6
# N22-->N13,N4
# N23-->N24
# N35-->N20,N2

data = full[0:33050]

scaler = RobustScaler()

train = scaler.fit_transform(data.loc[data.N23.notnull(), ['N24']])
test = scaler.transform(full.loc[full.N23.isnull(), ['N24']])  # apply the same scaling to the rows being imputed
ya = data.loc[data.N23.notnull(), ['N23']]
model = KNR()
model.fit(train, ya)
pred = model.predict(test)

full.loc[full.N23.isnull(), ['N23']] = pred

##xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

full.loc[full.N13.isnull(), 'N13'] = 3

full.loc[full.N4.isnull(),
         'N4'] = full.loc[((full['C1'] == 1) & (full['C2'] == 0)),
                          'N4'].mean()

data = full[0:33050]
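
The fill-in pattern in Example #17 repeats for each of the column mappings listed at the top, so it is worth factoring out. A sketch of a reusable helper, assuming full is a DataFrame whose first 33050 rows carry the known values:

from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.preprocessing import RobustScaler

def knn_impute(full, target, predictors, n_train=33050):
    # fill NaNs in `target` using a KNR fitted on rows where it is known
    data = full[0:n_train]
    known = data[target].notnull()
    scaler = RobustScaler()
    train = scaler.fit_transform(data.loc[known, predictors])
    ya = data.loc[known, target]
    test = scaler.transform(full.loc[full[target].isnull(), predictors])
    model = KNR().fit(train, ya)
    full.loc[full[target].isnull(), target] = model.predict(test)

# e.g. the first mapping from the comment block above:
# knn_impute(full, 'N17', ['N9', 'N4', 'N24', 'N8'])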
Example #18
import os

import numpy as np
import matlab  # project-local helper for reading/writing .mat files

from sklearn.neighbors import KNeighborsRegressor as KNR

today_train_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\train\today'
tomorrow_train_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\train\tomorrow'
today_test_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\test\today'
tomorrow_test_data_dir = r'D:\MasterDL\data_set\traffic_data\2011_yabx_speed\knn\test\tomorrow'
result_dir = r'D:\MasterDL\trans\yabx\knn_result'

today_train_data = matlab.read_matfile_from_dir(today_train_data_dir, 'speed',[252,35*288])
tomorrow_train_data = matlab.read_matfile_from_dir(tomorrow_train_data_dir, 'speed',[252,35*288])
today_test_data = matlab.read_matfile_from_dir(today_test_data_dir,'speed',[95,35*288])
tomorrow_test_data = matlab.read_matfile_from_dir(tomorrow_test_data_dir,'speed',[95,35*288])
reality = np.reshape(tomorrow_test_data,[1,95*35*288])
KNN = KNR(n_neighbors=1)
KNN.fit(today_train_data, tomorrow_train_data)  # learn the mapping from today's speeds to tomorrow's
predictions = KNN.predict(today_test_data)
matlab.save_matrix(os.path.join(result_dir, 'knn_result.mat'), predictions, 'knn')
predictions = np.reshape(predictions,[1,95*35*288])
mse = ((reality-predictions)**2).mean()
print('mse = ', mse)
for i in range(0,95*35*288):
    if reality[0][i] == 0:
        reality[0][i]=1
mre = np.mean(abs(reality - predictions) / reality)
mae = np.mean(abs(reality - predictions))
print('mre = ', mre)
print('mae = ', mae)
Example #19
import matplotlib.pyplot as plt
from sklearn import preprocessing as prepros
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor as KNR
# read_data is a project-local loader for energy.txt (not shown)

Features, Labels = read_data("energy.txt")
X_train, X_test, y_train, y_test = train_test_split(Features,
                                                    Labels,
                                                    test_size=.20,
                                                    random_state=37)

#Standardization of data
X_train_scaled = prepros.scale(X_train)
X_test_scaled = prepros.scale(X_test)
"""
####################K-Nearest Neighbors####################
"""
#No changes
KNNregressor = KNR().fit(X_train, y_train)
KNN_y_pred = KNNregressor.predict(X_test)
print("Mean squared error for K-Nearest Neighbors: {:.3f}.".format(
    mean_squared_error(KNN_y_pred, y_test)))

#number of neighbors changed to 7
KNNregressor = KNR(n_neighbors=7).fit(X_train, y_train)
KNN_y_pred = KNNregressor.predict(X_test)
print("\tNumber of neighbors changed to 7: {:.3f}.".format(
    mean_squared_error(KNN_y_pred, y_test)))

#weights changed to distance
KNNregressor = KNR(weights='distance').fit(X_train, y_train)
KNN_y_pred = KNNregressor.predict(X_test)
print("\tWeights changed to distance: {:.3f}.".format(
    mean_squared_error(KNN_y_pred, y_test)))
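
Rather than varying one setting at a time, the two knobs explored above can be searched jointly. A sketch using the same splits:

from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
}
search = GridSearchCV(KNR(), param_grid, scoring='neg_mean_squared_error', cv=5)
search.fit(X_train, y_train)
print("Best parameters:", search.best_params_)
print("Best CV MSE: {:.3f}.".format(-search.best_score_))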
Example #20
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn import datasets as ds
from sklearn.preprocessing import scale
from sklearn.model_selection import KFold, cross_val_score
from numpy import linspace

boston = ds.load_boston()
scaled = scale(boston.data)
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = linspace(start=1, stop=10, num=200)
result = list()
for p in n:
    # sweep the Minkowski power parameter itself, not the loop index
    knr = KNR(n_neighbors=4, weights='distance', metric='minkowski', p=p)
    cs_result = cross_val_score(knr,
                                X=scaled,
                                y=boston.target,
                                cv=kf,
                                scoring='neg_mean_squared_error')
    result.append(cs_result.mean())
result_max = max(result)
print(n[result.index(result_max)])  # the p value with the best (least negative) MSE
print(result_max)
Example #21
        except ValueError:
            encoder = le()
            encoder.fit(df[column])
            df[column] = encoder.transform(df[column])

p = input("Enter the name of prediction column:")

y = np.array((df[p]))
x = np.array(df.drop(p, axis=1))

x_train, x_test, y_train, y_test = tts(x, y, test_size=0.3)

x_train = preprocessing.scale(x_train)
x_test = preprocessing.scale(x_test)
y_train = preprocessing.scale(y_train)
y_test = preprocessing.scale(y_test)

print("Which algo would you like to use")
print("1. SVM")
print("2. KNN")
c = int(input("Enter your choice: "))

if c == 1:
    clf1 = svm.SVR(kernel='poly', degree=7)
    clf1.fit(x_train, y_train)
    print('R^2 score for SVR: ', clf1.score(x_test, y_test))
else:
    clf = KNR(n_neighbors=10)
    clf.fit(x_train, y_train)
    print('R^2 score for KNN: ', clf.score(x_test, y_test))
Example #22
data = data.iloc[:rows, 1:]

Model_Names = [
    "Support Vector Classifier", "Linear SVC", "GaussianNB",
    "Random Forest Classifier", "K Nearest Neighbours", "AdaBoost Classifier",
    "Decision Tree Classifier"
]

print(data.columns.values)

Models = {
    1: SVC(C=1, kernel='rbf', gamma='scale', probability=True),
    2: LinearSVC(),
    3: GNB(),
    4: RFC(n_estimators=100),
    5: KNR(),  # note: this is the KNeighbors regressor, unlike the other (classifier) entries
    6: ABC(base_estimator=RFC()),
    7: DTC()
}


def RunModel(model, data, columns, Predict):
    X = data[columns]
    Y = data[Predict]

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        train_size=train,
                                                        test_size=test,
                                                        random_state=42)
Example #23
    svc_mnist.score(X_test_mnist_nmf, y_test_mnist)))

mlpc_mnist = MLPC(hidden_layer_sizes=(200,), activation='tanh',
                  random_state=37).fit(X_train_mnist_nmf, y_train_mnist)
print("Test set score of MLPC mnist: {:.3f}".format(
    mlpc_mnist.score(X_test_mnist_nmf, y_test_mnist)))

print('mnist\n')
"""
##############################Regression##############################
"""
from sklearn.metrics import mean_squared_error

#k-Nearest Neighbors Regressor
from sklearn.neighbors import KNeighborsRegressor as KNR
knr_energy = KNR(weights='distance').fit(X_train_energy_pca, y_train_energy)
y_pred_knr = knr_energy.predict(X_test_energy_pca)
print("Mean squared error for kNN: {:.3f}.".format(
    mean_squared_error(y_pred_knr, y_test_energy)))

#Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor as DTR
dtr_energy = DTR(max_depth=11,
                 min_samples_split=16,
                 min_samples_leaf=2,
                 random_state=37).fit(X_train_energy_stand, y_train_energy)
y_pred_dtr = dtr_energy.predict(X_test_energy_stand)
print("Mean squared error for DTR: {:.3f}.".format(
    mean_squared_error(y_pred_dtr, y_test_energy)))

#Random Forest Regressor
Example #24
import numpy as np
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


data_train = np.loadtxt('data_train.csv', delimiter=",", dtype="float")
x = data_train[:, 0:20]
ss = MinMaxScaler()
x = ss.fit_transform(x)
y = data_train[:, 20]


x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=16)


model = KNR(n_neighbors=4)
model.fit(x_train,y_train)

rmse = np.sqrt(mse(y_train,model.predict(x_train)))
r2 = r2_score(y_train,model.predict(x_train))
rmset = np.sqrt(mse(y_test,model.predict(x_test)))
r2t = r2_score(y_test,model.predict(x_test))
print(model.predict(x_test))
print(y_test)
print("train RMSE:", rmse)
print("test RMSE:", rmset)

print("train R2:", r2)
print("test R2:", r2t)
Example #25
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from ngboost.evaluation import *
from ngboost.ngboost import NGBoost
from ngboost.learners import default_linear_learner, default_tree_learner
from ngboost.distns import Normal
from ngboost.scores import MLE, CRPS
from sklearn.neighbors import KNeighborsRegressor as KNR
from argparse import ArgumentParser

np.random.seed(1)

default_knr_learner = lambda: KNR()


def gen_data(n=50, bound=1, deg=3, beta=1, noise=0.9, intcpt=-1):
    x = np.linspace(-bound, bound, n)[:, np.newaxis]
    h = np.linspace(-bound, bound, n)[:, np.newaxis]
    e = np.random.randn(*x.shape) * (0.1 + 10 * np.abs(x))
    y = 50 * (x**deg) + h * beta + noise * e + intcpt
    return x, y.squeeze(), np.c_[h, np.ones_like(h)]


if __name__ == "__main__":

    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=301)
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
Example #26
def cross_validation(X, Y):

    from sklearn.metrics import mean_squared_error as mse
    from sklearn import cross_validation, linear_model
    from math import exp, log
    from sklearn.neighbors import NearestNeighbors as NN
    from sklearn.neighbors import KNeighborsClassifier as KNC
    from sklearn.neighbors import KNeighborsRegressor as KNR

    N = 31

    resultsX = []
    resultsId = []
    resultsRegr = []
    resultsRegr_ey = []
    resultsRegr_log = []
    results1NN, results2NN, results3NN = [], [], []
    resultsY = []

    kf = cross_validation.KFold(N, n_folds=10, shuffle=True, random_state=1)  # random_state only matters when shuffle=True
    #kf = cross_validation.LeaveOneOut(N) #LeaveOneOut == KFold(n, n_folds=n)
    for train_index, test_index in kf:
        X_train, Y_train = X[train_index], Y[train_index]
        X_test, Y_test = X[test_index], Y[test_index]
        print(X_train, X_test)

        resultsX.append(X_test)
        resultsId.append(Y_test)
        resultsY.append(Y_test)

        X_train_list = [[x] for x in X_train]

        regr = linear_model.LinearRegression().fit(X_train_list, Y_train)
        resultsRegr += [[float(regr.predict(x)) for x in X_test]]

        regr_ey = linear_model.LinearRegression().fit(np.exp(X_train_list),
                                                      Y_train)
        resultsRegr_ey += [[float(regr_ey.predict(np.exp(x))) for x in X_test]]
        # X_regr_ey = []
        # for x in X_test:
        # 	x_regr = float(regr_ey.predict(x))
        # 	if (x_regr >= 1):
        # 		X_regr_ey.append(log(x_regr))
        # 	else:
        # 		X_regr_ey.append(1)

        # resultsRegr_ey += [X_regr_ey]
        # resultsRegr_ey += [[log(float(regr_ey.predict(x))) for x in X_test]]
        # resultsRegr_ey += [[log(abs(float(regr_ey.predict(x)))) for x in X_test]]

        regr_log = linear_model.LinearRegression().fit(np.log(X_train_list),
                                                       Y_train)
        resultsRegr_log += [[
            float(regr_log.predict(np.log(x))) for x in X_test
        ]]
        #Y_train = np.asarray(Y_train, dtype="|S6")
        nb1NN = KNR(n_neighbors=1,
                    algorithm='ball_tree').fit(X_train_list, Y_train)
        results1NN += [[float(nb1NN.predict(x)[0]) for x in X_test]]

        nb2NN = KNR(n_neighbors=2,
                    algorithm='ball_tree').fit(X_train_list, Y_train)
        results2NN += [[float(nb2NN.predict(x)[0]) for x in X_test]]

        nb3NN = KNR(n_neighbors=3,
                    algorithm='ball_tree').fit(X_train_list, Y_train)
        results3NN += [[float(nb3NN.predict(x)[0]) for x in X_test]]

    #return [mse(resultsX, resultsId), mse(resultsX, resultsRegr), mse(resultsX, resultsRegr_ey), mse(resultsX, resultsRegr_log), mse(resultsX, results1NN), mse(resultsX, results2NN), mse(resultsX, results3NN)]
    return [
        mse(resultsX, resultsId),
        mse(resultsY, resultsRegr),
        mse(resultsY, resultsRegr_ey),
        mse(resultsY, resultsRegr_log),
        mse(resultsY, results1NN),
        mse(resultsY, results2NN),
        mse(resultsY, results3NN)
    ]
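
The returned list is seven MSE values in a fixed order, so a caller has to remember which is which. A small usage sketch that labels them (assuming data is the DataFrame referred to in Example #3's comments):

labels = ['identity', 'linear', 'exp', 'log', '1-NN', '2-NN', '3-NN']
errors = cross_validation(data['stimulus'], data['converted'])
for label, err in zip(labels, errors):
    print('{:>8s} MSE: {:.4f}'.format(label, err))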