Example #1
def getresult(*a):
    from sklearn.neighbors import KNeighborsClassifier as kn
    import pandas as pd

    df = pd.read_csv(r'C:\Users\Admin\Desktop\Laxman\log\iris.csv')
    #print(df)
    x = df.iloc[:, :4]
    #print(x)
    y = df.iloc[:, 4]
    y = y.replace({'setosa': 0, 'versicolor': 1, 'virginica': 2})
    #print(y)

    model = kn()  # KNeighborsClassifier with the default k=5
    model.fit(x, y)

    # *a collects the four feature values passed by the caller
    r = model.predict([list(a)])
    print('r=', r)
    return r
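A minimal usage sketch (the four arguments stand for one flower's measurements; the values are illustrative only):

result = getresult(5.1, 3.5, 1.4, 0.2)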
Example #2
from sklearn.neighbors import NearestNeighbors


def knn(DataSmoteMSample, k):
    # Unsupervised neighbor search: there are no labels here, so this needs
    # NearestNeighbors rather than KNeighborsClassifier (whose fit() requires y).
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(DataSmoteMSample)
    distances, indices = nbrs.kneighbors(DataSmoteMSample)
    return indices
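A quick sketch of calling it on a toy array (shape and values are illustrative):

import numpy as np

sample = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])
print(knn(sample, 2))  # each row's own index plus its nearest neighbor's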
Example #3
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import (ExtraTreesClassifier, GradientBoostingClassifier,
                              RandomForestClassifier, VotingClassifier)
from sklearn.neighbors import KNeighborsClassifier as kn

data = pd.read_csv('red.csv')
x = data[[
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol'
]]
y = data['quality']
clf1 = ExtraTreesClassifier(n_estimators=82,
                            max_depth=None,
                            min_samples_split=2,  # must be >= 2 in scikit-learn
                            random_state=0)
clf2 = RandomForestClassifier(random_state=0,
                              n_estimators=250,
                              min_samples_split=2)  # must be >= 2 in scikit-learn
clf3 = GradientBoostingClassifier(n_estimators=82,
                                  learning_rate=0.1,
                                  max_depth=1,
                                  random_state=0)
clf4 = GaussianNB()
clf5 = kn(n_neighbors=13)
test = pd.read_csv('red_test.csv')
x_test = test[[
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol'
]]
y_test = test['quality']
clf = VotingClassifier(estimators=[('et', clf1), ('rf', clf2), ('gb', clf3),
                                   ('gnb', clf4), ('kn', clf5)],
                       voting='soft',
                       weights=[14, 3, 1, 1, 3]).fit(x, y)
print(clf.score(x_test, y_test))
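Soft voting averages the estimators' predict_proba outputs with the given weights, so every member must support predict_proba (all five here do). A sketch of sanity-checking the weighting with cross-validation (the 5-fold choice is arbitrary):

from sklearn.model_selection import cross_val_score

scores = cross_val_score(clf, x, y, cv=5)  # refits the ensemble on each fold
print(scores.mean(), scores.std())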
Example #4
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier as kn
import matplotlib.pyplot as plt
data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']
clf = kn(n_neighbors=1)
clf.fit(x, y)
print(clf.score(x, y))
t = np.arange(0.0, 31.0)  # assumes the 'ice' series has exactly 31 rows
plt.plot(t, y, '--', t, clf.predict(x), '-')  # actual (dashed) vs. fitted (solid)
plt.show()
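With n_neighbors=1 the training score is essentially guaranteed to be 1.0, since each sample is its own nearest neighbor. A sketch of a more honest estimate on a held-out split (the 80/20 split and random_state are arbitrary):

from sklearn.model_selection import train_test_split

x_tr, x_te, y_tr, y_te = train_test_split(x, y, test_size=0.2, random_state=0)
model = kn(n_neighbors=1).fit(x_tr, y_tr)
print(model.score(x_te, y_te))  # accuracy on unseen rows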
Example #5
import numpy as np
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier as kn

# train_data, test_data and the split point N are assumed to be defined upstream.
train_data = train_data.drop(['timedelta'], axis=1)  # drop the non-predictive 'timedelta' column

# train_data= train_data[train_data["shares"]<40000]

X = np.array(train_data.drop(['shares'], axis=1))
y = np.array(train_data['shares']) #This is the target
X = preprocessing.scale(X)

XTrain = X[:N,:] #use the first N samples for training
yTrain = y[:N]
XVal = X[N:,:] #use the rests for validation
yVal = y[N:]

Xtest = test_data.values
Xtest = preprocessing.scale(Xtest)

# XTrain is a plain numpy array at this point

for i in [1,3,5,10,20]:
    model = kn(n_neighbors=i, n_jobs=-1)
    model.fit(XTrain,yTrain)
    training = model.predict(XTrain)
    validation = model.predict(XVal)

    print "KN " + str(i)
    print "Training error ", np.mean(np.abs(yTrain - training))
    print "Validation error ", np.mean(np.abs(yVal - validation))

    result = model.predict(Xtest)
    np.savetxt('result/resultKN'+ str(i) + '.txt', result)
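Since 'shares' is a numeric target and the errors above are mean absolute errors, a KNeighborsRegressor is arguably the more natural estimator; a sketch of the swap (k=5 is an arbitrary choice):

from sklearn.neighbors import KNeighborsRegressor

model = KNeighborsRegressor(n_neighbors=5, n_jobs=-1)
model.fit(XTrain, yTrain)
print(np.mean(np.abs(yVal - model.predict(XVal))))  # validation MAE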
print("Eigenvalues of x = \n", eigenvalues)
print("Eigenvectors of x = \n", eigenvectors)
# Keep only components whose eigenvalue is at least 1 (Kaiser criterion)
keep = []
for i in range(0, len(eigenvalues)):
    if eigenvalues[i] >= 1:
        keep.append(i)
evalsfilt = eigenvalues[keep]
print("Number of Dimensions used is ", (len(evalsfilt)))
transf = eigenvectors * la.inv(np.sqrt(np.diagflat(eigenvalues)))
print("Transformation Matrix = ", transf)
transf_matrix = matrix * transf
print("The Transformed x = ", transf_matrix)
xtx1 = transf_matrix.transpose() * transf_matrix
print("Expect an Identity Matrix = ", xtx1)
#Q3 - d
# Unsupervised neighbor search, so this needs NearestNeighbors (fit() takes no labels).
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier as kNC

neigh = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='euclidean')
nbrs = neigh.fit(transf_matrix)
kNNSpec = kNC(n_neighbors=5)
nbrsC = kNNSpec.fit(transf_matrix, np.array(no_of_frauds))
scor = nbrsC.score(transf_matrix, np.array(no_of_frauds))
print("The result of score function is " + str(round(scor, 4)))
#Q3 - e
focal = [7500, 15, 3, 127, 2, 2]
print("The focal observation is ", str(focal))
transfFocal = focal * transf
print("The Transformed focal observation is " + str(transfFocal))
myNeighbors_t = nbrs.kneighbors(transfFocal, return_distance=False)
print("The indices of the five neighbors of the focal are " +
      str(myNeighbors_t))
myNeighbors_t_values = matrix[myNeighbors_t]
print("The input and target values of the nearest neighbors are \n",
      myNeighbors_t_values)
Example #7
# 'file' is assumed to be a DataFrame of MNIST digits loaded upstream
print(file.shape)
print(file)
mnist=dict()
print(file.columns)
mnist['data']=file
mnist['target']=file['label']
del mnist['data']['label']
mnist['data']=mnist['data'].values
mnist['target']=mnist['target'].values
X,y=mnist['data'],mnist['target']
X_train,y_train,X_test,y_test=X[:50000],y[:50000],X[50000:],y[50000:]
import numpy as np
shuffle_index=np.random.permutation(50000)
X_train,y_train=X_train[shuffle_index],y_train[shuffle_index]
from sklearn.neighbors import KNeighborsClassifier as kn
knn_clf=kn()
X_train=X_train[:1000]
y_train=y_train[:1000]
knn_clf.fit(X_train,y_train)
y_pred=knn_clf.predict(X_train)
from sklearn.model_selection import GridSearchCV as grid
param=[{'weights':['distance'],'n_neighbors':[2,3,4],'p':[4,5,6]}]
kn_clf=kn()
# note: for a classification task, scoring='accuracy' is the usual choice
grid_clf=grid(kn_clf,param,cv=2,scoring='neg_mean_squared_error')
print('start!')
import time
t1=time.time()
grid_clf.fit(X_train,y_train)
t2=time.time()
print('grid search took %.1f s' % (t2-t1))
print(grid_clf.best_estimator_)
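The fitted search also exposes best_params_ and best_score_, and its best model can be checked against the untouched tail of the data; a sketch (assuming X_test, y_test from the 50000: split above):

print(grid_clf.best_params_, grid_clf.best_score_)
print(grid_clf.best_estimator_.score(X_test, y_test))  # held-out accuracy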
Example #8
from sklearn.model_selection import train_test_split

# X and y (features and labels from fruits_df) are assumed to be defined upstream.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=1 / 5,
                                                    random_state=0)
# print(y_train)
print('Total samples: {}, training samples: {}, test samples: {}'.format(
    len(fruits_df), len(y_train), len(y_test)))

# 2. Visualize the feature variables
# sns.pairplot(data= fruits_df, hue='fruit_name', vars=['mass', 'width', 'height', 'color_score'])
# plt.tight_layout()
# plt.show()

# 3. Build the model
from sklearn.neighbors import KNeighborsClassifier as kn

knn = kn(n_neighbors=5)

# 4. Train the model
knn.fit(X_train, y_train)

# 5. Test the model
y_pred = knn.predict(X_test)  # predict y for the test set

from sklearn.metrics import accuracy_score

acc = accuracy_score(y_test, y_pred)  # compare predictions against the true labels
print('Accuracy: {}'.format(acc))

# 6. Examine how the choice of k affects the results
k_range = range(1, 20)
acc_score = []
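The k sweep is set up above but its body is cut off; a sketch of the usual continuation, scoring each k on the same test split (plotting assumes matplotlib.pyplot is imported as plt):

for k in k_range:
    model = kn(n_neighbors=k).fit(X_train, y_train)
    acc_score.append(model.score(X_test, y_test))
plt.plot(k_range, acc_score)
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()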
Example #9
import pandas as pd
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier as kn
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed
from sklearn.metrics import accuracy_score as ac
#plt.style.use('ggplot')
iris = datasets.load_iris()
print(type(iris))
print(iris.keys())
print(iris.data.shape)
print(iris.target_names)
# EDA
X = iris.data
y = iris.target
y = y.astype(float)
df = pd.DataFrame(X, columns=iris.feature_names)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=1 / 3,
                                                    random_state=6)

knn = kn(n_neighbors=6)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
#visualization

#plt.scatter(X_train,y_train,color='red')
#plt.plot(X_train,knn.predict(X_train),color= 'blue')
print("Accuracy using Knn AT N=6 is: ")
ac(y_test, pred)

#print(df.head())
#pd.scatter_matrix(df,c=y,figsize= [9,9],s= 150,marker= 'D')
Example #10
plt.show()

# ts_x, ts_y, prob_ts, ts_size, x_train, y_train and df_test come from earlier (elided) cells.
plt.scatter(ts_x, ts_y)
plt.plot(ts_x[np.argsort(ts_x)], prob_ts[:, 1][np.argsort(ts_x)], color='red')
plt.xlabel("X")
plt.ylabel("Y")
plt.title("HW2test Scatter Plot and Prob(y = 1|x)")
plt.show()

# ## (C)
# ### (1)

# In[24]:

for i in [1, 3, 9]:
    model_kn = kn(n_neighbors=i, weights='uniform',
                  algorithm='auto').fit(x_train, y_train.ravel())

    gen_x = np.linspace(0, 100, num=1000)
    pred_knn_gen = model_kn.predict(gen_x.reshape(1000, -1))
    pred_knn_gen_pr = model_kn.predict_proba(gen_x.reshape(1000, -1))

    ts_x = np.array(df_test['X'])
    ts_y = np.array(df_test['Y'])
    prob_ts_pr = model_kn.predict_proba(ts_x.reshape(ts_size[0], -1))
    prob_ts = model_kn.predict(ts_x.reshape(ts_size[0], -1))

    print("KNN Accuracy for HW2train using score() function- %.2f%%" %
          (model_kn.score(x_train, y_train) * 100))
    print("KNN(%d) Accuracy for HW2test %.2f%%" %
          (i, model_kn.score(ts_x.reshape(ts_size[0], -1), ts_y) * 100))
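gen_x and its predicted probabilities are computed each iteration but never used in the excerpt shown; presumably they feed a per-k plot like the one at the top of the snippet. Inside the loop one might add:

    plt.scatter(ts_x, ts_y)
    plt.plot(gen_x, pred_knn_gen_pr[:, 1], color='red')  # P(y = 1|x) over the grid
    plt.title("kNN(%d): Prob(y = 1|x)" % i)
    plt.show()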
Example #11
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import (ExtraTreesClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.neighbors import KNeighborsClassifier as kn
data = pd.read_csv('red.csv')
x = data[[
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol'
]]
y = data['quality']
clf1 = GaussianNB()
clf2 = ExtraTreesClassifier(n_estimators=82,
                            max_depth=None,
                            min_samples_split=2,  # must be >= 2 in scikit-learn
                            random_state=0)
clf3 = RandomForestClassifier(random_state=0,
                              n_estimators=250,
                              min_samples_split=2)  # must be >= 2 in scikit-learn
clf4 = kn(n_neighbors=13)
clf = VotingClassifier(estimators=[('gnb', clf1), ('et', clf2), ('rf', clf3),
                                   ('kn', clf4)],
                       voting='soft',
                       weights=[1, 8, 2, 1]).fit(x, y)
test = pd.read_csv('red_test.csv')
x = test[[
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol'
]]
y = test['quality']
print(clf.score(x, y))
Example #12
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as kn

# train, test and imageId are assumed to be loaded upstream.
train = train.sample(frac=1)  # shuffle the rows
train = train.head(5000)      # keep a 5000-sample subset for speed

x = train.drop('label', axis=1)
y = train['label']

from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed
from sklearn.metrics import accuracy_score

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=101)

st_scaler = StandardScaler()

x_train = st_scaler.fit_transform(x_train)
x_test = st_scaler.transform(x_test)

model = kn()
model.fit(x_train, y_train)
predictions = model.predict(x_test)

print(accuracy_score(y_test, predictions))

x = model.predict(st_scaler.transform(test))  # scale test data with the fitted scaler
l = {'ImageId': imageId, 'label': x}
df = pd.DataFrame(l)
df.to_csv('sub_for_dig.csv', index=False)
Example #13
# data is assumed to be a digits DataFrame loaded upstream.
data.head()

# In[5]:

image = data.iloc[:, 1:]  # pixel columns
label = data.iloc[:, 0]   # first column holds the digit label (as a Series)

# # using knn to classify the images

# In[8]:

from sklearn.neighbors import KNeighborsClassifier as kn
from sklearn.model_selection import train_test_split

knn = kn(n_neighbors=10)
x_train, x_test, y_train, y_test = train_test_split(image,
                                                    label,
                                                    test_size=0.2,
                                                    random_state=100)
knn.fit(x_train, y_train)

# In[ ]:

predic = knn.predict(x_test)

# In[ ]:

from sklearn import metrics

# In[ ]:
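The notebook breaks off before the metric call; given the import above, the natural next cell would be something like:

print(metrics.accuracy_score(y_test, predic))  # fraction of test digits classified correctly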