Пример #1
0
def predict_image(classifier, test_histogram_of_features):
    """Run the trained classifier on the test histograms and print accuracy.

    Parameters
    ----------
    classifier : object
        Classifier already fitted on normalized histograms of features.
    test_histogram_of_features : ndarray
        Histogram-of-features rows for the test images.
    """
    # Hard-coded ground-truth labels for the fixed 10-image test set.
    expected = np.array([0, 2, 1, 0, 0, 1, 2, 0, 2, 1])
    predicted = classifier.predict(test_histogram_of_features)
    print(predicted)
    print(expected)
    acc = ac(predicted, expected) * 100
    print('accuracy: ', acc, '%')
Пример #2
0
 def SVMEvaluation(self, y_dev, X_dev):
     """Evaluate the fitted model on a dev split and print its metrics."""
     # Compare the gold standard (y_dev) against the model's predictions.
     predicted = self.model.predict(X_dev)
     bPrecis, bRecall, bFscore, bSupport = pr(y_dev, predicted, average='macro')
     # Show results: full classification report plus the summary numbers.
     bAcuracy = ac(y_dev, predicted)
     print(classification_report(y_dev, predicted))
     print(bAcuracy,bPrecis,bRecall,bFscore)
def evaluate(pred_vals, true_vals, pred_prob):
    """Return [MCC, F1, precision, accuracy, ROC-AUC, PR-AUC] for the predictions."""
    pr_precision, pr_recall, _ = metrics.precision_recall_curve(
        true_vals, pred_prob)
    scores = [
        mcc(true_vals, pred_vals),
        metrics.f1_score(true_vals, pred_vals),
        metrics.precision_score(true_vals, pred_vals),
        ac(true_vals, pred_vals),
        metrics.roc_auc_score(true_vals, pred_prob),
        metrics.auc(pr_recall, pr_precision),
    ]
    return scores
Пример #4
0
def dataTest(model_dir, test_generator, test_size, input_num, dims_num,
             batch_size):
    """Evaluate a saved Keras model on a batch generator and print metrics.

    Parameters
    ----------
    model_dir : str
        Path of the saved model to load.
    test_generator : iterable
        Yields (batch, labels) pairs; batches are padded to
        (batch_size, input_num, dims_num) when the last one is short.
    test_size : int
        Total number of test samples (used only for progress reporting).
    input_num, dims_num : int
        Per-sample input dimensions, used to zero-pad a short final batch.
    batch_size : int
        Batch size the model expects.
    """
    model = load_model(model_dir,
                       custom_objects={
                           'auc': auc,
                           'binary_PFA': binary_PFA,
                           'binary_PTA': binary_PTA
                       })
    labels_pre = []
    labels_true = []
    batch_num = test_size // batch_size + 1
    steps = 0
    for batch, labels in test_generator:
        if len(labels) == batch_size:
            labels_pre.extend(model.predict_on_batch(batch))
        else:
            # Last partial batch: pad with zeros so the model sees a full
            # batch, then keep only the predictions for the real samples.
            batch = np.concatenate(
                (batch,
                 np.zeros((batch_size - len(labels), input_num, dims_num))))
            labels_pre.extend(model.predict_on_batch(batch)[0:len(labels)])
        labels_true.extend(labels)
        steps += 1
        print("%d/%d batch" % (steps, batch_num))
    labels_pre = np.array(labels_pre).round()

    def to_y(labels):
        # Collapse one-hot rows to a binary class id: [1, 0] -> 0, else 1.
        y = []
        for i in range(len(labels)):
            if labels[i][0] == 1:
                y.append(0)
            else:
                y.append(1)
        return y

    y_true = to_y(labels_true)
    y_pre = to_y(labels_pre)
    precision = precision_score(y_true, y_pre)
    recall = recall_score(y_true, y_pre)
    accuracy = accuracy_score(y_true, y_pre)
    f1 = f1_score(y_true, y_pre)

    print("Precision score is :", precision)
    print("Recall score is :", recall)
    print("Accuracy score is : ", accuracy)
    print("F1 score is : ", f1)

    fpr, tpr, thresholds = roc_curve(y_true, y_pre)
    # BUG FIX: the original called ac(fpr, tpr) — accuracy_score, which is
    # not an AUC; integrate the ROC curve (trapezoidal rule) instead.
    roc_auc = np.trapz(tpr, fpr)
    print("FP rate is :", fpr)
    print("TP rate is :", tpr)
    print("Roc_AOC is :", roc_auc)
Пример #5
0
# NOTE(review): `axs` is created earlier in the script (not shown here).
axs[1, 4].set_title('alcohol')

# Split target ("quality") from features.
y = df["quality"]
df = df.drop(columns="quality")
x = df

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15)

from sklearn import neighbors
from sklearn.metrics import accuracy_score as ac
from sklearn.metrics import confusion_matrix

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=1)
x_train = lda.fit_transform(x_train, y_train)
# BUG FIX: the original refit LDA on the test split (fit_transform with
# y_test), leaking test labels; project the test data with the fitted model.
x_test = lda.transform(x_test)
temp, var = 0, 0
ans = []
from sklearn.ensemble import RandomForestClassifier as RFC
# Sweep max_depth in [1, 20) and keep the best-scoring depth/predictions.
for i in range(1, 20):
    cl = RFC(max_depth=i, random_state=0)
    cl.fit(x_train, y_train)
    y_pred = cl.predict(x_test)
    # accuracy_score is symmetric, so fixing the argument order to the
    # conventional (y_true, y_pred) does not change the value.
    if (temp < ac(y_test, y_pred)):
        temp = ac(y_test, y_pred)
        var = i
        ans = y_pred
print(str(temp) + " " + str(var))
# BUG FIX: confusion_matrix expects (y_true, y_pred); the original order
# printed the transposed matrix.
print(confusion_matrix(y_test, y_pred))
Пример #6
0
        # Fragment: runs inside a per-file loop (header not shown here).
        # Absolute reconstruction error between model output and target.
        gaps = np.abs(result[:, 0] - tmp_y)
        MEAN = mean_and_std.loc[data.columns[0], "Mean"]
        STD = mean_and_std.loc[data.columns[0], "Std"]
        N = attribute_N.loc[data.columns[0], "N"]
        label = data.iloc[299: -1, 1].values

        # Flag a point as anomalous when its gap exceeds mean + N * std.
        y_pred = [1 if gap > MEAN + N*STD else 0 for gap in gaps]
        # Print per-dimension prediction information.
        aims[299:-1, read_file_list.index(file)] = y_pred
        # NOTE(review): `filter` here must be a project helper, not the
        # builtin (the builtin needs two arguments) — confirm.
        y_pred = filter(y_pred)
        print("---------------------")
        print(data.columns[0], " 调整之后的预测精度展示:")
        print("N = ", N)
        print("mean = ",MEAN)
        print("std = ",STD)
        print("Test acc score: {:.6f}".format(ac(label, y_pred)))
        print("Test p score: {:.6f}".format(p(label, y_pred)))
        print("Test r score: {:.6f}".format(r(label, y_pred)))
        print("confusion matrix:")
        print(confusion_matrix(label, y_pred))

    # Compute and store the final (ensemble) threshold.
    source_label = source_label
    aims = np.sum(aims, axis=1)
    f = open('./data/end_N.txt')
    end_N = int(float(f.read()))
    f.close()
    # f1 score
    # NOTE(review): end_N read from file is immediately overwritten below.
    end_N = 5
    aim_label = [1 if aim > end_N else 0 for aim in aims]
Пример #7
0
    # Fragment: body of a larger function (signature not shown here).
    print("训练集样本大小为:", train_x.shape[0])
    print("训练集正常样本大小为:", train_x.shape[0] - np.sum(train_y))
    print("训练集异常样本大小为:", np.sum(train_y))
    print("测试集样本大小为:", test_x.shape[0])
    print("测试集正常样本大小为:", test_x.shape[0] - np.sum(test_y))
    print("测试集异常样本大小为:", np.sum(test_y))

    # Train the 2-cluster KMeans model and persist it to disk.
    k_means = KMeans(n_clusters=2)
    k_means.fit(train_x, train_y)
    joblib.dump(k_means, "kmeans_model.pkl")

    # Predict on the test split and print accuracy / precision / recall.
    # NOTE(review): KMeans cluster ids are arbitrary — cluster 1 is not
    # guaranteed to be the anomaly class; verify the label mapping.
    y_pred = k_means.predict(test_x)
    print("--------------------")
    print("预测结果为:")
    print("Test acc score: {:.6f}".format(ac(test_y, y_pred)))
    print("Test p score: {:.6f}".format(p(test_y, y_pred)))
    print("Test r score: {:.6f}".format(r(test_y, y_pred)))
    print("confusion matrix:")
    print(confusion_matrix(test_y, y_pred))
    '''
    预测结果为:
    Test acc score: 0.549053
    Test p score: 0.023267
    Test r score: 0.416620
    confusion matrix:
    [[76638 62088]
     [ 2071  1479]]
    '''
Пример #8
0
train_x, train_y = final_data(list_image_count, train_x, train_y)


# One-hot encode the binary labels: 1 -> [0, 1], 0 -> [1, 0].
# NOTE(review): `t` is assumed to be an empty list created earlier — confirm.
for i in train_y:
    if i==1:
        x=np.array([0,1])
        t.append(x)
    if i==0:
        x=np.array([1,0])
        t.append(x)
# BUG FIX: the original converted an unrelated name (`temp`) instead of the
# list `t` that the loop above just filled.
train_y = np.array(t)

# Small CNN over (210, 160, 5) inputs with a 2-way softmax head.
model = Sequential()
model.add(Conv2D(32, strides=2, kernel_size=(3,3), activation='relu', input_shape=(210,160,5)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, strides=2, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(2048, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(final_train_data_x, final_train_data_y_ohe)
pred_y = model.predict(final_train_data_x)
# NOTE(review): truncating softmax probabilities with astype(int) maps most
# outputs to 0; an argmax/round is probably intended — confirm.
pred_y=pred_y.astype(int)
accuracy = ac(final_train_data_y_ohe, pred_y)
print('Accuracy :', accuracy)

model.save('final_model')

#model = load_model('my_model.h5')
Пример #9
0
# Confusion matrix for the logistic-regression predictions.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test_cal, y_pred_lg)

# In[48]:

print('confusion_matrix')
print(cm)

# In[49]:

from sklearn.metrics import accuracy_score as ac

# In[50]:

accuracy_logistic_calibration = ac(y_test_cal, y_pred_lg)

# In[54]:

print('Accuracy')
print(accuracy_logistic_calibration)

# In[102]:

# Fit a 5-NN classifier (Euclidean distance: minkowski with p=2) and
# evaluate it on the same calibration test split.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
knn.fit(X_train_calP, y_train_cal)

y_pred_knn = knn.predict(X_test_calP)
cm_knn = confusion_matrix(y_test_cal, y_pred_knn)
accuracy_knn = ac(y_test_cal, y_pred_knn)
Пример #10
0

# Save the cosine similarities in an array.
# NOTE(review): the corpus size 9000 is hard-coded and corpus[i] is assumed
# to hold a pair of texts — confirm against the loading code.
cos_dis = []
for i in range(0, 9000):
    text1 = corpus[i][0]
    text2 = corpus[i][1]
    vector1 = text_to_vector(text1)
    vector2 = text_to_vector(text2)
    cosine = get_cosine(vector1, vector2)
    cos_dis.append([cosine])
cos_dis = np.asarray(cos_dis)
Y = dataset.iloc[0:9000, 2:3]

# Persist the similarity feature for inspection.
df = pd.DataFrame(cos_dis)
df.to_csv('output.csv')

# Split the dataset into training and test sets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(cos_dis, Y, test_size=0.2)

# Use a random forest classifier for the classification problem.
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, criterion='entropy')
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Calculate the accuracy on the held-out split.
from sklearn.metrics import accuracy_score as ac
score = ac(y_test, y_pred)
Пример #11
0
	# Fragment: tail of a loop reading test documents (header not shown).
	temp = open(path4 + x, 'r')
	temp = temp.read()
	X_test.append(temp)


x_train =  X_train
y_train = Y_train
x_test = X_test
y_test = Y_test

print("bow initiated")

# Bag-of-words features: fit the vocabulary on the training texts only,
# then reuse it to transform the test texts.
vect = fe.text.CountVectorizer(max_features = 2000)
X_train_dtm = vect.fit_transform(x_train)
pd.DataFrame(X_train_dtm.toarray(), columns=vect.get_feature_names())
X_test_dtm = vect.transform(x_test)
pd.DataFrame(X_test_dtm.toarray(), columns=vect.get_feature_names())




# Train and evaluate the classifier.
# NOTE(review): the original comment said "logistic regression" but the
# model is a Bernoulli naive Bayes classifier.
print("training begins")
BNBC = BernoulliNB()
BNBC.fit(X_train_dtm, y_train)
print("test begins")
y_predicted = BNBC.predict(X_test_dtm)

print(ac(y_test, y_predicted))
Пример #12
0
import pandas as pd
from sklearn.tree import DecisionTreeClassifier as dc
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score as ac
# BUG FIX: sklearn.externals.joblib was removed in scikit-learn 0.23+;
# joblib is a standalone package now.
import joblib

# Train a decision tree on the music dataset and report holdout accuracy.
music_data = pd.read_csv('music.csv')
x = music_data.drop(columns=['genre'])
y = music_data['genre']
x_train, x_test, y_train, y_test = tts(x, y, test_size=0.2)

model = dc()
model.fit(x_train, y_train)
predictions = model.predict(x_test)
score = ac(y_test, predictions)
print(score)
# Persist the trained model, reload it, and predict for a single
# [age, gender] sample.
joblib.dump(model, 'music-recommender.joblib')
model = joblib.load('music-recommender.joblib')
predictions = model.predict([[32, 1]])
predictions
#TODO: Test the dataset accuracy using train_test_split() in sklearn.
# First 12 columns are features, column 12 is the target.
X = df.values[:, 0:12]
y = df.values[:, 12]

#TODO: Decision tree test part 1.
# NOTE(review): `dtc` is not defined in this snippet (the earlier import
# aliases DecisionTreeClassifier as `dc`) — confirm where dtc comes from.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.3, random_state=100)
dt_classify_gini = dtc(criterion="gini",
                       random_state=100,
                       max_depth=5,
                       min_samples_leaf=5)
dt_classify_gini.fit(X_train, y_train)
y_predict_gini = dt_classify_gini.predict(X_test)

print("\nDesicion Tree using Gini Index [70:30]\nAccuracy is ",
      ac(y_test, y_predict_gini) * 100)

#TODO: Decision tree test part 2.
# Same model, 80:20 split.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=100)
dt_classify_gini = dtc(criterion="gini",
                       random_state=100,
                       max_depth=5,
                       min_samples_leaf=5)
dt_classify_gini.fit(X_train, y_train)
y_predict_gini = dt_classify_gini.predict(X_test)

print("\nDesicion Tree using Gini Index [80:20]\nAccuracy is ",
      ac(y_test, y_predict_gini) * 100)

#TODO: Decison tree test part 3.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.1, random_state=100)
Пример #14
0
#Cross validation
# Hold out 10% of the data as a test split (fixed seed for repeatability).
xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                y,
                                                test_size=0.10,
                                                random_state=0)
'''print(xtrain)
print(xtest)
print(ytrain)
print(ytest)'''
'''__init__(n_components=None, copy=True, iterated_power=3, whiten=False, random_state=None)'''
'''pca = RandomizedPCA(n_components=90)
pca.fit(xtrain)   
train_res = pca.fit_transform(xtrain)
#xtest = (np.float32(xtest[:])/255.)
test_res = pca.transform(xtest)

classifier = svm.SVC(gamma = 0.01,C=3, kernel='rbf')
classifier.fit(train_res,ytrain)

expected=ytest
predicted=classifier.predict(test_res)

ac = accuracy_score(expected,predicted)
print ac'''
#SVM
# Fit a polynomial-kernel SVM and print its test accuracy
# (accuracy_score is symmetric, so the swapped argument order is harmless).
model = svm.SVC(kernel='poly')
model.fit(xtrain, ytrain)
pred = model.predict(xtest)
print(" accuracy is", ac(pred, ytest) * 100)
Пример #15
0
# Build the ground-truth test labels: a point is anomalous (1) if any of
# the per-file "Class" columns marks it.
sources = np.zeros((142276, 24))
for i in range(len(read_file_list)):
    data = pd.read_csv(read_file_list[i], engine="python")["Class"]
    sources[:, i] = data.values
sources = np.sum(sources, axis=1)
source_label = [1 if source > 0 else 0 for source in sources]

# Previously saved prediction vectors for the threshold variants.
num_3 = np.load("num_3.txt.npy")
num_N = np.load("num_N.txt.npy")
one_3 = np.load("one_3.txt.npy")
one_N = np.load("one_N.txt.npy")

print("--------------------------------------------")
print("num_3")
print("Test acc score: {:.6f}".format(ac(source_label, num_3)))
print("Test p score: {:.6f}".format(p(source_label, num_3)))
print("Test r score: {:.6f}".format(r(source_label, num_3)))
# Count true/false positives by cross-tabulating predictions and truth.
data = pd.DataFrame()
data["y_true"] = source_label
data["y_pred"] = num_3
print("TP", data[(data["y_pred"] == 1) & (data["y_true"] == 1)].shape[0])
print("FP", data[(data["y_pred"] == 1) & (data["y_true"] == 0)].shape[0])

print("--------------------------------------------")
print("num_N")
print("Test acc score: {:.6f}".format(ac(source_label, num_N)))
print("Test p score: {:.6f}".format(p(source_label, num_N)))
print("Test r score: {:.6f}".format(r(source_label, num_N)))
data = pd.DataFrame()
data["y_true"] = source_label
Пример #16
0
def ml(request):
	"""Django view: train a LogisticRegression model on an uploaded CSV.

	Handles three POST flows: 'Y' (re-use the stored label and ask for
	test input), 'N' (delete the uploaded file/model and reset), or a
	form with Preprocess/Label/split fields, which triggers preprocessing,
	training, and persistence of the fitted model to model.pkl.
	"""
	z = False
	#data = Document1.objects.all()
	#data = serializers.serialize( "python", Document1.objects.all())
	d = os.getcwd()
	# The uploaded dataset is the first CSV found in the working directory.
	file1 = [f for f in os.listdir(d) if f.endswith('.csv')]
	try:
		data = pd.read_csv(file1[0])
	except Exception:
		return HttpResponse("<h2><center> Please upload the file </center> </h2>")
	col = []
	for i in data.columns:
		col.append(i)
	test = False
	t1 = 'k'
	if(request.method == 'POST'):
		try:
			t1 = request.POST['test']
			#t1 = t1.encode('ascii')
		except Exception:
			print("No value entered")
	if(t1=='Y'):
		test = True
		Label = LabelName.objects.values_list('Label')[0]
		#Label = Label[0].encode('ascii')
		col.remove(Label)
		return render(request, 'index.html', {'test2':test,'col':col})
	elif(t1 == 'N'):
		# Reset: remove the uploaded CSV, trained model and stored labels.
		os.remove(file1[0])
		os.remove('model.pkl')
		test = True
		LabelName.objects.all().delete()
		return render(request, 'index.html', {'test4':test})
	if(request.method == 'POST'):
		Preprocess = request.POST['Preprocess']
		Label = request.POST['Label Name']
		#Label = Label.encode('ascii')
		#u'label' -> label ascii
		split = request.POST['split']
		z = True
		obj = LabelName(Preprocess = Preprocess,Label = Label, split = split)
		obj.save()
	if(z):
		le = LabelEncoder()
		sc = StandardScaler()
		print("Printing label after decoding {}".format(Label))
		# Encode a non-numeric target column.
		if(data[Label].dtype == 'object'):
			data[Label] = le.fit_transform(data[Label]) 
		y = data[Label].values
		#print(y)
		# Drop index-like columns ("unnamed", "id").
		for i in data.columns:
			c1 = re.findall(r"unnamed",i.lower())
			c2 = re.findall(r"id",i.lower())
			if(len(c1)>0 or len(c2)>0):
				data = data.drop(i,axis = 1)
		if(Preprocess == 'Y'):
			dop = []
			for i in data.keys():
				s = []
				# Impute missing values: mean for numeric, mode for object.
				if(data[i].isna().sum()>0):
					if(data[i].dtypes == 'int64' or data[i].dtypes == 'float64'):
						data[i].fillna(data[i].mean(),inplace = True)
					elif(data[i].dtypes == 'object'):
						data[i].fillna(data[i].mode()[0],inplace = True)
				if(data[i].dtype == 'object'):
					for j in data[i]:
						s.append(len(j))
				else:
					s.append(0)
				# Drop free-text columns (any string longer than 30 chars).
				if(max(s) > 30):
					dop.append(i)
			for z1 in dop:
				print("Dropping the column : ", z1)
				data = data.drop(z1,axis = 1)
			for i in data.keys():
				if(data[i].dtype == 'object'):
					data[i] = le.fit_transform(data[i])
		#data1 = data.astype('float64')
		X = data.drop(Label,axis=1)
		X = X.values
		from sklearn.model_selection import train_test_split
		# NOTE(review): an int test_size is an absolute sample count, not a
		# fraction — confirm that is the intent of the 'split' field.
		X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = int(split))
		X_train = sc.fit_transform(X_train)
		# BUG FIX: the original refit the scaler on the test split
		# (fit_transform), leaking test statistics; apply the training fit.
		X_test = sc.transform(X_test)
		from sklearn.linear_model import LogisticRegression
		lr  = LogisticRegression()
		lr.fit(X_train,y_train)
		y_pred = lr.predict(X_test)
		from sklearn.metrics import accuracy_score as ac
		ac1 = ac(y_test,y_pred)
		#LabelName.objects.all().delete()
		#os.remove(file1[0])
		joblib.dump(lr, "model.pkl")
		return render(request,'data.html',{'z':z,'label':Label,'ac1':ac1})
	return render(request,'data.html',{'col':col})
Пример #17
0
from sklearn.neighbors import KNeighborsClassifier as kn
# BUG FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as ac
#plt.style.use('ggplot')
# Load the iris dataset and inspect its structure.
iris = datasets.load_iris()
type(iris)
print(iris.keys())
iris.data.shape
iris.target_names
# EDA
X = iris.data
y = iris.target
y = y.astype(float)
df = pd.DataFrame(X, columns=iris.feature_names)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=1 / 3,
                                                    random_state=6)

knn = kn(n_neighbors=6)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
#visualization

#plt.scatter(X_train,y_train,color='red')
#plt.plot(X_train,knn.predict(X_train),color= 'blue')
print("Accuracy using Knn AT N=6 is: ")
# BUG FIX: the accuracy was computed but its value discarded in a plain
# script; print it so the line above is followed by the actual number.
print(ac(y_test, pred))

#print(df.head())
#pd.scatter_matrix(df,c=y,figsize= [9,9],s= 150,marker= 'D')
Пример #18
0
                # Fragment: inner loop of final_data (outer loops not shown).
                # Flatten the assembled window into a float feature row.
                temp2.pop(pair[0])
                temp2.append(temp)
                temp2 = np.array(temp2)
                temp2=temp2.flatten()
                temp2=temp2.astype(float)
                temp2=list(temp2)
                final_train_x.append(temp2)
                final_train_y.append(temp_data_y[j+6])
    
    # Return the accumulated feature rows and their labels as arrays.
    return np.array(final_train_x), np.array(final_train_y)

train_x, train_y = final_data(list_image_count, train_x, train_y)

# Fit an RBF (default) SVM and a linear SVM, reporting accuracy for each.
model = svm.SVC()
model.fit(train_x, train_y)
predicted_y = model.predict(test_x)
# NOTE(review): predictions are for test_x but are scored against train_y;
# this only works if both splits have the same length — the test labels
# are likely intended. Confirm before relying on these numbers.
accuracy = ac(train_y, predicted_y)
print('Guassian Kernel :', accuracy)
# BUG FIX: the original called svm.svm.LinearSVC(), which raises
# AttributeError; LinearSVC lives directly in sklearn.svm.
model = svm.LinearSVC()
model.fit(train_x, train_y)
predicted_y = model.predict(test_x)
accuracy = ac(train_y, predicted_y)
print('Linear Kernel :', accuracy)

# NOTE(review): sklearn estimators have no .save(); joblib.dump is the
# standard way to persist them — this line will raise as written.
model.save('final_model')

#model = load_model('my_model.h5')



Пример #19
0
# Fragment: clf/clf1/tfidf/tfidf1 are fitted earlier (not shown here).
clf.fit(X_final, y)
# Train a blending regressor over the two classifiers on rows 1400-1500.
train_regressor = pd.read_csv('polarity.csv').iloc[1400:1500,:]
y_regg = np.array(train_regressor['spam'])
X_regg = np.array(train_regressor['review'])
x_regg=X_regg
X_regg = tfidf.transform(X_regg)
polarity_regressor = sparse.csr_matrix(train_regressor['polarity'])
X_regg = sparse.hstack([X_regg,polarity_regressor.reshape(-1,1)], format='csr')
X_regg1=tfidf1.transform(x_regg)
val1=clf.predict(X_regg)
val2=clf1.predict(X_regg1)
# Stack both classifiers' outputs as features for a linear blender.
val3=pd.concat([pd.DataFrame(val1),pd.DataFrame(val2)],axis=1).values
classifier = LinearRegression().fit(val3, y_regg)
# Test the hybrid model on the remaining rows (1500+).
final = pd.read_csv('polarity.csv').iloc[1500:,:]
y_test = np.array(final['spam'])
X_test = np.array(final['review'])
x_test=X_test
X_test = tfidf.transform(X_test)
polarity1 = sparse.csr_matrix(final['polarity'])
X_test = sparse.hstack([X_test,polarity1.reshape(-1,1)], format='csr')
X_test1=tfidf1.transform(x_test)
val1=clf.predict(X_test)
val2=clf1.predict(X_test1)
val3=pd.concat([pd.DataFrame(val1),pd.DataFrame(val2)],axis=1).values
predRF=classifier.predict(val3)
# Threshold the regressor output at 0.5 to get binary spam predictions.
predRF=predRF>0.5
print('Accuracy score: {}'.format(ac(y_test, predRF)))
print('F1 score: {}'.format(f1(y_test, predRF)))
Пример #20
0
        # Fragment: inside a per-class loop (enclosing scope not shown).
        # Sort the class's feature weights, highest first.
        sorted_dict = {
            k: v
            for k, v in sorted(
                dict_fit[key].items(), key=lambda item: item[1], reverse=True)
        }
        # Print the features ranked 4th..24th as class indicators.
        for i, key in enumerate(sorted_dict.keys()):
            if (3 < i < 25):
                print(features[int(key)], end=" ")


#indicators(dict_fit)

y_p = predict(dict_fit, df_test.values)

# FIX: the original rebound the confusion-matrix function's name (`cm`) to
# its own result; store the matrix under a new name so the callable stays
# intact for later use.
conf_mat = cm(y_indexs_test, y_p)
print("Accuracy=", ac(y_indexs_test, y_p))

import matplotlib.pyplot as plt
import seaborn as sns

# Plot the confusion matrix with the class names on both axes.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(conf_mat,
            annot=True,
            cmap="Set3",
            fmt="d",
            xticklabels=list(test.target_names),
            yticklabels=list(test.target_names))
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
X_train_Scaler=ss.fit_transform(X_train)
# NOTE(review): the scaler is refit on the test split here; ss.transform
# would reuse the training statistics — confirm which was intended.
X_test_Scaler =ss.fit_transform(X_test) 

# first for regression

if(Which.lower()=="reg" or Which.lower()=="regression"):
    
    # multiRegression 
    
    from sklearn.linear_model import LinearRegression
    
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    # NOTE(review): ac is accuracy_score, which raises on continuous
    # regression outputs; r2_score is the usual metric here — confirm.
    results.append(ac(y_test,reg.predict(X_test)))
    
    # polyregression
    
    from sklearn.preprocessing import PolynomialFeatures
    
    pf = PolynomialFeatures(degree=4)
    X_poly = pf.fit_transform(X_train)
    reg_poly = LinearRegression()
    reg_poly.fit(X_poly, y_train)
    results.append(ac(y_test, reg_poly.predict(pf.fit_transform(X_test))))
    
    # SVM
    
    from sklearn.svm import SVR 
    
Пример #22
0
# Fragment: model1 is a Keras Sequential created earlier (not shown).
model1.add(Dense(6, activation='relu', input_dim=11))
model1.add(Dense(6, activation='relu'))
# NOTE(review): input_dim on a non-first layer is ignored by Keras.
model1.add(Dense(1, activation='sigmoid', input_dim=11))

model1.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

model1.fit(X_train, y_train, batch_size=10, epochs=100)

# Threshold the sigmoid output at 0.5 and report the usual metrics.
y_pred = model1.predict(X_test)
y_pred = (y_pred > 0.5)

print('CM:', confusion_matrix(y_test, y_pred))
print('AC:', ac(y_test, y_pred))
print('F1 scores:', f1(y_test, y_pred))
print('PR:', prfs(y_test, y_pred))

#logistic Regression
from sklearn.linear_model import LogisticRegression as lr

model2 = lr().fit(X_train, y_train)
y_pred = model2.predict(X_test)
y_pred = (y_pred > 0.5)
print('CM:', confusion_matrix(y_test, y_pred))
print('AC:', ac(y_test, y_pred))
print('F1 scores:', f1(y_test, y_pred))
print('PR:', prfs(y_test, y_pred))

from sklearn.neighbors import KNeighborsClassifier as knn
Пример #23
0
X = processed_df.drop(['Survived'], axis=1)  # Features dataset
y = processed_df['Survived']  # Target variable

#Train Test split
# BUG FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split as tt_split
X_train, X_test, y_train, y_test = tt_split(X,
                                            y,
                                            test_size=0.2,
                                            random_state=100)

from sklearn.metrics import accuracy_score as ac
from sklearn.ensemble import RandomForestClassifier as RFC
# Baseline random forest with default hyperparameters.
RF_default = RFC(random_state=10)
RF_default.fit(X_train, y_train)
pred_dfault = RF_default.predict(X_test)
ac(y_test, pred_dfault)

# A small hand-tuned forest for comparison (train vs. test accuracy).
RF_model1 = RFC(n_estimators=7,
                criterion="entropy",
                max_features=2,
                max_depth=9,
                random_state=999)

RF_model1.fit(X_train, y_train)
pred_model1 = RF_model1.predict(X_test)
pred_model1_train = RF_model1.predict(X_train)
ac(y_train, pred_model1_train)
ac(y_test, pred_model1)

from sklearn.model_selection import GridSearchCV
params = {
Пример #24
0
from sklearn.model_selection import train_test_split as tts

# Default 75/25 split of the engineered features against the target.
x_train1, x_test1, y_train1, y_test1 = tts(data_ml1, output)

from sklearn.neighbors import KNeighborsClassifier as knc

# k-NN with the default k=5 neighbors.
cla = knc()

cla.fit(x_train1, y_train1)

pred = cla.predict(x_test1)

from sklearn.metrics import accuracy_score as ac

acc = ac(y_test1, pred)

#%% Plotting hexbin for data 2 and patterns

day_section = []

for i in data2.hour:
    if i >= 0 and i < 3:
        day_section.append('night')
    elif i >= 3 and i < 6:
        day_section.append('deep night')
    elif i >= 6 and i < 9:
        day_section.append('earl morning')
    elif i >= 9 and i < 12:
        day_section.append('morning rush')
    elif i >= 12 and i < 15:
Пример #25
0
            # Fragment: threshold sweep inside a per-file loop (not shown).
            acc.append(matthews_corrcoef(label, y_pred))
        acc = np.array(acc)
        # Keep the threshold index with the best Matthews correlation.
        index = np.where(acc == acc.max())
        accuracies.append(acc.max())
        best_threshold.loc[data.columns[0], "N"] = threshold[index[0][0]]

        # Print the prediction summary for the selected threshold.
        N = threshold[index[0][0]]
        y_pred = [1 if gap > MEAN + N * STD else 0 for gap in gaps]
        # y_pred = filter(y_pred)
        print("---------------------")
        print(data.columns[0], " 调整之后的预测精度展示:")
        print("N = ", N)
        print("mean = ", MEAN)
        print("std = ", STD)
        print("Test acc score: {:.6f}".format(ac(label, y_pred)))
        print("Test p score: {:.6f}".format(p(label, y_pred)))
        print("Test r score: {:.6f}".format(r(label, y_pred)))
        print("confusion matrix:")
        print(confusion_matrix(label, y_pred))

        # Plot target vs. prediction, and the gap vs. the threshold line.
        plt.switch_backend('agg')
        plt.subplot(2, 1, 1)
        plt.plot(tmp_y, c="b")
        plt.plot(result[:, 0], c="r")
        plt.title(data.columns[0])
        plt.subplot(2, 1, 2)
        plt.plot(gaps, c="b")
        plt.plot((MEAN + N * STD) * np.ones(len(gaps)))
        plt.savefig("./single_result_picture/" + data.columns[0] + ".png")