def SVM_Classifier(x_train, y_train, x_test, y_test):
    """Fit an SVC on the training split and evaluate it on the test split.

    Trains ``svm.SVC(C=2, probability=True)`` on ``(x_train, y_train)``,
    prints the accuracy on ``(x_test, y_test)``, then plots the ROC curve of
    the test split, saves it to ``fdir + 'image/svm_roc'`` and displays it.

    Args:
        x_train: Training samples passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        x_test: Held-out samples used for the accuracy and the ROC curve.
        y_test: Held-out labels matching ``x_test``.

    Returns:
        None. Results are printed and plotted.

    NOTE(review): the ROC part takes column 1 of ``predict_proba``, which
    presumably means a binary problem -- confirm against the callers.
    """
    # tic()/toc() bracket the sections whose duration is of interest
    # (presumably the timing helpers from mytictoc -- confirm).
    tic()
    clf = svm.SVC(C=2, probability=True)
    clf.fit(x_train, y_train)
    toc()

    # Score on the held-out data: the message says "Test Accuracy", and
    # scoring the training set would only report resubstitution accuracy.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Test Accuracy: %.2f' % clf.score(x_test, y_test))

    # Create ROC curve from the held-out predictions.
    tic()
    # Column 1 holds the probability of the second entry of clf.classes_,
    # which is used here as the ranking score.
    pred_probas = clf.predict_proba(x_test)[:, 1]
    toc()

    # Debug output kept from the original implementation.
    print(type(y_test))
    print(type(pred_probas))

    fpr, tpr, _ = metrics.roc_curve(y_test, pred_probas)
    roc_auc = metrics.auc(fpr, tpr)

    plt.plot(fpr, tpr, label='area = %.2f' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.legend(loc='lower right')
    # Save before show(): the figure is written to disk first, then displayed.
    plt.savefig(fdir + 'image/svm_roc')
    plt.show()
    plt.close()
def pca_reducing(size, x, n_keep=200):
    """Plot the PCA explained-variance curve of ``x`` and return a reduced ``x``.

    Two independent PCA fits are performed:

    1. A diagnostic fit with ``size`` components, whose ``explained_variance_``
       is plotted and saved to ``fdir + 'image/train_pca'`` so the number of
       useful components can be read off the curve.
    2. The projection that is actually returned, using ``n_keep`` components.

    Args:
        size: Number of components for the diagnostic fit and plot only; it
            does not affect the returned array.
        x: Data passed to ``PCA.fit`` / ``PCA.fit_transform``.
        n_keep: Number of components of the returned projection. Defaults to
            200, the value that was previously hard-coded.

    Returns:
        The result of ``PCA(n_components=n_keep).fit_transform(x)``.
    """
    # tic()/toc() bracket the diagnostic fit (presumably timing helpers from
    # mytictoc -- confirm).
    tic()
    pca = PCA(n_components=size)
    pca.fit(x)
    toc()

    # PCA drawing: explained variance per component.
    plt.figure(1, figsize=(4, 3))
    plt.clf()  # figure 1 is reused, so drop anything drawn on it earlier
    plt.axes([.2, .2, .7, .7])
    plt.plot(pca.explained_variance_, linewidth=2)
    plt.axis('tight')
    plt.xlabel('n_components')
    plt.ylabel('explained_variance_')
    plt.savefig(fdir + 'image/train_pca')
    plt.close()

    # Keep n_keep dimensions (200 by default), a count chosen from the PCA
    # drawing above. A separate fit is used so the result is independent of
    # `size`.
    x_pca = PCA(n_components=n_keep).fit_transform(x)
    return x_pca
# coding=utf-8 """ Created on Mon Aug 13 11:10:39 2018 @author: 95647 """ import urllib from urllib.parse import quote from urllib.request import urlopen from bs4 import BeautifulSoup import re import pandas as pd import string from mytictoc import tic, toc tic() headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'} url = "https://book.douban.com/tag/历史" s = quote(url,safe=string.printable) req = urllib.request.Request(s, headers=headers) html = urlopen(req) # print(html.read().decode("utf-8")) bsObj = BeautifulSoup(html,'lxml') items = bsObj.findAll("li",class_="subject-item") book_info = [] for item in items: info = [] titles = item.find("a",title = re.compile(".*")).contents if len(titles)> 1 : bookname = str(titles[0].strip()) + str(titles[1].text.strip()) else: