# -*- coding: utf-8 -*-
"""Load positive/negative sample collections from MongoDB for classifier training.

Pulls four collections from the 'new_shiyan' database and prepares empty
holders for the per-class feature lists and the merged training set.

NOTE: Python 2 only — relies on reload(sys)/sys.setdefaultencoding.
"""
import random
import sys

import numpy as np
from sklearn import linear_model
# NOTE(review): sklearn.externals.joblib is deprecated in modern scikit-learn;
# migrate to `import joblib` when the sklearn version is bumped.
from sklearn.externals import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import roc_auc_score  # was imported twice; duplicate removed
from sklearn.preprocessing import StandardScaler

import MongodbConn
import cutWordsproject

# Python 2 hack: force UTF-8 as the default encoding so Chinese text
# round-trips through implicit str<->unicode conversions.
reload(sys)
sys.setdefaultencoding('utf-8')

load = cutWordsproject.DataParser()  # data-loading helper (project class)

# conn = MongodbConn.MongoPipeline()
# conn.open_connection("test")

# Raw documents per sample collection. The "nagitive" spelling is kept
# deliberately — it matches the actual MongoDB collection names and the
# variable names that later (unseen) code may reference.
data_nagitive_111 = load.loadData('new_shiyan', 'nagitive_111')
data_positive_111 = load.loadData('new_shiyan', 'positive_111')
data_positive_kad = load.loadData('new_shiyan', 'positive_kad')
data_nagitive_kad = load.loadData('new_shiyan', 'nagitive_kad')

# Per-class feature holders, filled later.
positive_111 = list()
nagitive_111 = list()
positive_kad = list()
nagitive_kad = list()

# Merged training matrix and its class labels.
train = list()
train_classify = list()
import random import MongodbConnLocal import cutWordsproject import MongodbConn from sklearn.feature_extraction.text import CountVectorizer from sklearn.decomposition import TruncatedSVD from scipy.io import mmwrite, mmread from sklearn import feature_extraction #<---------------------讲真,python要是再不对中文友好点我就把这几行代码写成宏--------------------------------> import sys reload(sys) sys.setdefaultencoding('utf-8') #<---------------------讲真,python要是再不对中文友好点我就把这几行代码写成宏--------------------------------> load = cutWordsproject.DataParser() #实例化加载数据类 webParser = cutWordsproject.ContextExtraction() #实例化正文提取类 wordsParser = cutWordsproject.CutWords() #实例化分词类 words2DataParser = cutWordsproject.Words2Data() #实例化量化类 def method1(): conn = MongodbConn.MongoPipeline() conn.open_connection("new_shiyan") # wordsParser.loadUsrWordsTable('wordsTable.txt') datas = load.loadData('new_shiyan', 'kangduoai_n_1') dataSet = list() # f = open('corups.txt', 'a') num = 0 for data in datas: if num % 100 == 0: