def create_predict(HudongItem_csv):
    """Predict labels for a fixed window of titles and append them to a file.

    Trains a ``Classifier`` on the labelled items returned by
    ``db.getLabeledHudongItem('labels.txt')``, then, for each selected title,
    fetches the item from neo4j, predicts its label with
    ``classifier.KNN_predict`` and appends a ``"<title> <label>"`` line to
    ``predict_labels2.txt``. Progress is printed to stdout.

    Only entries 140 through 201 (1-based) of the title list are processed;
    earlier entries are skipped and iteration stops after entry 201.
    Titles already seen within this run are skipped.

    NOTE(review): this file defines another ``create_predict`` after this
    one; the later definition replaces this one when the module is loaded.

    Args:
        HudongItem_csv: Passed to ``readCSVbyColumn`` together with the
            column name ``'title'`` (presumably a CSV path -- confirm
            against ``readCSVbyColumn``).
    """
    # Read the labelled training items from neo4j.
    db = Neo4j()
    db.connectDB()
    data_set = db.getLabeledHudongItem('labels.txt')

    classifier = Classifier('wiki.zh.bin')
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    predict_List = readCSVbyColumn(HudongItem_csv, 'title')
    total = len(predict_List)

    # Hard-coded processing window (1-based positions in predict_List).
    first_row = 140
    last_row = 201

    vis = set()
    # The context manager closes the output file even if a lookup or a
    # prediction raises part-way through.
    with open('predict_labels2.txt', 'a') as file_object:
        for count, p in enumerate(predict_List, start=1):
            if count > last_row:
                break
            if count < first_row:
                continue
            # Query neo4j only for rows inside the window; skipped rows do
            # not need a database round trip.
            cur = HudongItem(db.matchHudongItembyTitle(p))
            title = cur.title
            if title in vis:
                continue
            vis.add(title)
            label = classifier.KNN_predict(cur)
            print(str(title)+" "+str(label)+": "+str(count)+"/"+str(total))
            file_object.write(str(title)+" "+str(label)+"\n")
def _load_pickle_stream(path):
    """Return every object pickled back-to-back in the file at *path*."""
    # SECURITY: pickle.load can execute arbitrary code; only use this on
    # files produced by a trusted source.
    objects = []
    with open(path, "rb") as stream:
        while True:
            try:
                objects.append(pickle.load(stream))
            except EOFError:
                # End of the stream: every stored object has been read.
                break
    return objects


def create_predict(HudongItem_csv):
    """Predict labels for pre-pickled items and append them to a file.

    Loads the training set from ``Dataset1.txt`` and the items to classify
    from ``cur_list1.txt`` (both read as streams of consecutive pickles),
    predicts a label for each item with ``classifier.KNN_predict`` and
    appends a ``"<title> <label>"`` line to ``predict_labels.txt``.
    Items whose title was already seen within this run are skipped.
    Progress is printed to stdout as ``<position>/<total>``.

    Args:
        HudongItem_csv: Unused by this variant; kept so existing callers
            keep working.

    Raises:
        pickle.UnpicklingError: If one of the pickle files is corrupt
            (only a clean end-of-file ends loading).
    """
    classifier = Classifier('wiki.zh.bin')
    data_set = _load_pickle_stream("Dataset1.txt")
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    cur_list = _load_pickle_stream("cur_list1.txt")
    total = len(cur_list)

    vis = set()
    # The context manager closes the output file even if a prediction raises.
    with open('predict_labels.txt', 'a') as file_object:
        # count is the 1-based position in cur_list, so the progress line
        # advances instead of always showing 0.
        for count, cur in enumerate(cur_list, start=1):
            title = cur.title
            if title in vis:
                continue
            vis.add(title)
            label = classifier.KNN_predict(cur)
            print(str(title)+" "+str(label)+": "+str(count)+"/"+str(total))
            file_object.write(str(title)+" "+str(label)+"\n")