# Training of the law classifier is currently disabled.
#law = train_SVC(vec, law_label)
print('time SVC')
sys.stdout.flush()
# Training of the imprisonment-term classifier is currently disabled.
#time = train_SVC(vec, time_label)

# NOTE(review): `law` and `time` are used below although the assignments
# above are commented out; presumably they are defined earlier in the
# script (not visible here) -- confirm.

# --- test: run the assembled predictor over the test file ---
print('predict')
sys.stdout.flush()
predictor = PredictorLocal(tfidf, accu, law, time)
test_label, test_predict = predictor.predict_file(test_filename)

# --- metrics: score the predictions and log one summary line ---
judge = Judger("../baseline/accu.txt", "../baseline/law.txt")
result = judge.test2(test_label, test_predict)
print(result)
rst = judge.get_score(result)
print(rst)
rstr = "ACCU:(%.4f, %.4f, %.4f); LAW:(%.4f, %.4f, %.4f) TIME: %.4f" % (
    rst[0][0], rst[0][1], rst[0][2],
    rst[1][0], rst[1][1], rst[1][2],
    rst[2],
)
sinfo = 'Prog:%s TrainFile:%s Seg:%s DIM:%s NGRAM:%d RESULT: %s' % (
    sys.argv[0],
    train_fname,
    seg_method,
    dim,
    ngram,
    rstr,
)
logger.info(sinfo)

# --- persist the fitted vectorizer and the three models ---
print('begin test model:')
print('saving model')
joblib.dump(tfidf, 'predictor/model/tfidf.model')
joblib.dump(accu, 'predictor/model/accu.model')
joblib.dump(law, 'predictor/model/law.model')
joblib.dump(time, 'predictor/model/time.model')
class Evaluator(object):
    """Run a predictor over JSON-lines files and score the written output.

    The predictor must expose a ``batch_size`` attribute and a
    ``predict(facts)`` method returning one result mapping per fact, each
    with the keys ``"accusation"``, ``"articles"`` and ``"imprisonment"``.
    """

    def __init__(self, predictor, input_path='./input', output='./out'):
        """Store the predictor and directories, and build the judger.

        Args:
            predictor: Object providing ``batch_size`` and ``predict``.
            input_path: Directory the input files are read from.
            output: Directory the prediction files are written to.
        """
        self.predictor = predictor
        self.input_path = input_path
        self.output_path = output
        self.judger = Judger('./data/accu.txt', './data/law.txt')
        # Running total of results written by output_result().
        self.cnt = 0

    def format_result(self, result):
        """Return a normalized copy of one prediction.

        ``None`` entries are dropped from the ``"accusation"`` and
        ``"articles"`` lists and the remaining entries are converted with
        ``int``. ``"imprisonment"`` is converted with ``int``, or set to
        ``-3`` when it is ``None``.

        Args:
            result: Mapping with ``"accusation"``, ``"articles"`` and
                ``"imprisonment"`` keys.

        Returns:
            A new dict with those three keys, in that order.
        """
        accusations = [int(x) for x in result["accusation"] if x is not None]
        term = result["imprisonment"]
        imprisonment = -3 if term is None else int(term)
        articles = [int(x) for x in result["articles"] if x is not None]
        # Key order is kept stable because it determines the JSON output.
        return {
            "accusation": accusations,
            "articles": articles,
            "imprisonment": imprisonment,
        }

    def get_batch(self):
        """Return the predictor's batch size after validating it.

        Raises:
            NotImplementedError: If ``predictor.batch_size`` is not exactly
                of type ``int`` or is not positive.
        """
        size = self.predictor.batch_size
        # Exact type check on purpose: bool and int subclasses are rejected.
        if type(size) is not int or size <= 0:
            raise NotImplementedError(
                "predictor.batch_size must be a positive int")
        return size

    def solve(self, fact):
        """Predict a batch of facts and normalize every result.

        Args:
            fact: List of fact values handed to ``predictor.predict``.

        Returns:
            The sequence returned by the predictor, with each element
            replaced in place by its ``format_result`` form.
        """
        result = self.predictor.predict(fact)
        for index, item in enumerate(result):
            result[index] = self.format_result(item)
        return result

    def _write_batch(self, fact, ouf):
        """Solve one batch, add its size to ``cnt``, write a JSON line each."""
        result = self.solve(fact)
        self.cnt += len(result)
        for item in result:
            print(json.dumps(item), file=ouf)

    def output_result(self, file_name):
        """Predict every line of an input file and write the results.

        Each input line is parsed as JSON and its ``"fact"`` value is
        collected. Facts are predicted in batches of ``get_batch()`` and
        every result is written as one JSON line to the file of the same
        name in the output directory. ``cnt`` grows by the number of results.

        Args:
            file_name: File name relative to both directories.
        """
        in_path = os.path.join(self.input_path, file_name)
        out_path = os.path.join(self.output_path, file_name)
        # Context managers close both files even if parsing or prediction
        # raises part-way through.
        with open(in_path, "r") as inf, open(out_path, "w") as ouf:
            fact = []
            for line in inf:
                fact.append(json.loads(line)["fact"])
                if len(fact) == self.get_batch():
                    self._write_batch(fact, ouf)
                    fact = []
            # Flush the final, possibly short, batch.
            if fact:
                self._write_batch(fact, ouf)

    def scoring(self, file_name):
        """Return the judger's score for a previously written output file.

        Args:
            file_name: File name relative to both directories.

        Returns:
            Whatever ``judger.get_score`` returns for the ``judger.test``
            result.
        """
        result = self.judger.test(self.input_path, self.output_path, file_name)
        return self.judger.get_score(result)