Пример #1
0
    #law = train_SVC(vec, law_label)
    print('time SVC')
    sys.stdout.flush()
    #time = train_SVC(vec, time_label)

    #test
    print('predict')
    sys.stdout.flush()
    predictor = PredictorLocal(tfidf, accu, law, time)
    test_label, test_predict = predictor.predict_file(test_filename)

    #metrics
    judge = Judger("../baseline/accu.txt", "../baseline/law.txt")
    result = judge.test2(test_label, test_predict)
    print(result)
    rst = judge.get_score(result)

    print(rst)
    rstr = "ACCU:(%.4f, %.4f, %.4f); LAW:(%.4f, %.4f, %.4f) TIME: %.4f"% \
            (rst[0][0], rst[0][1], rst[0][2], rst[1][0], rst[1][1], rst[1][2], rst[2])

    sinfo = 'Prog:%s TrainFile:%s Seg:%s DIM:%s NGRAM:%d RESULT: %s' % (
        sys.argv[0], train_fname, seg_method, dim, ngram, rstr)
    logger.info(sinfo)

    print('begin test model:')
    print('saving model')
    joblib.dump(tfidf, 'predictor/model/tfidf.model')
    joblib.dump(accu, 'predictor/model/accu.model')
    joblib.dump(law, 'predictor/model/law.model')
    joblib.dump(time, 'predictor/model/time.model')
Пример #2
0
class Evaluator(object):
    """Run a predictor over JSON-lines input files and score the output.

    The predictor must expose a ``batch_size`` attribute (a positive ``int``)
    and a ``predict(facts)`` method returning one result mapping per fact,
    each with the keys ``"accusation"``, ``"articles"`` and ``"imprisonment"``.
    """

    def __init__(self, predictor, input_path='./input', output='./out'):
        """Store the predictor and the input/output directories.

        Args:
            predictor: Object providing ``batch_size`` and ``predict``.
            input_path: Directory the input files are read from.
            output: Directory the prediction files are written to.
        """
        self.predictor = predictor
        self.input_path = input_path
        self.output_path = output
        self.judger = Judger('./data/accu.txt', './data/law.txt')
        # Running total of results written by output_result().
        self.cnt = 0

    def format_result(self, result):
        """Normalize one raw prediction into plain ints.

        ``None`` entries are dropped from the ``"accusation"`` and
        ``"articles"`` lists and the remaining entries are converted with
        ``int``.  ``"imprisonment"`` is converted with ``int``, or replaced
        by ``-3`` when it is ``None``.

        Args:
            result: Mapping with ``"accusation"``, ``"articles"`` and
                ``"imprisonment"`` keys.

        Returns:
            A new dict with the same three keys (in the order accusation,
            articles, imprisonment).
        """
        accusation = [int(x) for x in result["accusation"] if x is not None]

        imprisonment = result["imprisonment"]
        imprisonment = -3 if imprisonment is None else int(imprisonment)

        articles = [int(x) for x in result["articles"] if x is not None]

        return {
            "accusation": accusation,
            "articles": articles,
            "imprisonment": imprisonment,
        }

    def get_batch(self):
        """Return the predictor's batch size after validating it.

        Raises:
            NotImplementedError: If ``predictor.batch_size`` is not exactly
                an ``int`` or is not positive.
        """
        v = self.predictor.batch_size
        # Exact type check on purpose: isinstance() would also accept bool.
        if type(v) is not int or v <= 0:
            raise NotImplementedError

        return v

    def solve(self, fact):
        """Predict a batch of facts and normalize every result.

        Args:
            fact: List of fact values passed unchanged to ``predict``.

        Returns:
            The sequence returned by ``predictor.predict``, with each element
            replaced in place by its ``format_result`` form.
        """
        result = self.predictor.predict(fact)

        for idx, raw in enumerate(result):
            result[idx] = self.format_result(raw)

        return result

    def _write_batch(self, fact, ouf):
        """Predict one batch, write one JSON line per result and count them."""
        result = self.solve(fact)
        self.cnt += len(result)
        for x in result:
            print(json.dumps(x), file=ouf)

    def output_result(self, file_name):
        """Predict every record of an input file and write the results.

        Reads ``file_name`` from the input directory one JSON object per
        line, takes each object's ``"fact"`` field, runs the predictor in
        batches of ``get_batch()`` records and writes one JSON line per
        result to ``file_name`` in the output directory.  ``self.cnt`` is
        increased by the number of results written.

        Args:
            file_name: Name of the file inside the input directory; the
                same name is used inside the output directory.

        Raises:
            NotImplementedError: If the predictor's batch size is invalid.
        """
        in_path = os.path.join(self.input_path, file_name)
        out_path = os.path.join(self.output_path, file_name)

        # The with-statement closes both files even if parsing or
        # prediction raises part-way through.
        with open(in_path, "r") as inf, open(out_path, "w") as ouf:
            # Validate and read the batch size once instead of per line.
            batch_size = self.get_batch()

            fact = []
            for line in inf:
                fact.append(json.loads(line)["fact"])
                if len(fact) == batch_size:
                    self._write_batch(fact, ouf)
                    fact = []

            # Flush the final, possibly smaller, batch.
            if fact:
                self._write_batch(fact, ouf)

    def scoring(self, file_name):
        """Score the predictions written for ``file_name``.

        Passes the input directory, output directory and file name to the
        judger's ``test`` method and returns ``get_score`` of that result.
        """
        result = self.judger.test(self.input_path, self.output_path, file_name)
        return self.judger.get_score(result)