Exemplo n.º 1
0
 def predict(self, test_file, output_file):
     """Score every sample in ``test_file`` and write the scores to ``output_file``.

     Each input line is split on tabs: the first field is parsed as the
     integer label and the remaining fields are handed to
     ``Sample.load_sample`` together with the instance's word dictionary.
     The classifier's score for the sample is written to ``output_file`` as
     ``"<label>\\t<score>"``, one line per sample.  Scores above 0.5 are
     recorded as class ``1`` and all others as class ``-1`` in a
     ``ModelEvaluate`` instance, whose ``report()`` is called at the end.

     Args:
         test_file: Path of the tab-separated file to score.
         output_file: Path of the file to (over)write with label/score pairs.

     Raises:
         OSError: If either file cannot be opened.
         ValueError: If the first field of a line is not an integer.
     """
     # Context managers guarantee both handles are closed even when a
     # malformed line (or the classifier) raises part-way through the file.
     with open(output_file, "w") as wfp, open(test_file) as fp:
         sys.stderr.write("start to predict ...\n")
         me = model_evaluate.ModelEvaluate()
         for line in fp:
             # Strip only the line terminator so empty trailing fields survive.
             arr = line.strip('\r\n').split('\t')
             label = int(arr[0])
             test_sample = Sample()
             test_sample.load_sample(arr[1:], label, self.__word_dict)
             p = self.__classifier(test_sample)
             # The raw score is written out; the thresholded class is only
             # used for evaluation.
             wfp.write("%d\t%f\n" % (label, p))
             predicted = 1 if p > 0.5 else -1
             me.add(label, predicted)
     me.report()
    # NOTE(review): this fragment starts mid-block. `parser` and the options
    # behind args.train_file / args.dict_file are defined above the visible
    # code -- confirm against the full file.
    parser.add_argument("-t", "--test_file", help="test file")
    parser.add_argument("-o", "--output_file", help="output file")
    args = parser.parse_args()

    train_file = args.train_file
    test_file = args.test_file
    dict_file = args.dict_file
    # NOTE(review): output_file is read here but not used anywhere in the
    # visible code.
    output_file = args.output_file
    # Each of the three input paths must be supplied and exist on disk;
    # otherwise print the usage text and exit with status 1.
    if not train_file or not os.path.exists(train_file):
        parser.print_help()
        sys.exit(1)
    if not test_file or not os.path.exists(test_file):
        parser.print_help()
        sys.exit(1)
    if not dict_file or not os.path.exists(dict_file):
        parser.print_help()
        sys.exit(1)

    # Train on train_file/dict_file, then run every line of test_file through
    # the model and feed (label from the line, predicted label) pairs to the
    # evaluator.
    me = model_evaluate.ModelEvaluate()
    naive_bayes = NaiveBayes()
    naive_bayes.train_model(train_file, dict_file)
    # NOTE(review): fp is never closed in the visible code.
    fp = open(test_file)
    while True:
        line = fp.readline()
        # readline() returns an empty string only at end of file.
        if len(line) <= 0:
            break
        label, words = naive_bayes.get_articel_words(line)
        # predict() returns a pair; only the first element (l) is used below.
        l, p = naive_bayes.predict(words)
        me.add(int(label), int(l))
    me.report()