Exemplo n.º 1
0
def main(train_dir, labels=('spam', 'ham')):
    """Train a Naive Bayes model from ``train_dir`` and save it.

    The ``(label, path)`` pairs yielded by ``read_training_data(train_dir)``
    are grouped into one list of paths per label. That grouping is handed to
    ``NaiveBayesModel`` and the resulting model is saved through
    ``save_to_path('nbmodel.txt')``.

    Args:
        train_dir: Location of the training data, passed straight to
            ``read_training_data``.
        labels: Iterable of expected class labels. Every label gets a bucket,
            even if no training path ends up in it.

    Raises:
        KeyError: If a label other than ``None`` is yielded that is not
            listed in ``labels``.
    """
    print('Train dir: %s' % train_dir)

    # One bucket per expected label.
    paths = {label: [] for label in labels}

    for label, path in read_training_data(train_dir):
        if label is None:
            # Report the unlabelled entry and skip it; indexing ``paths``
            # with None would otherwise abort the run with a KeyError
            # right after the warning is printed.
            print('Label is None for Path=%s' % path)
            continue
        paths[label].append(path)

    model = NaiveBayesModel(paths)
    fname = 'nbmodel.txt'
    model.save_to_path(fname)
Exemplo n.º 2
0
def main(data_dir, model_path, out_path):
    """Label every document found under ``data_dir`` and record the result.

    A model is loaded with ``NaiveBayesModel.load_from_path(model_path)``.
    For each ``(label, path)`` pair yielded by ``read_training_data`` the
    file at ``path`` is read in full and passed to ``model.predict``. The key
    with the highest associated value in the prediction result is written to
    ``out_path`` as one ``"<label> <path>"`` line per document. The label
    that comes with the input pair is not used.

    Args:
        data_dir: Location of the documents, passed to ``read_training_data``.
        model_path: Location of the stored model.
        out_path: File that receives the predictions; it is opened in
            write mode.
    """
    banner = ("Reading data from %s\nReading model from %s\n"
              "Storing output to %s")
    print(banner % (data_dir, model_path, out_path))

    classifier = NaiveBayesModel.load_from_path(model_path)
    samples = read_training_data(data_dir)

    # buffering=1 requests line buffering for the text-mode output file.
    with open(out_path, 'w', buffering=1, encoding=ENCODING) as sink:
        for _label, doc_path in samples:
            with open(doc_path, 'r', encoding=ENCODING) as source:
                text = source.read()

            # NOTE(review): predict() presumably returns a dict-like mapping
            # of label -> score; only iteration and .get() are relied upon.
            scores = classifier.predict(text)
            best = max(scores, key=scores.get)
            sink.write("%s %s\n" % (best, doc_path))