Example #1
def rate(title, body, source):
    # Get score from trained classifier
    clf = Classifier()
    classifier_score = clf.predict(title, body)
    # Get score from article searcher
    related_article, search_score = searcher_score(title, body)
    #search_score = min(0.9, search_score * 2)
    # Strip the URL scheme so only the domain remains for the trust lookup
    if source.startswith("http://"):
        source = source[len("http://"):]
    elif source.startswith("https://"):
        source = source[len("https://"):]

    # Keep only the domain: drop everything from the first "/" onward
    first_slash_index = source.find("/")
    if first_slash_index != -1:
        source = source[:first_slash_index]

    # Debug output: show the extracted domain and its trust status
    print(source)
    is_trusted = is_from_trusted_source(source)
    print("Is trusted:", is_trusted)
    # Blend weights: a trusted source gets a constant bonus of
    # 1 - p - q = 0.15 on top of the classifier/search mix; an
    # untrusted source is scored on the mix alone (0.7 / 0.3)
    p = 0.55
    q = 0.3

    if is_trusted:
        return 10 * (classifier_score * p +
                     (1 - p - q) + search_score * q), related_article
    else:
        return 10 * (classifier_score * 0.7 +
                     search_score * 0.3), related_article
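
To make the example self-contained, here is a minimal sketch of how rate could be exercised, with the project's Classifier, searcher_score and is_from_trusted_source stubbed out; the stubs and values below are illustrative assumptions, not the project's real implementations.

# Illustrative stubs only; the real project supplies these
class Classifier:
    def predict(self, title, body):
        return 0.8  # stub classifier score in [0, 1]

def searcher_score(title, body):
    return "some-related-article", 0.6  # stub (article, score) pair

def is_from_trusted_source(domain):
    return domain == "example.com"  # stub one-domain allowlist

score, related = rate("A headline", "Body text", "https://example.com/news/1")
# Trusted branch: 10 * (0.8 * 0.55 + 0.15 + 0.6 * 0.3) = 7.7
print(score)

On the 10-point scale the trust bonus is thus worth a constant 1.5 points (10 * (1 - p - q)).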
Example #2
def rate(title, body):
    # Get score from trained classifier
    clf = Classifier()
    classifier_score = clf.predict(title, body)

    # Get score from article searcher
    search_score = searcher_score(title, body)

    return classifier_score * 0.8 + search_score * 0.2
Example #3
File: buildedu.py  Project: OlafLee/DPLP
import gzip
from os import listdir
from os.path import join
from pickle import load  # the original project is Python 2 and likely used cPickle

# Classifier, DocReader, SampleGenerator, postprocess and writedoc are
# project-local to DPLP.

def main(fmodel, fvocab, rpath, wpath):
    clf = Classifier()
    dr = DocReader()
    clf.loadmodel(fmodel)
    flist = [join(rpath, fname) for fname in listdir(rpath)
             if fname.endswith('conll')]
    vocab = load(gzip.open(fvocab))
    for (fidx, fname) in enumerate(flist):
        print("Processing file: {}".format(fname))
        doc = dr.read(fname, withboundary=False)
        sg = SampleGenerator(vocab)
        sg.build(doc)
        M, _ = sg.getmat()
        predlabels = clf.predict(M)
        doc = postprocess(doc, predlabels)
        writedoc(doc, fname, wpath)
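
A hypothetical invocation of main, assuming a gzipped pickled model and vocabulary plus a directory of *.conll files; every path below is a placeholder.

# Hypothetical call; all paths are placeholders
main(fmodel='model.pickle.gz',
     fvocab='vocab.pickle.gz',
     rpath='data/conll/',
     wpath='data/edus/')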
Example #4
import numpy as np
import pandas as pd
from scipy import misc  # misc.imread was removed in newer SciPy; use imageio there

# data, params, convnet2 and Classifier are assumed to be defined by the
# surrounding project.
data.load_and_split(params['input_data_path'], params['labels_path'])
#xtrain=data.train_idxs
#xval=data.val_idxs
#batch = data.get_train_feed_dict('X','y','train',128)
#%%
cls = Classifier(params, data.Ndims, net=convnet2)
cls.train(data, epochs=10, batch_size=128)
#cls.load_weights_from_checkpoint(params['pre-traind_model_path'])

#%% Get The Test Data And Classify It
test_path = params['test_data_path']
labels = []
# 600 batches x 500 images = the 300,000 test predictions written below
for batch_num in range(600):
    X = []
    for idx in range(500):
        img_path = test_path + str(batch_num * 500 + idx + 1) + '.png'
        X.append(misc.imread(img_path))
    X = np.array(X)
    X = (X - data.mean) / (data.std + 1e-7)
    preds = cls.predict(X)
    preds = np.argmax(preds, axis=1)
    for i in np.arange(preds.shape[0]):
        labels.append(data.decoding[preds[i]])
    print('Classified:', (batch_num + 1) * 500, 'out of 300000')
# Build the submission file (columns: id, label; ids are 1-based)
output = pd.DataFrame()
output['id'] = range(1, 300001)
output['label'] = labels
write_path = params['project_path'] + 'submissions/save1.csv'
output.to_csv(write_path, index=False)
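
For reference, the snippet above implies a params configuration along these lines; the keys are taken from the code, while every value is a placeholder guess.

# Keys inferred from the snippet; all values are placeholders
params = {
    'input_data_path': 'data/train/',
    'labels_path': 'data/trainLabels.csv',
    'test_data_path': 'data/test/',
    'project_path': './',
    'pre-traind_model_path': 'checkpoints/model.ckpt',  # key spelled as in the source
}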