def rate(title, body, source):
    """Score an article and return ``(score, related_article)``.

    The score is ten times a weighted blend of the classifier prediction
    and the article-search score. ``source`` is reduced to the text between
    an optional ``http://`` / ``https://`` prefix and the first ``/``; that
    string is passed to ``is_from_trusted_source``. For a trusted source the
    classifier weight is lowered and a constant term makes up the difference.

    Args:
        title: Article title, forwarded to the classifier and the searcher.
        body: Article body, forwarded to the classifier and the searcher.
        source: URL (or bare host) the article came from.

    Returns:
        A 2-tuple of the blended score and the related article returned by
        ``searcher_score``.
    """
    # Score from the trained classifier.
    classifier_score = Classifier().predict(title, body)

    # Score and related article from the article searcher.
    related_article, search_score = searcher_score(title, body)
    # search_score = min(0.9, search_score * 2)

    # Drop the URL scheme (case-sensitive match), then everything from the
    # first "/" onwards.
    for scheme in ("http://", "https://"):
        if source.startswith(scheme):
            source = source[len(scheme):]
            break
    source = source.partition("/")[0]
    print(source)

    is_trusted = is_from_trusted_source(source)
    print("Is trusted ", is_trusted)

    if not is_trusted:
        blended = classifier_score * 0.7 + search_score * 0.3
        return 10 * blended, related_article

    classifier_weight = 0.55
    search_weight = 0.3
    # Whatever weight is left over is granted unconditionally to trusted sources.
    trusted_bonus = 1 - classifier_weight - search_weight
    blended = (classifier_score * classifier_weight
               + trusted_bonus
               + search_score * search_weight)
    return 10 * blended, related_article
def rate(title, body):
    """Return a blended score for an article.

    The result is ``0.8 * classifier score + 0.2 * article-search score``,
    where the classifier score comes from a freshly constructed
    ``Classifier`` and the search score from ``searcher_score``.

    Args:
        title: Article title, forwarded to both scorers.
        body: Article body, forwarded to both scorers.
    """
    # Trained-classifier component.
    classifier_score = Classifier().predict(title, body)

    # Article-searcher component.
    search_score = searcher_score(title, body)

    classifier_part = classifier_score * 0.8
    search_part = search_score * 0.2
    return classifier_part + search_part
def main(fmodel, fvocab, rpath, wpath):
    """Run the classifier over every ``conll`` file in a directory.

    Loads the model from ``fmodel`` and the vocabulary from the gzip file
    ``fvocab``. Each file in ``rpath`` whose name ends with ``conll`` is then
    read, turned into a feature matrix, classified, post-processed with the
    predicted labels, and handed to ``writedoc`` together with ``wpath``.

    Args:
        fmodel: Path passed to ``Classifier.loadmodel``.
        fvocab: Path of the gzip-compressed vocabulary file.
        rpath: Directory that is scanned for input files.
        wpath: Output location forwarded to ``writedoc``.
    """
    clf = Classifier()
    dr = DocReader()
    clf.loadmodel(fmodel)
    flist = [join(rpath, fname) for fname in listdir(rpath)
             if fname.endswith('conll')]

    # Use a context manager so the gzip handle is closed as soon as the
    # vocabulary is loaded (and also if loading fails) instead of leaking it.
    # NOTE(review): `load` is presumably a pickle-style loader; if so, only
    # point `fvocab` at trusted files -- confirm against the file's imports.
    with gzip.open(fvocab) as fin:
        vocab = load(fin)

    for fname in flist:
        # Single-argument parenthesised form prints the same text on
        # Python 2 and Python 3.
        print("Processing file: {}".format(fname))
        doc = dr.read(fname, withboundary=False)
        # A fresh generator per document, as in the original flow.
        sg = SampleGenerator(vocab)
        sg.build(doc)
        M, _ = sg.getmat()  # second return value is not needed here
        predlabels = clf.predict(M)
        doc = postprocess(doc, predlabels)
        writedoc(doc, fname, wpath)
# Train a classifier on the prepared data set, classify the numbered test
# images in fixed-size batches, and write the predictions to a CSV file.
data.load_and_split(params['input_data_path'], params['labels_path'])
# Debugging helpers, kept for reference:
#xtrain=data.train_idxs
#xval=data.val_idxs
#batch = data.get_train_feed_dict('X','y','train',128)

#%% Build And Train The Classifier
cls = Classifier(params, data.Ndims, net=convnet2)
cls.train(data, epochs=10, batch_size=128)
# Alternative to training from scratch:
#cls.load_weights_from_checkpoint(params['pre-traind_model_path'])

#%% Get The Test Data And Classify It
# The test images are named 1.png, 2.png, ... and are processed in
# NUM_TEST_BATCHES batches of TEST_BATCH_SIZE; the total is derived so the
# three numbers can no longer drift apart.
TEST_BATCH_SIZE = 500
NUM_TEST_BATCHES = 600
NUM_TEST_IMAGES = NUM_TEST_BATCHES * TEST_BATCH_SIZE

test_path = params['test_data_path']
labels = []
for batch_num in range(NUM_TEST_BATCHES):
    first_id = batch_num * TEST_BATCH_SIZE + 1  # file names are 1-based
    # NOTE(review): `misc` is presumably scipy.misc, whose imread was removed
    # in SciPy 1.2 -- confirm the pinned SciPy version before upgrading.
    X = np.array([
        misc.imread(test_path + str(first_id + idx) + '.png')
        for idx in range(TEST_BATCH_SIZE)
    ])
    # Normalise with the statistics stored on `data`; the small epsilon
    # avoids a division by zero where the std is 0.
    X = (X - data.mean) / (data.std + 1e-7)
    preds = cls.predict(X)
    # Index of the largest entry in each row, mapped through data.decoding.
    preds = np.argmax(preds, axis=1)
    labels.extend(data.decoding[pred] for pred in preds)
    # Report the number of images classified so far, including this batch
    # (the count used to lag one batch behind).
    print('Classified: ', (batch_num + 1) * TEST_BATCH_SIZE,
          'out of {}'.format(NUM_TEST_IMAGES))

# One row per test image, with ids starting at 1.
output = pd.DataFrame()
output['id'] = range(1, NUM_TEST_IMAGES + 1)
output['label'] = labels
write_path = params['project_path'] + 'submissions/save1.csv'
output.to_csv(write_path, index=False)