def main():
    x_train, y_train, x_test, y_test = load_features()
    x_train_conc = concate_x(x_train)
    x_test_conc = concate_x(x_test)

    # log_reg = LogisticRegressionCV(verbose=1, penalty='l2', n_jobs=1)
    # linear_svc = linear_svc_cv(x_train_conc, y_train)
    rand_for = random_forest_cv(x_train_conc, y_train)

    # y_hat = one_vs_rest(x_train_conc, y_train, x_test_conc, log_reg)
    y_hat = one_vs_rest(x_train_conc, y_train, x_test_conc, rand_for)
    # y_hat = one_vs_rest(x_train_conc, y_train, x_test_conc, linear_svc)
    # y_hat = one_for_each_class(x_train_conc, y_train, x_test_conc, rand_for)
    # y_hat = one_for_each_class(x_train_conc, y_train, x_test_conc, linear_svc)

    # np.savetxt('test.csv', y_test, delimiter=',')
    # np.savetxt('hat.csv', y_hat, delimiter=',')

    print('Test accuracy: ', score.accuracy(y_hat, y_test))
    print('F1 score: ', score.f1score(y_hat, y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(y_hat, y_test)
    for c, class_score in enumerate(score_byclass):
        print(c, ':', class_score)

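# A minimal sketch of what the one_vs_rest helper above might look like,
# assuming it wraps the base estimator in scikit-learn's OneVsRestClassifier
# and returns test-set predictions; the signature and label handling are
# assumptions, not taken from this repository.
from sklearn.multiclass import OneVsRestClassifier


def one_vs_rest(x_train, y_train, x_test, base_estimator):
    clf = OneVsRestClassifier(base_estimator)  # one binary classifier per label
    clf.fit(x_train, y_train)
    return clf.predict(x_test)
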
def learn(self):
    hypothesis = self.initial_hypothesis()
    current_score = 0.0
    while True:
        next_hypothesis = self.best_rule(hypothesis)
        new_score = accuracy(next_hypothesis)
        getLogger(self._logger).info('RULE LEARNED: %s %s'
                                     % (next_hypothesis, new_score))
        s = significance(next_hypothesis)
        if self._min_significance is not None and s < self._min_significance:
            break
        if new_score > current_score:
            hypothesis = next_hypothesis
            current_score = new_score
        else:
            break
        if self.interrupted:
            break
        if hypothesis.get_literal() and \
                hypothesis.get_literal().functor == '_recursive':
            break  # can't extend after a recursive literal
    return hypothesis

def main():
    batch_size = 64
    nb_epoch = 20
    img_size = 256
    gen = read_data.read_data_photo_labels(2000, img_size,
                                           batch_size=batch_size)
    X_test, Y_test = next(gen)

    # model = get_image_model(img_size)
    with open(JSON_NAME, 'r') as jfile:
        model = models.model_from_json(jfile.read())
    model.load_weights(WEIGHTS_NAME)
    # model.fit_generator(gen,
    #     samples_per_epoch=10000, nb_epoch=nb_epoch, verbose=1)

    test_pred = np.sign(model.predict(X_test))
    test_loss = model.evaluate(X_test, Y_test)
    np.savetxt('pred.csv', test_pred, delimiter=',')

    # with open(JSON_NAME, 'w') as jfile:
    #     jfile.write(model.to_json())
    # model.save_weights(WEIGHTS_NAME, overwrite=True)

    print('Test loss: ', test_loss)
    print('Test accuracy: ', score.accuracy(test_pred, Y_test))
    print('F1 score: ', score.f1score(test_pred, Y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(test_pred, Y_test)
    for c, s in enumerate(score_byclass):
        print(c, ':', s)

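# Hypothetical sketch of the project's score helpers used above; the function
# names match the calls in this file, but the bodies are assumptions. Labels
# are taken to be -1/+1 indicator matrices, which is why the predictions are
# passed through np.sign first.
import numpy as np


def accuracy(pred, truth):
    # fraction of individual label entries predicted correctly
    return np.mean(pred == truth)


def f1score(pred, truth):
    # micro-averaged F1 over all label entries, with +1 as the positive class
    tp = np.sum((pred == 1) & (truth == 1))
    fp = np.sum((pred == 1) & (truth == -1))
    fn = np.sum((pred == -1) & (truth == 1))
    return 2 * tp / (2 * tp + fp + fn)


def f1_by_class(pred, truth):
    # per-class F1, computed column-wise over the indicator matrix
    tp = np.sum((pred == 1) & (truth == 1), axis=0)
    fp = np.sum((pred == 1) & (truth == -1), axis=0)
    fn = np.sum((pred == -1) & (truth == 1), axis=0)
    return 2 * tp / (2 * tp + fp + fn)
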
def test_accuracy_for_deliverable1b():
    expected = 0.729
    actual = accuracy(KEYFILES[ENGLISH], DELIVERABLE1b)
    assert_almost_equals(expected, actual, places=3,
                         msg="Accuracy Incorrect for 1b: Expected %f, Actual %f"
                             % (expected, actual))

def test_accuracy_for_deliverable2f():
    expected = {GERMAN: 0.612, SPANISH: 0.662, ITALIAN: 0.657,
                FRENCH: 0.582, PORTO: 0.624}
    SUFFIX = "deliverable2f.conll"
    lang, filename = getForeignLanguage(DIR, SUFFIX)
    actual = accuracy(KEYFILES[lang], os.path.join(DIR, filename))
    ok_(expected[lang] <= (actual + 0.002),
        msg="Accuracy Incorrect for 2f: Expected %f, Actual %f"
            % (expected[lang], actual))

def test_accuracy_for_deliverable2c():
    expected = {GERMAN: 0.432, SPANISH: 0.365, ITALIAN: 0.311,
                FRENCH: 0.372, PORTO: 0.305}
    SUFFIX = "deliverable2c.conll"
    lang, filename = getForeignLanguage(DIR, SUFFIX)
    actual = accuracy(KEYFILES[lang], os.path.join(DIR, filename))
    ok_(expected[lang] <= (actual + 0.002),  # adding some tolerance
        msg="Accuracy Incorrect for 2c: Expected %f, Actual %f"
            % (expected[lang], actual))

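# Hypothetical sketch of the accuracy(keyfile, responsefile) helper these tests
# assume: both files are taken to be parallel CoNLL-style files with the tag as
# the last whitespace-separated field on each line. The file format here is a
# guess, not confirmed by this repository.
def accuracy(keyfile, responsefile):
    correct = 0
    total = 0
    with open(keyfile) as key, open(responsefile) as response:
        for key_line, response_line in zip(key, response):
            key_parts = key_line.split()
            response_parts = response_line.split()
            if not key_parts:  # blank line marks a sentence boundary
                continue
            total += 1
            if key_parts[-1] == response_parts[-1]:
                correct += 1
    return correct / total
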
def scoreModel():
    try:
        testX = np.array(pd.read_csv('../data/testX.csv'))
        testY = np.array(pd.read_csv('../data/testY.csv'), dtype=int)
    except FileNotFoundError:
        # was a bare except that fell through to a NameError below
        print("No valid datasets were found")
        return

    try:
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos,
         unigramTgtPos, bigramTgtPos, trigramTgtPos) = corpf.loadNLP()
    except OSError:  # assuming loadNLP fails with a missing-file error
        print('No ngram models were found, making new ones...')
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos,
         unigramTgtPos, bigramTgtPos, trigramTgtPos) = corpf.getNgramModels()

    method = input('What model would you like to use?\n'
                   'See README for available options: ')
    if method == 'SVM':
        svm = lc.loadmodel('../data/svm.joblib')
        scores = lc.classCLF(svm, testX)
    elif method == 'LR':
        lr = lc.loadmodel('../data/lr.joblib')
        scores = lc.classCLF(lr, testX)
    elif method == 'MLP':
        mlp = lc.loadmodel('../data/mlp.joblib')
        scores = lc.classCLF(mlp, testX)
    elif method == 'NBC':
        NBC = lc.loadmodel('../data/NBC.joblib')
        # was lc.NBC.classnb(NBC, testX), which would raise AttributeError;
        # assuming classnb lives in lc like classCLF does
        scores = lc.classnb(NBC, testX)
    else:
        print('No valid method was given, please see the README for instructions')
        return

    fscore = sc.fscore(scores[0], testY)
    print('F1score:', fscore)
    accuracy = sc.accuracy(scores[0], testY)
    print('Accuracy:', accuracy)

    crossent = []
    for i in range(len(testY)):
        correctEst = 1 if scores[0][i] == testY[i] else 0
        crossent.append(sc.crossEntropy(scores[1][i], correctEst))
    print('CrossEnt:', np.mean(crossent))
    return fscore, method, scores, crossent

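# A possible tidy-up (a sketch, not code from this repository): the SVM/LR/MLP
# branches above differ only in the joblib path, so a lookup table would let a
# new classifier be added with one dict entry. NBC stays separate because it
# goes through classnb rather than classCLF.
MODEL_PATHS = {
    'SVM': '../data/svm.joblib',
    'LR': '../data/lr.joblib',
    'MLP': '../data/mlp.joblib',
}


def classify(method, testX):
    path = MODEL_PATHS.get(method)
    if path is None:
        raise ValueError('unknown method: %s' % method)
    return lc.classCLF(lc.loadmodel(path), testX)
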
def scoreSentences():
    print("Warning: this takes quite a while")
    try:
        cleandf = pd.read_csv('../data/cleandf.csv', dtype=object)
    except FileNotFoundError:
        fullDf = pd.read_csv("../data/en-nl.tsv", sep="\t")
        cleandf = cd.cleandata(fullDf)
        cleandf.to_csv('../data/cleandf.csv')

    try:
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos,
         unigramTgtPos, bigramTgtPos, trigramTgtPos) = corpf.loadNLP()
    except OSError:  # assuming loadNLP fails with a missing-file error
        print('No ngram models were found, making new ones...')
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos,
         unigramTgtPos, bigramTgtPos, trigramTgtPos) = corpf.getNgramModels()

    try:
        testX = np.array(pd.read_csv('../data/testX.csv'))
        testY = np.array(pd.read_csv('../data/testY.csv'), dtype=int)
    except FileNotFoundError:
        # was a bare except that fell through to a NameError below
        print("No valid datasets were found")
        return

    scores = scoreder(cleandf, testX)
    fscore = sc.fscore(scores[0], testY)
    print('F1-score:', fscore)
    accuracy = sc.accuracy(scores[0], testY)
    print('Accuracy:', accuracy)

    crossent = []
    for i in range(len(testY)):
        correctEst = 1 if scores[0][i] == testY[i] else 0
        crossent.append(sc.crossEntropy(scores[1][i], correctEst))
    print('Cross Entropy:', np.mean(crossent))

        loss2 += link_constraints2

        optimizer1.zero_grad()  # clear gradients for next train step
        loss1.backward()        # backpropagation, compute gradients
        optimizer1.step()       # apply gradients
        optimizer2.zero_grad()  # clear gradients for next train step
        loss2.backward()        # backpropagation, compute gradients
        optimizer2.step()       # apply gradients

        if step % 1 == 0:
            if Args.cuda:
                pred1 = torch.max(output1, 1)[1].cuda().data.squeeze()
                pred2 = torch.max(output2, 1)[1].cuda().data.squeeze()
            else:
                pred1 = torch.max(output1, 1)[1].data.squeeze()
                pred2 = torch.max(output2, 1)[1].data.squeeze()
            # evaluate
            accuracy1 = score.accuracy(pred1, y)
            accuracy2 = score.accuracy(pred2, y)
            F1_1 = score.F1(pred1, y)
            F1_2 = score.F1(pred2, y)
            print('Epoch: %s | step: %s | accuracy1: %.2f | F1: %.4f | '
                  'accuracy2: %.2f | F1: %.4f |'
                  % (epoch, step, accuracy1, F1_1, accuracy2, F1_2))

#%% Testing
all_y = []
all_pred1 = []
all_pred2 = []
for step, (x, y) in enumerate(valid_loader):
    y = torch.squeeze(y)  # drop the singleton axis
    if Args.cuda:
        x, y = x.cuda(), y.cuda()
    diagnosis1.eval()  # switch to evaluation mode
    diagnosis2.eval()
    output1, _ = diagnosis1(x)

def probfoil(**kwargs):
    args = kwargs
    if 'seed' in args:
        seed = args['seed']
    else:
        seed = str(random.random())
        args['seed'] = seed
    random.seed(seed)

    logger = 'probfoil'
    if 'log' not in args:
        args['log'] = None
        logfile = None
    else:
        logfile = open(args['log'], 'w')

    if 'verbose' not in args:
        args['verbose'] = 0
    if 'm' not in args:
        args['m'] = 1
    if 'beam_size' not in args:
        args['beam_size'] = 5
    if 'p' not in args:
        args['p'] = None
    if 'l' not in args:
        args['l'] = None
    if 'target' not in args:
        args['target'] = None
    if 'symmetry_breaking' not in args:
        args['symmetry_breaking'] = True

    if 'settings' in args:
        settings = args['settings']
        del args['settings']
    else:
        settings = None
    if 'train' in args:
        train = args['train']
        del args['train']
    else:
        train = None
    if 'test' in args:
        test = args['test']
        del args['test']
    else:
        test = None
    # settings = args['settings']
    # train = args['train']

    log = init_logger(verbose=args['verbose'], name=logger, out=logfile)
    log.info('Random seed: %s' % seed)

    # Load input files
    # data = DataFile(*(PrologFile(source) for source in args['files']))
    data = DataFile(*(PrologString(source) for source in [settings, train]))

    if 'probfoil1' in args:
        learn_class = ProbFOIL
    else:
        learn_class = ProbFOIL2

    time_start = time.time()
    learn = learn_class(data, logger=logger, seed=seed, log=args['log'],
                        verbose=args['verbose'], m=args['m'],
                        beam_size=args['beam_size'], p=args['p'], l=args['l'])
    hypothesis = learn.learn()
    time_total = time.time() - time_start

    # Store scores
    train_accuracy = accuracy(hypothesis)
    train_precision = precision(hypothesis)
    train_recall = recall(hypothesis)

    # Load test data
    if test is not None:
        test_data = DataFile(*(PrologString(source)
                               for source in [settings, test]))
        test = learn_class(test_data, logger=logger, seed=seed, log=args['log'],
                           verbose=args['verbose'], m=args['m'],
                           beam_size=args['beam_size'], p=args['p'], l=args['l'])
        test_hypothesis = test.test_rule(hypothesis)
        # Store scores
        test_accuracy = accuracy(test_hypothesis)
        test_precision = precision(test_hypothesis)
        test_recall = recall(test_hypothesis)

    print('================ SETTINGS ================')
    # for kv in vars(args).items():
    for kv in args.items():
        print('%20s:\t%s' % kv)

    if learn.interrupted:
        print('================ PARTIAL THEORY ================')
    else:
        print('================= FINAL THEORY =================')
    rule = hypothesis
    rules = rule.to_clauses(rule.target.functor)

    # First rule is the failing rule: don't print it if there are other rules.
    if len(rules) > 1:
        for rule in rules[1:]:
            print(rule)
    else:
        print(rules[0])

    print('==================== SCORES ====================')
    print('           Train Set')
    print('            Accuracy:\t', train_accuracy)
    print('           Precision:\t', train_precision)
    print('              Recall:\t', train_recall)
    if test is not None:
        print('            Test Set')
        print('            Accuracy:\t', test_accuracy)
        print('           Precision:\t', test_precision)
        print('              Recall:\t', test_recall)
    print('================== STATISTICS ==================')
    for name, value in learn.statistics():
        print('%20s:\t%s' % (name, value))
    print('          Total time:\t%.4fs' % time_total)

    if logfile:
        logfile.close()


# def main(argv=sys.argv[1:]):
#     args = argparser().parse_args(argv)
#
#     if args.seed:
#         seed = args.seed
#     else:
#         seed = str(random.random())
#     random.seed(seed)
#
#     logger = 'probfoil'
#
#     if args.log is None:
#         logfile = None
#     else:
#         logfile = open(args.log, 'w')
#
#     log = init_logger(verbose=args.verbose, name=logger, out=logfile)
#     log.info('Random seed: %s' % seed)
#
#     # Load input files
#     data = DataFile(*(PrologFile(source) for source in args.files))
#
#     if args.probfoil1:
#         learn_class = ProbFOIL
#     else:
#         learn_class = ProbFOIL2
#
#     time_start = time.time()
#     learn = learn_class(data, logger=logger, **vars(args))
#
#     hypothesis = learn.learn()
#     time_total = time.time() - time_start
#
#     print('================ SETTINGS ================')
#     for kv in vars(args).items():
#         print('%20s:\t%s' % kv)
#
#     if learn.interrupted:
#         print('================ PARTIAL THEORY ================')
#     else:
#         print('================= FINAL THEORY =================')
#     rule = hypothesis
#     rules = rule.to_clauses(rule.target.functor)
#
#     # First rule is failing rule: don't print it if there are other rules.
#     if len(rules) > 1:
#         for rule in rules[1:]:
#             print(rule)
#     else:
#         print(rules[0])
#     print('==================== SCORES ====================')
#     print('            Accuracy:\t', accuracy(hypothesis))
#     print('           Precision:\t', precision(hypothesis))
#     print('              Recall:\t', recall(hypothesis))
#     print('================== STATISTICS ==================')
#     for name, value in learn.statistics():
#         print('%20s:\t%s' % (name, value))
#     print('          Total time:\t%.4fs' % time_total)
#
#     if logfile:
#         logfile.close()
#
#
# def argparser():
#     parser = argparse.ArgumentParser()
#     parser.add_argument('files', nargs='+')
#     parser.add_argument('-1', '--det-rules', action='store_true',
#                         dest='probfoil1', help='learn deterministic rules')
#     parser.add_argument('-m', help='parameter m for m-estimate', type=float,
#                         default=argparse.SUPPRESS)
#     parser.add_argument('-b', '--beam-size', type=int, default=5,
#                         help='size of beam for beam search')
#     parser.add_argument('-p', '--significance', type=float, default=None,
#                         help='rule significance threshold', dest='p')
#     parser.add_argument('-l', '--length', dest='l', type=int, default=None,
#                         help='maximum rule length')
#     parser.add_argument('-v', action='count', dest='verbose', default=None,
#                         help='increase verbosity (repeat for more)')
#     parser.add_argument('--symmetry-breaking', action='store_true',
#                         help='avoid symmetries in refinement operator')
#     parser.add_argument('--target', '-t', type=str,
#                         help='specify predicate/arity to learn (overrides settings file)')
#     parser.add_argument('-s', '--seed', help='random seed', default=None)
#     parser.add_argument('--log', help='write log to file', default=None)
#
#     return parser
#
#
# if __name__ == '__main__':
#     main()

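# A minimal usage sketch (an assumption, not from this repository): probfoil()
# wraps its settings/train/test arguments in PrologString, so they are expected
# to be ProbLog source text. The file names below are illustrative only.
if __name__ == '__main__':
    with open('settings.pl') as f:
        settings_src = f.read()
    with open('train.data') as f:
        train_src = f.read()
    probfoil(settings=settings_src, train=train_src, verbose=1)
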
for idx, (biz_id, (label, photo_ids)) in enumerate(biz_csv.items()):
    for photo_id in photo_ids:
        image_idx = image_label_order[str(photo_id)]
        x_whole[idx] += image_labels[image_idx]
    x_whole[idx] /= len(photo_ids)
    y_whole[idx] = label

train_frac = 0.9
x_train, x_test = np.vsplit(x_whole, [int(num_biz * train_frac)])
y_train, y_test = np.vsplit(y_whole, [int(num_biz * train_frac)])
print('X_train shape:', x_train.shape)
print('Y_train shape:', y_train.shape)
print('Training on %s biz, testing on %s biz'
      % (x_train.shape[0], x_test.shape[0]))

model.fit(x_train, y_train, batch_size=batch_size,
          nb_epoch=nb_epoch, verbose=1, validation_split=0.0)

test_pred = np.sign(model.predict(x_test))
test_loss = model.evaluate(x_test, y_test)
np.savetxt('pred.csv', test_pred, delimiter=',')

print('Test loss: ', test_loss)
print('Test accuracy: ', score.accuracy(test_pred, y_test))
print('F1 score: ', score.f1score(test_pred, y_test))
print('F1 score by class:')
score_byclass = score.f1_by_class(test_pred, y_test)
# loop variable renamed: the original reused `score`, shadowing the module
for c, class_score in enumerate(score_byclass):
    print(c, ':', class_score)

        y = torch.squeeze(y)  # drop the singleton axis
        if Args.cuda:
            x, y = x.cuda(), y.cuda()
        diagnosis.train()  # switch to training mode
        output = diagnosis(x)
        loss = loss_func(output, y)  # compute loss

        optimizer.zero_grad()  # clear gradients for next train step
        loss.backward()        # backpropagation, compute gradients
        optimizer.step()       # apply gradients

        if step % 1 == 0:
            if Args.cuda:
                pred = torch.max(output, 1)[1].cuda().data.squeeze()
            else:
                pred = torch.max(output, 1)[1].data.squeeze()
            # evaluate
            accuracy = score.accuracy(pred, y)
            F1 = score.F1(pred, y)
            print('Epoch: %s | step: %s | train loss: %.2f | accuracy: %.2f | F1: %.4f'
                  % (epoch, step, loss.data, accuracy, F1))

#%% Testing
all_y = []
all_pred = []
for step, (x, y) in enumerate(valid_loader):
    y = torch.squeeze(y)  # drop the singleton axis
    if Args.cuda:
        x, y = x.cuda(), y.cuda()
    diagnosis.eval()  # switch to evaluation mode
    output = diagnosis(x)
    if Args.cuda:
        pred = torch.max(output, 1)[1].cuda().data.squeeze()

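# Hypothetical sketch of the score helpers this training loop relies on; the
# real module is not shown in this file. pred and y are assumed to be 1-D
# integer class tensors, with class 1 treated as positive for a binary F1.
import torch


def accuracy(pred, y):
    # fraction of samples whose predicted class matches the target
    return (pred == y).float().mean().item()


def F1(pred, y):
    tp = ((pred == 1) & (y == 1)).sum().item()
    fp = ((pred == 1) & (y == 0)).sum().item()
    fn = ((pred == 0) & (y == 1)).sum().item()
    return 2 * tp / (2 * tp + fp + fn) if tp else 0.0
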
def test_accuracy_for_deliverable1c():
    expected = 0.82
    actual = accuracy(KEYFILES[ENGLISH], DELIVERABLE1c)
    ok_(expected < (actual + 0.002),
        msg="Accuracy is lower than expected for 1c: Expected %f, Actual %f"
            % (expected, actual))