def test_evaluation_with_scoring(self):
    """
    Evaluate the grammar on all examples, collecting metrics:

    semantics oracle accuracy:
        # of examples where one parse or the other was correct.
    semantics accuracy:
        # of examples where the parse at position 0 was correct.
    """
    from executor import Executor
    from experiment import evaluate_model

    arithmetic_grammar = Grammar(self.arithmetic_rules)
    arithmetic_model = Model(grammar=arithmetic_grammar,
                             feature_fn=Parse.operator_precedence_features,
                             weights=self.weights,
                             executor=Executor.execute)
    metrics = evaluate_model(model=arithmetic_model,
                             examples=self.one_parse_examples + self.two_parse_examples)
    self.assertEqual(metrics['semantics oracle accuracy'], 17)
    self.assertEqual(metrics['semantics accuracy'], 16)  # Improvement
def test_feature_function(self):
    from collections import defaultdict
    from experiment import evaluate_model
    from metrics import denotation_match_metrics
    from scoring import Model
    from geo880 import geo880_train_examples

    rules = (self.rules_optionals + self.rules_collection_entity +
             self.rules_types + self.rules_relations +
             self.rules_intersection + self.rules_superlatives +
             self.rules_reverse_joins)
    grammar = Unit3Grammar(rules=rules, annotators=self.annotators)

    def empty_denotation_feature(parse):
        features = defaultdict(float)
        if parse.denotation == ():
            features['empty_denotation'] += 1.0
        return features

    weights = {'empty_denotation': -1.0}
    model = Model(grammar=grammar,
                  feature_fn=empty_denotation_feature,
                  weights=weights,
                  executor=self.geobase.executor().execute)
    metric_values = evaluate_model(model=model,
                                   examples=geo880_train_examples,
                                   metrics=denotation_match_metrics(),
                                   print_examples=False)
    self.assertEqual(235, metric_values['denotation accuracy'])
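# For reference, a minimal sketch of the linear scoring this test relies on,
# assuming (as in SippyCup's scoring.Model) that a parse's score is the dot
# product of its feature vector with the weight vector. With
# 'empty_denotation' weighted -1.0, parses denoting the empty tuple are
# ranked below parses that return results.
def linear_score(parse, feature_fn, weights):
    return sum(weights.get(feature, 0.0) * value
               for feature, value in feature_fn(parse).items())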
def test_evaluate_model(self):
    from experiment import evaluate_model
    from metrics import denotation_match_metrics
    from scoring import Model
    from geo880 import geo880_train_examples

    rules = (self.rules_optionals + self.rules_collection_entity +
             self.rules_types + self.rules_relations +
             self.rules_intersection + self.rules_superlatives +
             self.rules_reverse_joins)
    grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
    model = Model(grammar=grammar, executor=self.geobase.executor().execute)
    # Set print_examples=True and look for 'what state has the shortest
    # river?'
    evaluate_model(model=model,
                   examples=geo880_train_examples[:10],
                   metrics=denotation_match_metrics(),
                   print_examples=False)
        feed_dict = {stories[0]: trainX[0], queries[0]: trainX[1],
                     stories[1]: trainX[2], queries[1]: trainX[3],
                     answers[0]: trainY[0], answers[1]: trainY[1]}
        return sess.run(loss_op, feed_dict=feed_dict)

    def predictf(trainX):
        feed_dict = {stories[0]: trainX[0], queries[0]: trainX[1],
                     stories[1]: trainX[2], queries[1]: trainX[3]}
        return sess.run(logits, feed_dict=feed_dict)

    def savef(filepath):
        return modelsaver.save(sess, filepath)

    def restoref(filepath):
        modelsaver.restore(sess, filepath)

    return model(fitf=fitf, testf=testf, predictf=predictf,
                 savef=savef, restoref=restoref)


ver, qas, repeat = handlesysargs('en/hn', None, 10)
# embedding_matrix, word_idx = pre_train_embedding(EMBEDDING_SIZE, ver)

def wmethod():
    global word_idx
    return word_idx

evaluate_model(compile_model, ver, qas, pad=1, wmethod='concat', flatten=0,
               repeat=repeat, E2E=1)
    # Using the last C as W, per adjacent weight tying:
    # func = lambda x: tf.matmul(x, tf.transpose(embedlayer.get_weights()[0], [1, 0]))
    # dl = Lambda(func)(newu)
    pred = Activation('softmax')(dl)
    model = Model(input=[story_input, query_input], output=[pred])

    # opt = Adam(lr=0.001,
    #            beta_1=0.9,
    #            beta_2=0.999,
    #            epsilon=1e-08,
    #            decay=0.0)
    # lr starts at 0.0 here because the LearningRateScheduler callback
    # returned below sets the rate at the start of every epoch.
    opt = SGD(lr=0.0, momentum=0.0, decay=0.0, nesterov=False)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, [LearningRateScheduler(step_decay)]
    # return model, None


ver = 'en'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, pad=1, wmethod=None, flatten=0,
               word=0, repeat=repeat)
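# `step_decay` is passed to LearningRateScheduler above but is not defined in
# this snippet. A minimal sketch of the usual step schedule; the constants
# (start at 0.01, halve every 25 epochs) are assumptions, not the repo's
# actual values.
import math

def step_decay(epoch):
    initial_lr, drop, epochs_drop = 0.01, 0.5, 25.0
    return initial_lr * math.pow(drop, math.floor(epoch / epochs_drop))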
    hnques = RNN(EMBED_HIDDEN_SIZE, return_sequences=False)(hnques)
    hnques = RepeatVector(story_maxlen2)(hnques)

    enres = merge([ensent, enques], mode='sum')
    hnres = merge([hnsent, hnques], mode='sum')

    srnn = RNN(EMBED_HIDDEN_SIZE, return_sequences=False)
    enrnnout = srnn(enres)
    hnrnnout = srnn(hnres)
    do = Dropout(0.3)
    endoout = do(enrnnout)
    hndoout = do(hnrnnout)
    enout = Dense(vocab_size1, activation='softmax')(endoout)
    hnout = Dense(vocab_size2, activation='softmax')(hndoout)

    model = Model([ensinput, enqinput, hnsinput, hnqinput], [enout, hnout])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, None


ver = 'en/hn'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, wmethod='concat', repeat=repeat)
    def restoref(filepath):
        modelsaver.restore(sess, filepath)

    return model(fitf=fitf, testf=testf, predictf=predictf,
                 savef=savef, restoref=restoref)


ver, qas, repeat = handlesysargs('en', None, 10)
embedding_matrix, word_idx = pre_train_embedding(EMBEDDING_SIZE, ver,
                                                 method='external')

def wmethod():
    global word_idx
    return word_idx

evaluate_model(compile_model, ver, qas, pad=1, wmethod=wmethod, flatten=0,
               repeat=repeat, E2E=1)
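# `pre_train_embedding(..., method='external')` presumably builds an embedding
# matrix from pre-trained vectors. A minimal sketch of the standard pattern,
# assuming GloVe-style text files; the helper name, path argument, and
# zero-initialization of out-of-vocabulary rows are illustrative, not the
# repo's implementation.
import numpy as np

def build_embedding_matrix(vectors_path, word_idx, dim):
    vectors = {}
    with open(vectors_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            vectors[parts[0]] = np.asarray(parts[1:], dtype='float32')
    matrix = np.zeros((len(word_idx) + 1, dim))  # row 0 reserved for padding
    for word, i in word_idx.items():
        if word in vectors:
            matrix[i] = vectors[word]
    return matrix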
    do = Dropout(0.3, seed=_seed)
    endoout = do(enrnnout)
    hndoout = do(hnrnnout)

    # The output projection (Dense) is shared across both languages;
    # softmax is applied as a separate layer per output.
    dense = Dense(vocab_size, activation=None, kernel_initializer=INITIALIZER)
    enout = dense(endoout)
    hnout = dense(hndoout)
    enout = Activation('softmax')(enout)
    hnout = Activation('softmax')(hnout)

    model = Model([ensinput, enqinput, hnsinput, hnqinput], [enout, hnout])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return modelwrapper(model)


ver = 'en/hn'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, pad=1, wmethod='concat',
               repeat=repeat, stop_early=0)
    model = Model(input=[story_input1, query_input1,
                         story_input2, query_input2],
                  output=preds)
    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # opt = SGD(lr=0.0, momentum=0.0, decay=0.0, nesterov=False)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # return model, [LearningRateScheduler(step_decay)]
    return model, None


ver = 'en/hn'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, pad=1, wmethod='concat', flatten=0,
               repeat=repeat, limit=500)
best_mpr = 1
for epoch in range(args.epochs):
    model.fit(item_user, show_progress=False)
    if args.m != 'bpr':  # 'is not' compared identity, not string equality
        loss = implicit._als.calculate_loss(item_user.T.tocsr(),
                                            model.user_factors,
                                            model.item_factors,
                                            model.regularization)
        print('Epoch {}, Loss {}'.format(epoch, loss))
    if not test_mode:
        R_hat = model.user_factors @ model.item_factors.T
        top_N_recs = np.array(np.argsort(-R_hat, axis=1)[:, :args.top_n]).tolist()
        MAP, rec_at_k, mpr_all, mpr_mask = evaluate_model(
            R_hat, top_N_recs, data['val'], data['val_masked'])
        del R_hat  # free the dense predictions between epochs
        wandb.log({
            'MAP@N': MAP,
            'Recall@N': rec_at_k,
            'MPR (all)': mpr_all,
            'MPR (new)': mpr_mask
        })
        if mpr_mask < best_mpr:
            wandb.run.summary["best_mpr"] = mpr_mask
            best_mpr = mpr_mask

if test_mode:
    # R_hat is deleted (or never built) above, so recompute predictions
    # before scoring on the test split.
    R_hat = model.user_factors @ model.item_factors.T
    top_N_recs = np.array(np.argsort(-R_hat, axis=1)[:, :args.top_n]).tolist()
    MAP, rec_at_k, mpr_all, mpr_mask = evaluate_model(
        R_hat, top_N_recs, data['test'], data['test_masked'])
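# For reference, a sketch of mean percentile rank (MPR), the quantity tracked
# as best_mpr above: the average percentile position of each held-out
# interaction in its user's ranked list (0.0 = always ranked first,
# 0.5 = random), so lower is better. A standard definition; the repo's
# evaluate_model may compute it differently.
import numpy as np

def mean_percentile_rank(R_hat, R_true):
    ranks = (-R_hat).argsort(axis=1).argsort(axis=1)  # per-user rank of every item
    percentiles = ranks / (R_hat.shape[1] - 1.0)
    users, items = R_true.nonzero()
    return percentiles[users, items].mean()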
    # qrnn = Dropout(0.3, seed=_seed)(qrnn)
    # qrnn = RNN(query_maxlen, return_sequences=False,
    #            kernel_initializer=INITIALIZER,
    #            bias_initializer=INITIALIZER,
    #            recurrent_initializer=RINITIALIZER)(qinput)
    qrnn = RepeatVector(story_maxlen)(qinput)
    qrnn = Flatten()(qrnn)

    out = merge([sentrnn, qrnn], mode='sum')
    # out = RNN(EMBED_HIDDEN_SIZE, return_sequences=False,
    #           kernel_initializer=INITIALIZER,
    #           bias_initializer=INITIALIZER,
    #           recurrent_initializer=RINITIALIZER)(out)
    # out = Dropout(0.3, seed=_seed)(out)
    out = Dense(vocab_size, kernel_initializer=INITIALIZER,
                activation='softmax')(out)

    model = Model([sentinp, qinput], [out])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return modelwrapper(model)


ver = 'en'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, repeat=repeat)
        feed_dict = {stories: trainX[0],
                     queries: trainX[1],
                     answers: trainY[0]}
        return sess.run(loss_op, feed_dict=feed_dict)

    def predictf(trainX):
        feed_dict = {stories: trainX[0], queries: trainX[1]}
        return sess.run(logits, feed_dict=feed_dict)

    def savef(filepath):
        return modelsaver.save(sess, filepath)

    def restoref(filepath):
        modelsaver.restore(sess, filepath)

    return model(fitf=fitf, testf=testf, predictf=predictf,
                 savef=savef, restoref=restoref)


ver, qas, repeat = handlesysargs('en', None, 10)
# embedding_matrix, word_idx = pre_train_embedding(EMBEDDING_SIZE, ver)

def wmethod():
    global word_idx
    return word_idx

evaluate_model(compile_model, ver, qas, pad=1, wmethod=None, flatten=0,
               repeat=repeat, E2E=1, stop_early=1)
    do = Dropout(0.3)
    endoout = do(enrnnout)
    hndoout = do(hnrnnout)
    shuffleddoout = do(shuffledrnnout)
    enout = Dense(vocab_size1, activation='softmax')(endoout)
    hnout = Dense(vocab_size2, activation='softmax')(hndoout)
    shuffledout = Dense(vocab_size3, activation='softmax')(shuffleddoout)

    model = Model([ensinput, enqinput, hnsinput, hnqinput,
                   shuffledsinput, shuffledqinput],
                  [enout, hnout, shuffledout])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model, None


ver = 'en/hn/shuffled'
qas = None
ver, qas, repeat = handlesysargs(ver, qas)
evaluate_model(compile_model, ver, qas, pad=1, wmethod='concat', flatten=1,
               repeat=repeat)