def test_agent_batch(robot, game, model, budget): i = 0 queried_x = [] queried_y = [] performance = [] test_sents = utilities.data2sents(game.test_x, game.test_y) while i < budget: sel_ind = random.randint(0, len(game.train_x)) # construct the observation observation = game.getFrame(model) action = robot.getAction(observation) if action[1] == 1: sentence = game.train_x[sel_ind] labels = game.train_y[sel_ind] queried_x.append(sentence) queried_y.append(labels) i += 1 train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) # train a crf and evaluate it train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) print "***TEST", performance
def test_agent_online(robot, game, model, budget): # to address game -> we have a new game here i = 0 queried_x = [] queried_y = [] performance = [] test_sents = utilities.data2sents(game.test_x, game.test_y) while i < budget: sel_ind = random.randint(0, len(game.train_x)) # construct the observation observation = game.getFrame(model) action = robot.getAction(observation) if action[1] == 1: sentence = game.train_x[sel_ind] labels = game.train_y[sel_ind] queried_x.append(sentence) queried_y.append(labels) i += 1 train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) reward, observation2, terminal = game.feedback(action, model) # game robot.update(observation, action, reward, observation2, terminal) # train a crf and evaluate it train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) print "***TEST", performance
def test_agent_batchRandom(game, model, budget, selfstudy): i = 0 queried_x = [] queried_y = [] performance = [] test_sents = utilities.data2sents(game.test_x, game.test_y) while i < budget: sel_ind = random.randint(0, len(game.train_x)-1) sentence = game.train_x[sel_ind] if i<=20 or not selfstudy: labels = game.train_y[sel_ind] else: items = sentence.split() pl = model.pred([items]) labels = [int(x) for x in pl[0]] queried_x.append(sentence) queried_y.append(labels) i += 1 train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) # train a crf and evaluate it train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) print "***TEST", performance return performance
def test_agent_batchNew(robot, game, model, budget, selfstudy): i = 0 queried_x = [] queried_y = [] performance = [] game.setRandomOrder() test_sents = utilities.data2sents(game.test_x, game.test_y) while i < budget and game.currentFrame < len(game.train_x): #sel_ind = random.randint(0, len(game.train_x)) # construct the observation #observation = game.getFrame(model) observation = game.get_frame(model) sentence, labels = game.getCurrentFrameSentence(1) #action = robot.getAction(observation) action = robot.get_action(observation) if action[1] == 1: #sentence = game.train_x[sel_ind] #labels = game.train_y[sel_ind] if i>100 and selfstudy: items = sentence.split() pl = model.pred([items]) labels = pl[0] queried_x.append(sentence) queried_y.append(labels) i += 1 train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) # train a crf and evaluate it train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) print "***TEST", performance return performance
def test_agent_compare(robot, game, model1, model2, budget, selfstudy):
    """Side-by-side comparison over one shared stream of frames:
    model1 is fed by the DQN robot's selections, model2 by coin-flip
    random selection.  Both see the same frame each step; each stops
    querying once it has spent its own `budget`.

    Returns (performance1, performance2): the per-query test scores of
    the DQN-driven and the randomly driven model respectively.
    """
    i1 = 0  # queries spent by the DQN-driven model
    i2 = 0  # queries spent by the random baseline
    queried_x1 = []
    queried_y1 = []
    performance1 = []
    queried_x2 = []
    queried_y2 = []
    performance2 = []
    game.setRandomOrder()
    test_sents = utilities.data2sents(game.test_x, game.test_y)
    # Loop until BOTH budgets are exhausted or the stream runs out.
    while (i1 < budget or i2 < budget) and game.currentFrame < len(game.train_x):
        # NOTE(review): the observation is built from model1 only; the
        # random branch never reads it, so this looks intentional.
        observation = game.get_frame(model1)
        sentence, labels = game.getCurrentFrameSentence(1)
        #handling model1
        if i1 < budget:
            action = robot.get_action(observation)
            if action[1] == 1:
                print 'DQN selects instance %d'%game.currentFrame
                labels1 = labels
                if i1>20 and selfstudy:
                    # self-study: use model1's own predictions as labels
                    items = sentence.split()
                    pl = model1.pred([items])
                    labels1 = [int(x) for x in pl[0]]
                queried_x1.append(sentence)
                queried_y1.append(labels1)
                train_sents1 = utilities.data2sents(queried_x1, queried_y1)
                model1.train(train_sents1)
                performance1.append(model1.test(test_sents))
                i1 += 1
        #handling model2
        if i2 < budget:
            # coin flip: query this frame with probability 1/2
            act = random.randint(0, 1)
            if act == 1:
                print 'Randomly select instance %d'%game.currentFrame
                labels2 = labels
                if i2>20 and selfstudy:
                    items = sentence.split()
                    pl = model2.pred([items])
                    labels2 = [int(x) for x in pl[0]]
                queried_x2.append(sentence)
                queried_y2.append(labels2)
                train_sents2 = utilities.data2sents(queried_x2, queried_y2)
                model2.train(train_sents2)
                performance2.append(model2.test(test_sents))
                i2 += 1
    print "***TEST1", performance1
    print "***TEST2", performance2
    return performance1, performance2
def test_agent_onlineNew(robot, game, model, budget, selfstudy):
    """Online evaluation over the shuffled frame stream: the robot keeps
    learning during the test via game.feedback / robot.update each step.

    With `selfstudy`, after 21 queries the model's own predictions
    replace the gold labels.  Returns the per-query test scores.

    NOTE(review): unlike the other *New routines, the loop condition has
    no `game.currentFrame < len(game.train_x)` guard — presumably
    game.feedback resets/advances the stream; confirm it cannot run past
    the pool.
    """
    # to address game -> we have a new game here
    i = 0
    queried_x = []
    queried_y = []
    performance = []
    game.setRandomOrder()
    test_sents = utilities.data2sents(game.test_x, game.test_y)
    while i < budget:
        #sel_ind = random.randint(0, len(game.train_x))
        # construct the observation
        sentence, labels = game.getCurrentFrameSentence(0)
        observation = game.get_frame(model)
        action = robot.get_action(observation)
        if action[1] == 1:
            if i>20 and selfstudy:
                # self-study: use the model's own predictions as labels
                items = sentence.split()
                pl = model.pred([items])
                labels = [int(x) for x in pl[0]]
            queried_x.append(sentence)
            queried_y.append(labels)
            i += 1
            train_sents = utilities.data2sents(queried_x, queried_y)
            model.train(train_sents)
            performance.append(model.test(test_sents))
        # online step: reward the agent and update it every iteration
        reward, observation2, terminal = game.feedback(action, model, False)  # game
        robot.update(observation, action, reward, observation2, terminal)
    # train a crf and evaluate it
    train_sents = utilities.data2sents(queried_x, queried_y)
    model.train(train_sents)
    performance.append(model.test(test_sents))
    print "***TEST", performance
    return performance
def test_agent_batchRandomNew(game, model, budget, selfstudy): i = 0 sel_ind = 0 queried_x = [] queried_y = [] performance = [] game.setRandomOrder() test_sents = utilities.data2sents(game.test_x, game.test_y) while i < budget and sel_ind < len(game.train_x): act = random.randint(0, 1) if act == 1: print 'Selecting instance %d'%sel_ind sentence = game.train_x[game.order[sel_ind]] labels = game.train_y[game.order[sel_ind]] if i>20 and selfstudy: items = sentence.split() pl = model.pred([items]) labels = [int(x) for x in pl[0]] queried_x.append(sentence) queried_y.append(labels) train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) i += 1 else: print 'Skiping instance %d'%sel_ind sel_ind +=1 # train a crf and evaluate it train_sents = utilities.data2sents(queried_x, queried_y) model.train(train_sents) performance.append(model.test(test_sents)) print "***TEST", performance return performance
def dreaming_phase(train_la, train_la_idx, train_pool, train_pool_idx, sent_dev, budget, episodes, agent, expert):
    """Imitation-learning ("dreaming") phase: over `episodes` simulated
    episodes, repeatedly pick `budget` instances from the unlabeled pool,
    score each candidate by the dev-set F1 a temporary CRF would reach if
    it were added, and train the policy `agent` to imitate those best
    choices (DAgger-style state/action pairs).

    Relies on several names not defined in this function — presumably
    module globals: `args`, `DATASET_NAME`, `num_classes`, `k`, `w2v`,
    `max_len`, `label2str`, `logger`, plus `tagger_temp` and `dev_sents`
    (see review notes below).  Returns the trained `agent`.
    """
    logger.info(' * Start Dreaming phase * ')
    states = []
    actions = []
    for tau in range(0, episodes):
        # Shuffle and split initial train, validation set
        sent_trn = list(train_la)
        idx_trn = list(train_la_idx)
        sent_pool = list(train_pool)
        idx_pool = list(train_pool_idx)
        logger.info(
            "[Episode {}] Partition data: labeled = {}, val = {}, unlabeled pool = {} "
            .format(str(tau), len(sent_trn), len(sent_dev), len(sent_pool)))
        # Memory (two lists) to store states and actions
        tagger_dreamming = "{}/{}_tagger_temp.h5".format(
            args.output, DATASET_NAME)
        # start each episode from a fresh tagger file
        if os.path.exists(tagger_dreamming):
            os.remove(tagger_dreamming)
        model = CRFTagger(tagger_dreamming, num_classes=num_classes)
        if len(sent_trn) > 0:
            model.train(sent_trn)
        # In every episode, run the trajectory
        for t in range(0, budget):
            logger.info('[Dreaming phase] Episode:' + str(tau + 1) +
                        ' Budget:' + str(t + 1))
            row = 0
            f1 = -1
            # save the index of best data point or acturally the index of action
            bestindex = 0
            # Random sample k points from D_pool
            if args.dreaming_candidate_selection_mode == 'random':
                logger.info(" * Random candidate selections")
                random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
                    sent_pool, idx_pool, k)
            elif args.dreaming_candidate_selection_mode == 'certainty':
                logger.info(" * Certainty candidate selections")
                random_pool, random_pool_idx, queryindices = utilities.sample_from_top_n_certainty(
                    sent_pool, idx_pool, expert, args.n_learning, k)
            elif args.dreaming_candidate_selection_mode == 'mix':
                logger.info(" * Mix method candidate selections")
                # 50/50 between random and certainty-based candidate sampling
                c = np.random.rand(1)
                if c > 0.5:
                    random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
                        sent_pool, idx_pool, k)
                else:
                    random_pool, random_pool_idx, queryindices = utilities.sample_from_top_n_certainty(
                        sent_pool, idx_pool, expert, args.n_learning, k)
            else:
                logger.info(" * Unknown mode, use Random candidate selections")
                random_pool, random_pool_idx, queryindices = utilities.randomKSamples(
                    sent_pool, idx_pool, k)
            logger.debug(' * Generate label using expert')
            # Label the k candidates with the expert model
            x_tokens = [' '.join(expert.sent2tokens(s)) for s in random_pool]
            y_labels = expert.predict(random_pool)
            pred_sents = utilities.data2sents(x_tokens, y_labels)
            # Try each candidate: train a throwaway tagger with it added and
            # keep the index that yields the best dev F1.
            for datapoint in zip(pred_sents, random_pool_idx):
                seq = datapoint[0]
                idx = datapoint[1]
                train_la_temp = list(sent_trn)
                train_la_temp.append(seq)
                # NOTE(review): `tagger_temp` is not defined in this function
                # (only `tagger_dreamming` is) — presumably a module global;
                # verify it isn't a typo for `tagger_dreamming`.
                if os.path.exists(tagger_temp):
                    os.remove(tagger_temp)
                model_temp = CRFTagger(tagger_temp, num_classes=num_classes)
                model_temp.train(train_la_temp)
                # NOTE(review): evaluates on `dev_sents`, not the `sent_dev`
                # parameter — confirm which is intended.
                f1_temp = model_temp.test(dev_sents, label2str)
                if (f1_temp > f1):
                    bestindex = row
                    f1 = f1_temp
                row = row + 1
                del model_temp
                del train_la_temp
                gc.collect()
            # get the state and action
            state = utilities.getAllState(idx_trn, random_pool,
                                          random_pool_idx, model, w2v,
                                          max_len, num_classes)
            # action=bestindex
            # NOTE(review): the log message says "Use the POLICY" but this
            # branch records the simulated best action, while the other
            # branch calls agent.predict — the message and branches look
            # swapped; confirm intent.
            coin = np.random.rand(1)
            if (coin > 0.5):
                logger.debug(' * Use the POLICY [coin = {}]'.format(str(coin)))
                action = bestindex
            else:
                action = agent.predict(args.k, state)
            states.append(state)
            actions.append(action)
            # update the model
            theindex = queryindices[bestindex]
            sent_trn.append(sent_pool[theindex])
            idx_trn.append(idx_pool[theindex])
            model.train(sent_trn)
            # delete the selected data point from the pool
            del sent_pool[theindex]
            del idx_pool[theindex]
        # Fit the policy on all collected (state, action) pairs so far
        cur_actions = to_categorical(np.asarray(actions), num_classes=k)
        agent.train_policy(args.k, states, cur_actions)
        del sent_pool
        del idx_pool
        del sent_trn
        del idx_trn
        gc.collect()
    return agent
label2str = utilities.IBO_label2str selected_modules = [ "sentence_cnn", "marginal_prob_cnn", "labeled_pool", "policy_net", "entropy_cnn", "entropy_embedding" ] num_classes = 9 else: logger.info("Processing data - IO scheme") train_x, train_y, train_lens = utilities.load_data2labels_IO(train_file) test_x, test_y, test_lens = utilities.load_data2labels_IO(test_file) dev_x, dev_y, dev_lens = utilities.load_data2labels_IO(dev_file) label2str = utilities.IO_label2str selected_modules = None num_classes = 5 train_sents = utilities.data2sents(train_x, train_y) test_sents = utilities.data2sents(test_x, test_y) dev_sents = utilities.data2sents(dev_x, dev_y) logger.info("Training size: {}".format(len(train_sents))) # build vocabulary logger.info("Max document length:".format(max_len)) vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor( max_document_length=max_len, min_frequency=1) # vocab = vocab_processor.vocabulary_ # start from {"<UNK>":0} train_idx = list(vocab_processor.fit_transform(train_x)) dev_idx = list(vocab_processor.fit_transform(dev_x)) vocab = vocab_processor.vocabulary_ vocab.freeze() test_idx = list(vocab_processor.fit_transform(test_x))