def learn(self):
    """ Train behavior model """
    for t in range(self.num_replays):
        states_t, actions, rewards, next_states_t, dones = self.replay_buffer.sample()
        states_t = torch.stack(states_t).to(self.device)
        next_states_t = torch.stack(next_states_t).to(self.device)
        q_values = self.decision_model(states_t)
        q_values_next = self.policy_model(next_states_t)
        q_values_target = q_values.clone()
        for i in range(len(actions)):
            action = actions[i]
            q_values_target[i, action] = utils.target(rewards[i], self.gamma,
                                                      q_values_next[i],
                                                      self.targetPolicy, dones[i])
        self.optimizer.zero_grad()
        loss = self.criterion(q_values, q_values_target.detach())
        loss.backward()
        self.optimizer.step()
        utils.copy_model(self.decision_model, self.policy_model, tau=self.tau)
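# Note: utils.copy_model is not defined in this collection. In the PyTorch agent
# snippets it is called with tau=1.0 for a hard copy when the networks are created
# and with a small tau after each learning step, which matches the usual Polyak
# soft update theta_target <- tau * theta_local + (1 - tau) * theta_target.
# The helper below is only an illustrative sketch under that assumption, not the
# project's actual implementation.
import torch


def copy_model(local_model, target_model, tau=1.0):
    """Blend the local network's parameters into the target network (Polyak average)."""
    with torch.no_grad():
        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.copy_(tau * local_param + (1.0 - tau) * target_param)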
def __init__(self, agent_dict={}, actor_dict={}, critic_dict={}):
    """ Initialize Agent object

    Params
    ======
        agent_dict (dict): dictionary containing parameters for the agent
        actor_dict (dict): dictionary containing parameters for the agent's actor model
        critic_dict (dict): dictionary containing parameters for the agent's critic model
    """
    enable_cuda = agent_dict.get("enable_cuda", False)
    if enable_cuda:
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        self.device = torch.device("cpu")

    self.num_agents = agent_dict.get("num_agents", 20)
    self.num_episodes = agent_dict.get("num_episodes", 10000)
    self.save_after = agent_dict.get("save_after", -1)
    self.name = agent_dict.get("name", "reacher")
    self.gamma = agent_dict.get("gamma", 0.9)
    self.tau = agent_dict.get("tau", 0.001)
    self.noise = utils.OUNoise((self.num_agents, 4), 0)
    self.num_replays = agent_dict.get("num_replays", 1)
    self.learning_rate_actor = agent_dict.get("learning_rate_actor", 1E-3)
    self.learning_rate_critic = agent_dict.get("learning_rate_critic", 1E-3)
    self.criterion = nn.MSELoss()

    memory_size = agent_dict.get("memory_size", 2**14)
    batchsize = agent_dict.get("batchsize", 2**10)
    replay_reg = agent_dict.get("replay_reg", 0.0)
    self.replay_buffer = utils.ReplayBuffer(memory_size, batchsize)

    self.actor = model.ActorModel(actor_dict).to(self.device)
    self.actor_target = model.ActorModel(actor_dict).to(self.device)
    self.critic = model.CriticModel(critic_dict).to(self.device)
    self.critic_target = model.CriticModel(critic_dict).to(self.device)
    self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                      lr=self.learning_rate_actor)
    self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                       lr=self.learning_rate_critic)

    # Hard copy so the target networks start identical to the local networks
    utils.copy_model(self.actor, self.actor_target, tau=1.0)
    utils.copy_model(self.critic, self.critic_target, tau=1.0)

    seed = agent_dict.get("seed", 0)
    torch.manual_seed(seed)
    np.random.seed(seed)
def learn(self):
    """ Train actor and critic based on past experiences """
    for t in range(self.num_replays):
        states_t, actions_t, rewards_t, next_states_t, dones_t = self.replay_buffer.sample(self.device)

        # Critic update: regress Q(s, a) towards the TD target built from the target networks
        next_actions_t = self.actor_target(next_states_t)
        next_q_values_t = self.critic_target(next_states_t, next_actions_t).detach()
        targets_t = rewards_t + (self.gamma * next_q_values_t * (1 - dones_t))
        q_values_t = self.critic(states_t, actions_t)
        self.critic_optimizer.zero_grad()
        critic_loss = self.criterion(q_values_t, targets_t)
        critic_loss.backward()
        self.critic_optimizer.step()

        # Actor update: maximize the critic's value of the actor's proposed actions
        proposed_actions_t = self.actor(states_t)
        proposed_q_values_t = self.critic(states_t, proposed_actions_t)
        self.actor_optimizer.zero_grad()
        actor_loss = -proposed_q_values_t.mean()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Soft-update the target networks
        utils.copy_model(self.actor, self.actor_target, tau=self.tau)
        utils.copy_model(self.critic, self.critic_target, tau=self.tau)
def run(self, env):
    """ Train agent in environment env

    Params
    ======
        env (Env): environment to train the agent in
    """
    recent_scores = deque(maxlen=100)
    recent_losses = deque(maxlen=100)
    f = open("performance.log", "w")
    f.write("#Score\tAvg.Score\n")
    for e in range(self.num_episodes):
        scores = np.zeros(self.num_agents)
        states = env.reset()
        done = False
        self.noise.reset()
        while True:
            actions, states_t = self.act(states)
            next_states, rewards, dones, _ = env.step(actions)
            scores += rewards
            next_states_t = self.preprocess(next_states)
            for a in range(self.num_agents):
                self.replay_buffer.append(states_t[a], actions[a], rewards[a],
                                          next_states_t[a], dones[a])
            self.learn()
            states = next_states
            if np.any(dones):
                break
        recent_scores.append(np.mean(scores))
        print("Iteration %i: score: %f\taverage_score: %f" %
              (e, np.mean(scores), np.mean(recent_scores)))
        f.write(str(np.mean(scores)) + "\t" + str(np.mean(recent_scores)) + "\n")
        f.flush()
        utils.copy_model(self.actor, self.actor_target, tau=self.tau)
        utils.copy_model(self.critic, self.critic_target, tau=self.tau)
        if e == self.save_after:
            self.save_state()
    f.close()
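# Usage sketch for the actor-critic agent above (hypothetical values; the keys
# accepted by actor_dict/critic_dict depend on model.ActorModel and
# model.CriticModel, which are not shown here):
#
#     agent = Agent(agent_dict={"num_agents": 20, "gamma": 0.99, "tau": 1e-3,
#                               "num_episodes": 500, "enable_cuda": True},
#                   actor_dict={...}, critic_dict={...})
#     agent.run(env)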
def __init__(self, agent_dict={}, model_dict={}):
    """ Initialize Agent object

    Params
    ======
        agent_dict (dict): dictionary containing parameters for the agent
        model_dict (dict): dictionary containing parameters for the agent's model
    """
    if agent_dict.get("enable_gpu", False):
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        self.device = torch.device("cpu")

    self.num_episodes = agent_dict.get("num_episodes", 10000)
    self.save_after = agent_dict.get("save_after", -1)
    self.name = agent_dict.get("name", "banana_collector")
    self.gamma = agent_dict.get("gamma", 0.9)
    self.epsilon = agent_dict.get("epsilon_start", 1.0)
    self.epsilon_decay = agent_dict.get("epsilon_decay", 0.9)
    self.epsilon_min = agent_dict.get("epsilon_min", 0.1)
    self.tau = agent_dict.get("tau", 0.1)
    self.num_replays = agent_dict.get("num_replays", 1)
    self.criterion = nn.MSELoss()

    memory_size = agent_dict.get("memory_size", 2**14)
    batchsize = agent_dict.get("batchsize", 2**10)
    replay_reg = agent_dict.get("replay_reg", 0.0)
    self.replay_buffer = utils.PrioritizedReplayBuffer(memory_size, batchsize,
                                                       epsilon=replay_reg)

    self.decision_model = model.Model(model_dict).to(self.device)
    self.policy_model = model.Model(model_dict).to(self.device)
    self.optimizer = optim.Adam(self.decision_model.parameters(), lr=1E-3)

    # Hard copy so both networks start with identical weights
    utils.copy_model(self.decision_model, self.policy_model, tau=1.0)

    seed = agent_dict.get("seed", 0)
    torch.manual_seed(seed)
    np.random.seed(seed)
def emit_model_event(self, evtname, evtsrc, *data, **kwargs):
    loop = IOLoop.instance()
    if "force" in kwargs or utils.is_dirty(evtsrc):
        src = utils.copy_model(evtsrc)
        if isinstance(evtname, (list, tuple)):
            for evt in evtname:
                loop.add_callback(self.emit, evt, src, *data)
        else:
            loop.add_callback(self.emit, evtname, src, *data)
def trainDecisionModel(self, q_values, reward, action, q_values_next, done):
    """ Train behavior model. Returns TD error and loss

    Params
    ======
        q_values (torch.Tensor): action values
        reward (float): observed reward
        action (int): chosen action
        q_values_next (torch.Tensor): action values of the next state
        done (bool): flag indicating a terminal state
    """
    q_values_target, td_target = self.qValuesTarget(q_values, reward, action,
                                                    q_values_next, done)
    self.optimizer.zero_grad()
    loss = self.criterion(q_values, q_values_target.detach())
    loss.backward()
    self.optimizer.step()
    utils.copy_model(self.decision_model, self.policy_model, tau=self.tau)
    td_error = (td_target - q_values[action].cpu()).item()
    return td_error, loss
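# The qValuesTarget helper used above is not shown. For plain Q-learning it
# would plausibly build the TD target r + gamma * max_a' Q_policy(s', a')
# (just r at terminal states) and write it into a copy of q_values at the
# chosen action; an illustrative sketch only, not the project's helper:
#
#     td_target = reward + (0.0 if done else self.gamma * q_values_next.max().item())
#     q_values_target = q_values.clone().detach()
#     q_values_target[action] = td_target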
def run_NNclassifier(params):
    if params.out_dir is not None:
        if not os.path.exists(params.out_dir):
            os.makedirs(params.out_dir)
    for key, value in params.__dict__.items():
        print str(key) + ': \t\t\t' + str(value)

    # load data: x: ntrials x ntimepoints x nfeatures, y: ntrials
    x = np.load(params.x_file)
    y = np.load(params.y_file)
    n_classes = len(np.unique(y))

    # get train, val and test sets
    n_folds = int(100 / params.test_pcnt)
    Train, Val, Test = utils.make_kcrossvalidation(x, y, n_folds, shuffle=True)
    Train, Val, Test, means, stds = utils.zscore_dataset(Train, Val, Test,
                                                         z_train=True,
                                                         zscore_x=params.zscore,
                                                         zscore_y=False)
    Train, Val, Test = utils.dim_check(Train, Val, Test,
                                       nn_type=params.nn_type,
                                       nn_dim=params.n_dim)

    # train model
    Models = []
    for kfold in range(n_folds):
        print "Fold " + str(kfold)
        NN = utils.make_NN(n_classes=n_classes, params=params)
        M = NNClassifier(NN, lr=params.lr, w_decay=params.w_decay)
        ktrain = utils.augment(Train[kfold], n_times=params.augment_times) if params.augment else Train[kfold]
        M.train(ktrain, Val[kfold], n_epochs=params.n_epochs,
                batch_size=params.batch_size, no_improve_lim=params.early_stop)
        M.test(Test[kfold])
        Models.append(utils.copy_model(M, copyall=params.save_weights))

    # save models
    pM = utils.concat_models(Models)
    if params.out_dir is not None:
        utils.save_model(pM, params.out_dir + '/model' + str(n_folds) + '.p')
    print "Total performance: " + \
        str(round(sum(pM.test_correct) / float(sum(pM.test_n)), 4)) + " (" + \
        str(sum(pM.test_correct)) + '/' + str(sum(pM.test_n)) + ")"
def column_generation(n, demands, capacity, distances, duals, MP_branch):
    SP_branch = SubProblem(n, demands, capacity, distances, duals)
    SP_branch.build_model()
    SP_branch.optimize()
    new_MP = None
    newAssing = [SP_branch.y[i].x for i in SP_branch.y]  # new route
    obj = get_min_dist(newAssing, distances)  # cost of new route
    # Add the column to the master problem only if its reduced cost is negative
    if obj + SP_branch.modelo.ObjVal < 0.0:
        newColumn = gp.Column(newAssing, MP_branch.modelo.getConstrs())
        MP_branch.modelo.addVar(vtype=GRB.BINARY, obj=obj, column=newColumn)
        MP_branch.modelo.update()
        MP_branch.RelaxOptimize()
        best_cost = MP_branch.getCosts()
        routes = MP_branch.modelo.getA().toarray()
        new_MP = copy_model(best_cost, routes, MP_branch)
    return new_MP
def branch_n_price(n, demands, capacity, distances, MasterProb):
    queue = PriorityQueue()
    MasterProb.RelaxOptimize()
    obj_val = MasterProb.relax_modelo.ObjVal
    queue.insert(obj_val, MasterProb)
    best_int_obj = 1e3
    best_relax_obj = 1e3
    nodes_explored = 0
    best_model = None
    while not queue.isEmpty():
        obj_val, MP_branch = queue.delete()
        nodes_explored += 1
        MP_branch.RelaxOptimize()
        solution = MP_branch.getSolution()
        duals = MP_branch.getDuals()
        branch_cost = MP_branch.getCosts()
        branch_routes = MP_branch.modelo.getA().toarray()
        sol_is_int = all([float(round(s, 4)).is_integer() for s in solution])
        # sol_is_int = all([False if i > 0.3 and np.abs(i - 1.0) > 0.3 else True for i in solution])
        if obj_val < best_int_obj and sol_is_int:
            print(f"Best Integer Obj: {obj_val}")
            print(f"Nodes explored: {nodes_explored}")
            best_int_obj = obj_val
            # print(f"best sol: {solution}")
            best_model = copy_model(branch_cost, branch_routes, MP_branch)
        if obj_val < best_relax_obj:
            print(f"Best Relaxed Obj: {obj_val}")
            print(f"Nodes explored: {nodes_explored}")
            best_relax_obj = obj_val

        # --- #
        # Column generation
        # --- #
        new_MP = column_generation(n, demands, capacity, distances, duals, MP_branch)
        if new_MP is not None:
            new_MP.RelaxOptimize()
            branch_cost = new_MP.getCosts()
            branch_routes = new_MP.modelo.getA().toarray()
            if new_MP.relax_modelo.ObjVal <= best_relax_obj:
                queue.insert(new_MP.relax_modelo.ObjVal,
                             copy_model(branch_cost, branch_routes, new_MP))
        else:
            # --- #
            # If column generation stopped, branch when the solution is not integer
            # --- #
            if not sol_is_int:
                # print("#--#--#--# Not integer solution ........Branching")
                queue = branch(branch_cost, branch_routes, n, demands, capacity,
                               distances, duals, solution, MP_branch, queue,
                               best_relax_obj)
            else:
                # print(f"best sol: {solution}")
                best_model = MP_branch
    return best_model
def branch(branch_cost, branch_routes, n, demands, capacity, distances, duals,
           solution_to_branch, MP_to_copy, queue, best_inc_obj):
    frac_ixs = []
    for ix, val in enumerate(solution_to_branch):
        if val > 0.0 and val < 1.0:
            frac_ixs.append(ix)
    A_mp = MP_to_copy.modelo.getA().toarray()
    locations_index = list(MP_to_copy.locations_index)
    for comb in combinations(frac_ixs, 2):
        SP_1 = SubProblem(n, demands, capacity, distances, duals)
        SP_2 = SubProblem(n, demands, capacity, distances, duals)
        SP_1.build_model()
        SP_2.build_model()
        s1_and_s2 = [
            True if (A_mp[i - 1, comb[0]] == 1 and A_mp[i - 1, comb[1]] == 1) else False
            for i in range(len(MP_to_copy.locations_index))
        ]
        s1_not_s2 = [
            True if (A_mp[i - 1, comb[0]] == 1 and A_mp[i - 1, comb[1]] == 0) else False
            for i in range(len(MP_to_copy.locations_index))
        ]
        for i in locations_index:
            locations_prime = [x for x in locations_index if x != i]
            for j in locations_prime:
                if s1_and_s2[i - 1] and s1_not_s2[j - 1]:
                    # SP_1.modelo.addConstr(SP_1.y[i - 1] + SP_1.y[j - 1] == 2)
                    # SP_2.modelo.addConstr(SP_2.y[i - 1] + SP_2.y[j - 1] == 1)
                    SP_1.modelo.addConstr(SP_1.y[i - 1] == 1)
                    SP_1.modelo.addConstr(SP_1.y[j - 1] == 1)
                    SP_2.modelo.addConstr(SP_2.y[i - 1] == 1)
                    SP_2.modelo.addConstr(SP_2.y[j - 1] == 0)
        MP_1, MP_2 = copy_models(branch_cost, branch_routes, MP_to_copy)

        SP_1.modelo.update()
        SP_1.optimize()
        if SP_1.modelo.Status == 2:
            newAssing = [SP_1.y[i].x for i in SP_1.y]  # new assignment
            obj = get_min_dist(newAssing, distances)  # cost of new route
            if obj + SP_1.modelo.ObjVal < 0.0:
                newColumn = gp.Column(newAssing, MP_1.modelo.getConstrs())
                MP_1.modelo.addVar(vtype=GRB.BINARY, obj=obj, column=newColumn)
                MP_1.modelo.update()
                MP_1.RelaxOptimize()
                mp1_cost = MP_1.getCosts()
                mp1_routes = MP_1.modelo.getA().toarray()
                if MP_1.relax_modelo.ObjVal <= best_inc_obj:
                    queue.insert(MP_1.relax_modelo.ObjVal,
                                 copy_model(mp1_cost, mp1_routes, MP_1))

        SP_2.modelo.update()
        SP_2.optimize()
        if SP_2.modelo.Status == 2:
            newAssing = [SP_2.y[i].x for i in SP_2.y]  # new assignment
            obj = get_min_dist(newAssing, distances)  # cost of new route
            if obj + SP_2.modelo.ObjVal < 0.0:
                newColumn = gp.Column(newAssing, MP_2.modelo.getConstrs())
                MP_2.modelo.addVar(vtype=GRB.BINARY, obj=obj, column=newColumn)
                MP_2.modelo.update()
                MP_2.RelaxOptimize()
                mp2_cost = MP_2.getCosts()
                mp2_routes = MP_2.modelo.getA().toarray()
                if MP_2.relax_modelo.ObjVal <= best_inc_obj:
                    queue.insert(MP_2.relax_modelo.ObjVal,
                                 copy_model(mp2_cost, mp2_routes, MP_2))
    return queue
def main(_):
    """ main function """
    dataset = data_manager.DataManager(init_data=FLAGS.allow_init_data)
    model_dir = '../../runs/bag/{}'.format(FLAGS.model)

    settings = NETwork.Settings()
    settings.vocab_size = len(dataset.wordembedding)
    settings.num_classes = len(dataset.train_y[0])
    settings.filter_sizes = list(map(int, FLAGS.filter_sizes.split(',')))
    settings.pattern_num = FLAGS.pattern_num
    settings.l2_reg_omega = FLAGS.l2_reg_omega

    with tf.Graph().as_default():
        # gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.4)
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Output directory for models and summaries
            # timestamp = str(int(time.time()))
            timestamp = FLAGS.model
            out_dir = os.path.abspath(
                os.path.join(os.path.pardir,
                             os.path.pardir + '/runs/bag', timestamp))

            print('Construct network for train......')
            network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                  settings=settings,
                                  is_training=True,
                                  is_evaluating=False,
                                  use_types=FLAGS.use_types)

            # Get Evaluator for evaluation
            if FLAGS.allow_evaluation:
                print('Construct network for evaluation......')
                e_network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                        settings=settings,
                                        is_training=True,
                                        is_evaluating=True,
                                        use_types=FLAGS.use_types)
                lastest_score = utils.read_pr(out_dir)
                evaluator = evaluation.Evaluator(dataset, sess, e_network,
                                                 model_dir, settings,
                                                 lastest_score)

            # Define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            grads_and_vars = optimizer.compute_gradients(network.final_loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar('loss', network.final_loss)
            acc_summary = tf.summary.scalar('accuracy', network.accuracy)
            pr_summary = tf.summary.scalar('pr_curve', evaluator.highest_score)

            # Train summaries
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                         sess.graph)

            # Checkpoint directory; TensorFlow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(word_batch, pos1_batch, pos2_batch, type_batch,
                           y_batch, mask_batch):
                """ A single training step """
                total_word = []
                total_pos1 = []
                total_pos2 = []
                total_type = []
                total_shape = []
                total_mask = []
                total_num = 0
                for i in range(len(word_batch)):
                    total_shape.append(total_num)
                    total_num += len(word_batch[i])
                    for j in range(len(word_batch[i])):
                        total_word.append(word_batch[i][j])
                        total_pos1.append(pos1_batch[i][j])
                        total_pos2.append(pos2_batch[i][j])
                        total_type.append(type_batch[i][j])
                        total_mask.append(mask_batch[i][j])
                # total_word and y_batch have different lengths here:
                # total_word[total_shape[i]:total_shape[i + 1]] corresponds to y_batch[i]
                total_shape.append(total_num)
                total_shape = np.array(total_shape)
                total_word = np.array(total_word)
                total_pos1 = np.array(total_pos1)
                total_pos2 = np.array(total_pos2)
                total_type = np.array(total_type)
                total_mask = np.array(total_mask)

                feed_dict = {
                    network.input_word: total_word,
                    network.input_pos1: total_pos1,
                    network.input_pos2: total_pos2,
                    network.input_type: total_type,
                    network.input_y: y_batch,
                    network.total_shape: total_shape,
                    network.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    # network.input_mask: total_mask
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op,
                    network.final_loss, network.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)
                """
                if step % 100 == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print('{}: step {}, loss {:g}, acc {:g}'.format(time_str, step, loss, accuracy))
                """

            # Train epochs
            print('Start training......')
            for epoch in range(FLAGS.num_epochs):
                # Randomly shuffle data
                shuffle_indices = np.random.permutation(np.arange(len(dataset.train_y)))
                num_batches_per_epoch = int((len(dataset.train_y) - 1) / settings.batch_size) + 1
                # num_batches_per_epoch = int(len(shuffle_indices) / float(settings.batch_size))
                epoch_last_step = 0
                for batch_num in range(num_batches_per_epoch):
                    start_index = batch_num * settings.batch_size
                    end_index = min((batch_num + 1) * settings.batch_size,
                                    len(dataset.train_y))
                    if (end_index - start_index) != settings.batch_size:
                        start_index = end_index - settings.batch_size
                    batch_index = shuffle_indices[start_index:end_index]
                    word_batch = dataset.train_word[batch_index]
                    pos1_batch = dataset.train_pos1[batch_index]
                    pos2_batch = dataset.train_pos2[batch_index]
                    type_batch = dataset.train_type[batch_index]
                    mask_batch = dataset.train_mask[batch_index]
                    y_batch = dataset.train_y[batch_index]
                    train_step(word_batch, pos1_batch, pos2_batch, type_batch,
                               y_batch, mask_batch)

                if epoch % FLAGS.checkpoint_every == 0:
                    epoch_last_step = tf.train.global_step(sess, global_step)
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=epoch_last_step)
                    print('Epoch {} batch {} Saved model checkpoint to {}, pattern_num {}'
                          .format(epoch, batch_num, path, FLAGS.pattern_num))
                if FLAGS.allow_evaluation and epoch % FLAGS.evaluate_every == 0:
                    new_highest = evaluator.test()
                    print('Best precision recall area now is {}, progress: {}\n'.format(
                        evaluator.highest_score,
                        utils.calculate_progress(epoch, FLAGS.pattern_num)))
                    if new_highest:
                        utils.copy_model(out_dir, epoch_last_step)
                        utils.store_pr(out_dir, evaluator.highest_score)

            print('final best precision recall: {} pattern_num: {}\n'.format(
                evaluator.highest_score, FLAGS.pattern_num))