def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    pg_learners = []
    envs = []

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    # create a sequence of environments, one for each of the num_ex job sets/sequences
    for ex in xrange(pa.num_ex):
        print "-prepare for env-", ex
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    # generate a set of NNs for each batch, each of which is a policy gradient agent
    for ex in xrange(pa.batch_size + 1):  # last worker is used for updating the parameters
        print "-prepare for worker-", ex
        pg_learner = pg_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = cPickle.load(net_handle)
            pg_learner.set_net_params(net_params)

        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False,
                                                            plot=False, repre=repre, end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in xrange(1, pa.num_epochs):

        # use a separate process for each job set; a Manager shares results across processes
        ps = []  # processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])

        ex_indices = range(pa.num_ex)
        np.random.shuffle(ex_indices)

        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []

        ex_counter = 0
        # for each job set
        for ex in xrange(pa.num_ex):
            ex_idx = ex_indices[ex]

            # evaluate several instances of trajectories for the set of PG agents
            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            ps.append(p)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

                print ex, "out of", pa.num_ex

                ex_counter = 0

                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # do the policy gradient update step using the last agent;
                # put the new parameters in the last 'worker', then propagate the update at the end
                grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])

                eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths

                all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))
                all_entropy.extend(np.concatenate([r["all_entropy"] for r in result]))

        # assemble gradients
        grads = grads_all[0]
        for i in xrange(1, len(grads_all)):
            for j in xrange(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to the other workers
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)

        for i in xrange(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print "-----------------"
        print "Iteration: \t %i" % iteration
        print "NumTrajs: \t %i" % len(eprews)
        print "NumTimesteps: \t %i" % np.sum(eplens)
        # print "Loss: \t %s" % np.mean(loss_all)
        print "MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews])
        print "MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews))
        print "MeanSlowdown: \t %s" % np.mean(all_slowdown)
        print "MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens))
        print "MeanEntropy \t %s" % (np.mean(all_entropy))
        print "Elapsed time\t %s" % (timer_end - timer_start), "seconds"
        print "-----------------"

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew) for rew in all_eprews]))
        mean_rew_lr_curve.append(np.mean(eprews))
        slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            param_file = open(pa.output_filename + '_' + str(iteration) + '.pkl', 'wb')
            cPickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            param_file.close()

            pa.unseen = True
            slow_down_cdf.launch(pa, pa.output_filename + '_' + str(iteration) + '.pkl',
                                 render=False, plot=True, repre=repre, end=end)
            pa.unseen = False  # test on unseen examples

            plot_lr_curve(pa.output_filename,
                          max_rew_lr_curve, mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
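
# init_accums and rmsprop_updates_outside are defined elsewhere in this repo.
# For orientation, minimal sketches consistent with how they are called above
# (assumption: get_params() returns a list of numpy arrays, updated in place;
# the "_sketch" names mark these as illustrative, not the repo's definitions):
def init_accums_sketch(pg_learner):
    # one zero-initialized gradient accumulator per parameter tensor
    return [np.zeros(p.shape, dtype=p.dtype) for p in pg_learner.get_params()]


def rmsprop_updates_outside_sketch(grads, params, accums, stepsize, rho=0.9, epsilon=1e-9):
    # manual RMSProp over aggregated policy gradients; '+=' because this is
    # gradient *ascent* on the policy objective
    for i in range(len(grads)):
        accums[i] = rho * accums[i] + (1 - rho) * grads[i] ** 2
        params[i] += stepsize * grads[i] / np.sqrt(accums[i] + epsilon)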
def launch(pa, pg_resume=None, render=False, plot=False, repre='image', end='no_new_job',
           q_resume=None):

    # ---- Parameters ----
    test_types = ['LLQ', 'Random']

    if (pg_resume is not None) and (q_resume is None):
        test_types = ['PG'] + test_types
    if q_resume is not None:
        test_types = ['Q'] + test_types

    nw_len_seqs = job_distribution.generate_sequence_work(pa, seed=42)
    env = environment.Env(pa, nw_len_seqs=nw_len_seqs, render=render, repre=repre, end=end)
    # env = environment.Env(pa, render=render, repre=repre, end=end)

    all_discount_rews = {}
    jobs_slow_down = {}
    work_complete = {}
    work_remain = {}
    job_len_remain = {}
    num_job_remain = {}
    job_remain_delay = {}

    for test_type in test_types:
        all_discount_rews[test_type] = []
        jobs_slow_down[test_type] = []
        work_complete[test_type] = []
        work_remain[test_type] = []
        job_len_remain[test_type] = []
        num_job_remain[test_type] = []
        job_remain_delay[test_type] = []

    for seq_idx in xrange(pa.num_ex):
        print('\n\n')
        print("=============== " + str(seq_idx) + " ===============")

        for test_type in test_types:

            rews, info = get_traj(test_type, pa, env, pa.episode_max_length,
                                  pg_resume=pg_resume, q_agent=q_resume)

            print "---------- " + test_type + " -----------"
            print "total discount reward : \t %s" % (discount(rews, pa.discount)[0])

            all_discount_rews[test_type].append(discount(rews, pa.discount)[0])

            # ------------------------
            # ---- per-job stats ----
            # ------------------------

            enter_time = np.array([info.record[i].enter_time for i in xrange(len(info.record))])
            finish_time = np.array([info.record[i].finish_time for i in xrange(len(info.record))])
            job_len = np.array([info.record[i].len for i in xrange(len(info.record))])
            # job_total_size = np.array([np.sum(info.record[i].res_vec) for i in xrange(len(info.record))])

            finished_idx = (finish_time >= 0)
            unfinished_idx = (finish_time < 0)

            jobs_slow_down[test_type].append(
                (finish_time[finished_idx] - enter_time[finished_idx]) / job_len[finished_idx]
            )
            work_complete[test_type].append(
                np.sum(job_len[finished_idx])  # * job_total_size[finished_idx]
            )
            work_remain[test_type].append(
                np.sum(job_len[unfinished_idx])  # * job_total_size[unfinished_idx]
            )
            job_len_remain[test_type].append(np.sum(job_len[unfinished_idx]))
            num_job_remain[test_type].append(len(job_len[unfinished_idx]))
            job_remain_delay[test_type].append(
                np.sum(pa.episode_max_length - enter_time[unfinished_idx])
            )

        env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    # -- matplotlib colormap, no overlap --
    if plot:
        num_colors = len(test_types)
        cm = plt.get_cmap('gist_rainbow')

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_color_cycle([cm(1. * i / num_colors) for i in range(num_colors)])

        for test_type in test_types:
            slow_down_cdf = np.sort(np.concatenate(jobs_slow_down[test_type]))
            slow_down_yvals = np.arange(len(slow_down_cdf)) / float(len(slow_down_cdf))
            ax.plot(slow_down_cdf, slow_down_yvals, linewidth=2, label=test_type)

        plt.legend(loc=4)
        plt.xlabel("job slowdown", fontsize=20)
        plt.ylabel("CDF", fontsize=20)

        # plt.show()
        plt.savefig(pg_resume + "_slowdown_fig" + ".pdf")

    return all_discount_rews, jobs_slow_down
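
# discount is defined elsewhere; discount(rews, gamma)[0] above is therefore the
# discounted return from t = 0. A minimal sketch of that cumulative sum,
# assuming rews is a 1-D numpy array (illustrative, not the repo's definition):
def discount_sketch(x, gamma):
    out = np.zeros(len(x))
    running = 0.0
    for i in reversed(range(len(x))):
        running = x[i] + gamma * running  # out[i] = sum_k gamma^k * x[i+k]
        out[i] = running
    return out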
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    env = environment.Env(pa, render=False, repre=repre, end=end)

    pg_learner = pg_network.PGLearner(pa)

    if pg_resume is not None:
        net_handle = open(pg_resume, 'rb')  # binary mode: params are pickled with a binary protocol
        net_params = cPickle.load(net_handle)
        pg_learner.set_net_params(net_params)

    if pa.evaluate_policy_name == "SJF":
        evaluate_policy = other_agents.get_sjf_action
    elif pa.evaluate_policy_name == "PACKER":
        evaluate_policy = other_agents.get_packer_action
    else:
        print("Panic: no policy known to evaluate.")
        exit(1)

    # ----------------------------
    print("Preparing for data...")
    # ----------------------------

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    # print 'nw_time_seqs=', nw_len_seqs
    # print 'nw_size_seqs=', nw_size_seqs

    mem_alloc = 4

    X = np.zeros([pa.simu_len * pa.num_ex * mem_alloc, 1,
                  pa.network_input_height, pa.network_input_width],
                 dtype=theano.config.floatX)
    y = np.zeros(pa.simu_len * pa.num_ex * mem_alloc, dtype='int32')

    print 'network_input_height=', pa.network_input_height
    print 'network_input_width=', pa.network_input_width

    counter = 0

    for train_ex in range(pa.num_ex):

        env.reset()

        for _ in xrange(pa.episode_max_length):

            # ---- get current state ----
            ob = env.observe()

            a = evaluate_policy(env.machine, env.job_slot)

            if counter < pa.simu_len * pa.num_ex * mem_alloc:
                add_sample(X, y, counter, ob, a)
                counter += 1

            ob, rew, done, info = env.step(a, repeat=True)

            if done:  # hit void action, exit
                break

        # roll to next example
        env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    num_train = int(0.8 * counter)
    num_test = int(0.2 * counter)

    X_train, X_test = X[:num_train], X[num_train: num_train + num_test]
    y_train, y_test = y[:num_train], y[num_train: num_train + num_test]

    # Normalization, make sure nothing becomes NaN
    # X_mean = np.average(X[:num_train + num_test], axis=0)
    # X_std = np.std(X[:num_train + num_test], axis=0)
    #
    # X_train = (X_train - X_mean) / X_std
    # X_test = (X_test - X_mean) / X_std

    # ----------------------------
    print("Start training...")
    # ----------------------------

    for epoch in xrange(pa.num_epochs):

        # In each epoch, do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, pa.batch_size, shuffle=True):
            inputs, targets = batch
            err, prob_act = pg_learner.su_train(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            train_err += err
            train_acc += np.sum(pg_act == targets)
            train_batches += 1

        # And a full pass over the test data:
        test_err = 0
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test, pa.batch_size, shuffle=False):
            inputs, targets = batch
            err, prob_act = pg_learner.su_test(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            test_err += err
            test_acc += np.sum(pg_act == targets)
            test_batches += 1

        # Then print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, pa.num_epochs,
                                                   time.time() - start_time))
        print("  training loss:    \t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc / float(num_train) * 100))
        print("  test loss:        \t\t{:.6f}".format(test_err / test_batches))
        print("  test accuracy:    \t\t{:.2f} %".format(test_acc / float(num_test) * 100))

        sys.stdout.flush()

        if epoch % pa.output_freq == 0:
            net_file = open(pa.output_filename + '_net_file_' + str(epoch) + '.pkl', 'wb')
            cPickle.dump(pg_learner.return_net_params(), net_file, -1)
            net_file.close()

    print("done")
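
# iterate_minibatches is defined elsewhere; a standard Lasagne-tutorial-style
# sketch consistent with how it is called above (assumption: inputs and targets
# are numpy arrays indexable along the first axis; illustrative only):
def iterate_minibatches_sketch(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    # drop the ragged tail so every batch has exactly batchsize samples
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]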
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    pg_learners = []
    envs = []

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    print("-prepare for worker-")
    rl = Policy_Network.PolicyGradient(
        n_actions=pa.network_output_dim,
        n_features=pa.network_input_height * pa.network_input_width,
        learning_rate=0.02)
    print("policy network params count: ", rl.get_num_params())

    # pg_learner = pg_network.PGLearner(pa)
    #
    # if pg_resume is not None:
    #     net_handle = open(pg_resume, 'rb')
    #     net_params = cPickle.load(net_handle)
    #     pg_learner.set_net_params(net_params)
    #
    # pg_learners.append(pg_learner)

    if pg_resume is not None:
        rl.load_data(pg_resume)

    # accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False,
                                                            plot=False, repre=repre, end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in range(1, pa.num_epochs):

        ex_indices = list(range(pa.num_ex))
        np.random.shuffle(ex_indices)

        all_eprews = []
        eprews = []
        eplens = []
        all_slowdown = []

        eprewlist = []
        eplenlist = []
        slowdownlist = []
        losslist = []

        ex_counter = 0
        for ex in range(pa.num_ex):
            ex_idx = ex_indices[ex]

            eprew, eplen, slowdown, all_ob, all_action, all_adv = \
                get_traj_worker(rl, envs[ex_idx], pa)
            eprewlist.append(eprew)
            eplenlist.append(eplen)
            slowdownlist.append(slowdown)

            loss = rl.learn(all_ob, all_action, all_adv)
            losslist.append(loss)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:
                print("\n\n")
                ex_counter = 0

                # (bookkeeping from the multiprocessing version, kept for reference)
                # all_eprews.extend([r["all_eprews"] for r in result])
                # eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                # eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths
                # all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))

        # assemble gradients
        # grads = grads_all[0]
        # for i in range(1, len(grads_all)):
        #     for j in range(len(grads)):
        #         grads[j] += grads_all[i][j]

        # propagate network parameters to others
        # params = pg_learners[pa.batch_size].get_params()
        # rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)
        # for i in range(pa.batch_size + 1):
        #     pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print("-----------------")
        print("Iteration: \t %i" % iteration)
        print("NumTrajs: \t %i" % len(eprewlist))
        print("NumTimesteps: \t %i" % np.sum(eplenlist))
        print("Loss: \t %s" % np.mean(losslist))
        print("MaxRew: \t %s" % np.average([np.max(rew) for rew in eprewlist]))
        print("MeanRew: \t %s +- %s" % (np.mean(eprewlist), np.std(eprewlist)))
        print("MeanSlowdown: \t %s" % np.mean([np.mean(sd) for sd in slowdownlist]))
        print("MeanLen: \t %s +- %s" % (np.mean(eplenlist), np.std(eplenlist)))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew) for rew in eprewlist]))
        mean_rew_lr_curve.append(np.mean(eprewlist))
        slow_down_lr_curve.append(np.mean([np.mean(sd) for sd in slowdownlist]))

        if iteration % pa.output_freq == 0:
            rl.save_data(pa.output_filename + '_' + str(iteration))

            pa.unseen = True
            # slow_down_cdf.launch(pa, pa.output_filename + '_' + str(iteration) + '.ckpt',
            #                      render=False, plot=True, repre=repre, end=end)
            pa.unseen = False  # test on unseen examples

            plot_lr_curve(pa.output_filename,
                          max_rew_lr_curve, mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
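
# get_traj_worker above returns per-step advantages (all_adv) alongside the raw
# trajectories. A hedged sketch of the DeepRM-style time-based baseline it
# presumably uses (assumption: rets is a list of 1-D numpy arrays of discounted
# returns, one per trajectory; illustrative only):
def time_based_advantages(rets):
    max_len = max(len(r) for r in rets)
    padded = np.zeros((len(rets), max_len))
    for i, r in enumerate(rets):
        padded[i, :len(r)] = r
    baseline = padded.mean(axis=0)                 # mean return at each timestep
    return [r - baseline[:len(r)] for r in rets]   # advantage = return - baseline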
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    # dimension of the state space
    # NOTE: the image observations have to be flattened before being fed into the network
    state_dim = pa.network_input_height * pa.network_input_width

    # number of actions
    num_actions = pa.network_output_dim

    # initialize the Q networks
    sess = tf.Session()
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001, decay=0.9)
    q_learner = DeepQLearner(session=sess,
                             optimizer=optimizer,
                             q_network=build_q_learner,
                             state_dim=state_dim,
                             num_actions=num_actions,
                             discount_factor=pa.discount)

    envs = []

    nw_len_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    # create a sequence of environments, one for each of the num_ex job sets/sequences
    for ex in xrange(pa.num_ex):
        print "-prepare for env-", ex
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    # (policy gradient worker setup from the original version, kept for reference)
    # for ex in xrange(pa.batch_size + 1):  # last worker for updating the parameters
    #     print "-prepare for worker-", ex
    #     pg_learner = pg_network.PGLearner(pa)
    #
    #     if pg_resume is not None:
    #         net_handle = open(pg_resume, 'rb')
    #         net_params = cPickle.load(net_handle)
    #         pg_learner.set_net_params(net_params)
    #
    #     pg_learners.append(pg_learner)
    #
    # accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False,
                                                            plot=False, repre=repre, end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in xrange(1, pa.num_epochs):

        # (process/manager bookkeeping from the multiprocessing version, kept for reference)
        # ps = []  # processes
        # manager = Manager()  # managing return results
        # manager_result = manager.list([])

        ex_indices = range(pa.num_ex)
        np.random.shuffle(ex_indices)

        all_eprews = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []

        ex_counter = 0
        # for each job set
        for ex in xrange(pa.num_ex):
            ex_idx = ex_indices[ex]

            current_env = envs[ex_idx]
            man_result = []
            get_traj_worker(q_learner, current_env, pa, man_result)

            # evaluate several instances of trajectories for the set of PG agents
            # p = Process(target=get_traj_worker,
            #             args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            # ps.append(p)

            ex_counter += 1

            all_eprews.extend([r["all_eprews"] for r in man_result])
            eprews.extend(np.concatenate([r["all_eprews"] for r in man_result]))  # episode total rewards
            eplens.extend(np.concatenate([r["all_eplens"] for r in man_result]))  # episode lengths
            all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in man_result]))

            # if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:
            #     print ex, "out of", pa.num_ex
            #     ex_counter = 0
            #
            #     for p in ps:
            #         p.start()
            #     for p in ps:
            #         p.join()
            #
            #     result = []  # convert list from shared memory
            #     for r in manager_result:
            #         result.append(r)
            #
            #     ps = []
            #     manager_result = manager.list([])
            #
            #     all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
            #     all_action = np.concatenate([r["all_action"] for r in result])
            #     all_adv = np.concatenate([r["all_adv"] for r in result])
            #
            #     all_eprews.extend([r["all_eprews"] for r in result])
            #     eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
            #     eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths
            #     all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))

        # (gradient assembly and parameter propagation from the PG version, kept for reference)
        # grads = grads_all[0]
        # for i in xrange(1, len(grads_all)):
        #     for j in xrange(len(grads)):
        #         grads[j] += grads_all[i][j]
        #
        # params = pg_learners[pa.batch_size].get_params()
        # rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)
        # for i in xrange(pa.batch_size + 1):
        #     pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print "-----------------"
        print "Iteration: \t %i" % iteration
        print "NumTrajs: \t %i" % len(eprews)
        print "NumTimesteps: \t %i" % np.sum(eplens)
        # print "Loss: \t %s" % np.mean(loss_all)
        print "MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews])
        print "MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews))
        print "MeanSlowdown: \t %s" % np.mean(all_slowdown)
        print "MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens))
        print "Elapsed time\t %s" % (timer_end - timer_start), "seconds"
        print "-----------------"

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew) for rew in all_eprews]))
        mean_rew_lr_curve.append(np.mean(eprews))
        slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            # param_file = open(pa.output_filename + '_' + str(iteration) + '.pkl', 'wb')
            # cPickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            # param_file.close()

            pa.unseen = True
            slow_down_cdf.launch(pa, pa.output_filename + '_' + str(iteration) + '.pkl',
                                 render=False, plot=True, repre=repre, end=end,
                                 q_resume=q_learner)
            pa.unseen = False  # test on unseen examples

            plot_lr_curve(pa.output_filename,
                          max_rew_lr_curve, mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
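
# build_q_learner is defined elsewhere in this repo. For orientation, a minimal
# TF1 sketch of a Q-network builder with the shape contract used above (the
# exact signature DeepQLearner expects, and the hidden width of 32, are
# assumptions; illustrative only):
def build_q_learner_sketch(states, state_dim, num_actions):
    # states: [batch, state_dim] placeholder of flattened image observations
    hidden = tf.layers.dense(states, 32, activation=tf.nn.relu, name='hidden')
    q_values = tf.layers.dense(hidden, num_actions, name='q_values')  # [batch, num_actions]
    return q_values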
def test(it, pa, pg_resume, workloads, episode_max_length=200):

    repre = 'image'
    end = 'all_done'

    agent = Heuristic_Agents()
    pg_learner = pg_network.PGLearner(pa)

    if pg_resume is not None:
        net_handle = open(pg_resume, 'rb')
        net_params = pickle.load(net_handle)
        pg_learner.set_net_params(net_params)

    new_env = Env1(0, 1)
    job_distribution = Dist()
    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    logline = str(it) + '\n'
    for ex in range(pa.num_test_ex):
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex + pa.num_ex

        new_env.reset()
        new_env.workload_seq = workloads[ex + pa.num_ex]
        new_env.generate_workload()
        print('Testing : ', new_env.workload_seq)
        env.reset()

        obs = []
        new_obs = []
        acts = []
        new_acts = []
        rews = []
        utils = ''
        suffer = []
        new_rews = []
        entropy = []
        finished_episode_len = 0
        crs = [0] * pa.num_machines
        crs_max = [0] * pa.num_machines
        info = []

        new_ob = new_env.observe()
        ob = env.observe()
        counter = 0

        for _ in range(episode_max_length):  # was hardcoded 200; use the parameter (defaults to 200)
            act_prob = pg_learner.get_one_act_prob(ob)
            a = np.argmax(act_prob)
            act = agent.get_action(new_env, a)

            new_obs.append(new_ob)
            new_acts.append(act)
            new_ob, new_rews, done1, info1 = new_env.step(act, _, new_rews, a)

            # a = (csprob_n > np.random.rand()).argmax()
            # np.set_printoptions(linewidth=40*5, precision=2, threshold=np.nan)
            # print('State>>', ob)
            # print('Action>>', a)
            obs.append(ob)  # store the ob at the current decision-making step
            acts.append(a)

            ob, rew, done, info = env.step(a, repeat=True)
            counter += 1
            if info == 'Allocation_Success':
                finished_episode_len = _ + 1
            # print('Reward>>', rew)
            rews.append(rew)
            entropy.append(get_entropy(act_prob))

            if done1:
                break

            util = ''
            for k, machine in enumerate(new_env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        util += str(machine.total_cpus - machine.cpus_left) + ','
                    else:
                        util += str(machine.total_cpus) + ','
                        suffer.append(abs(machine.cpus_left))
                else:
                    util += str(0) + ','

                # pairwise interference among tasks co-located on machine k
                crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j:
                            crs[k] += pa.interference_penalty * \
                                (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                            crs_this_time[k] += pa.interference_penalty * \
                                (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                crs_max[k] = max(crs_max[k], crs_this_time[k])

            utils += util + '|'

        logline += str(str(counter - 1) + '|' + str(utils) + str(finished_episode_len)) + '\n' + \
            str(sum(new_rews)) + '\n' + str(sum(suffer)) + '\n'
        for i in range(len(new_env.machines)):
            logline += str(crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(new_env.machines)):
            logline += str(crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'

        print('Iteration number ', it)
        print('Example No: ', ex)
        print('Test Actions : ', new_acts)
        print('Reward : ', new_rews)
        print('Total reward : ', sum(new_rews))

    return logline
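
# get_entropy is defined elsewhere; a minimal sketch consistent with its use on
# an action-probability vector above (the 1e-8 floor is an assumption to avoid
# log(0); illustrative only):
def get_entropy_sketch(act_prob):
    p = np.asarray(act_prob).ravel()
    return -np.sum(p * np.log(p + 1e-8))  # Shannon entropy of the policy's action distribution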
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------
    # logs = open('/home/shanka/logs_packing_deeprm', 'a')

    pg_learners = []
    envs = []

    job_distribution = Dist()
    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    for ex in range(pa.batch_size + 1):  # last worker is used for updating the parameters
        print("-prepare for worker-", ex)
        pg_learner = pg_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)

        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    # print("Preparing for reference data...")
    # --------------------------------------
    # print('Start testing...')
    # for ite in range(10, 1000, 10):
    #     pg_resume = pa.output_filename + '_' + str(ite) + '.pkl'
    #     logline = test(ite, pa, pg_resume, workloads, repre)
    #     logs.write(logline)
    #     logs.flush()
    #     os.fsync(logs.fileno())
    # return
    # ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False,
    #                                                         plot=False, repre=repre, end=end)
    # mean_rew_lr_curve = []
    # max_rew_lr_curve = []
    # slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in range(1, pa.num_epochs):

        ps = []  # processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])

        ex_indices = list(range(pa.num_ex))
        # np.random.shuffle(ex_indices)

        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []

        ex_counter = 0
        for ex in range(pa.num_ex):
            ex_idx = ex_indices[ex]

            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            ps.append(p)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

                print(ex, "out of", pa.num_ex)

                ex_counter = 0

                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # do the policy gradient update step using the last agent;
                # put the new parameters in the last 'worker', then propagate the update at the end
                grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])
                eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths
                # all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))
                # all_entropy.extend(np.concatenate([r["all_entropy"] for r in result]))

        # assemble gradients
        grads = grads_all[0]
        for i in range(1, len(grads_all)):
            for j in range(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to the other workers
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)

        for i in range(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print("-----------------")
        print("Iteration: \t %i" % iteration)
        print("NumTrajs: \t %i" % len(eprews))
        print("NumTimesteps: \t %i" % np.sum(eplens))
        # print("Loss: \t %s" % np.mean(loss_all))
        print("MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews]))
        print("MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews)))
        # print("MeanSlowdown: \t %s" % np.mean(all_slowdown))
        print("MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens)))
        # print("MeanEntropy \t %s" % (np.mean(all_entropy)))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")

        # max_rew_lr_curve.append(np.average([np.max(rew) for rew in all_eprews]))
        # mean_rew_lr_curve.append(np.mean(eprews))
        # slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            pg_resume = pa.output_filename + '_' + str(iteration) + '.pkl'
            param_file = open(pg_resume, 'wb')
            pickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            param_file.close()

            # match the signature test(it, pa, pg_resume, workloads, episode_max_length=200);
            # the old call test(pa, pg_resume, workloads, repre) dropped the iteration
            # argument and passed repre as episode_max_length
            test(iteration, pa, pg_resume, workloads)
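
# concatenate_all_ob_across_examples is defined elsewhere; functionally it stacks
# the per-example observation arrays along the sample axis into one batch. A
# minimal sketch under that assumption (each entry shaped
# [num_samples, 1, network_input_height, network_input_width]; illustrative only):
def concatenate_all_ob_across_examples_sketch(all_ob, pa):
    total_samp = sum(ob.shape[0] for ob in all_ob)
    out = np.zeros((total_samp, 1, pa.network_input_height, pa.network_input_width),
                   dtype=np.float32)
    start = 0
    for ob in all_ob:
        out[start:start + ob.shape[0]] = ob
        start += ob.shape[0]
    return out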
def __init__(self, pa, nw_len_seqs=None, nw_size_seqs=None,
             seed=None, render=False, repre='image', end='no_new_job'):

    self.pa = pa
    self.render = render
    self.repre = repre  # 'image' or 'compact' representation
    self.end = end  # termination type, 'no_new_job' or 'all_done'

    # rnn stuff
    if self.pa.rnn:
        self.rnn = tf_dist_rnn_object.dist_rnn(pa)
        self.rnn.train()
    else:
        self.rnn = None
    self.rnn_offset = 0  # how many jobs have been generated by the rnn so far

    self.nw_dist = pa.dist.bi_model_dist

    self.curr_time = 0

    # set up random seed
    if self.pa.unseen:
        np.random.seed(None)
    else:
        np.random.seed(seed)

    if self.pa.rnn:
        ori_simu_len = pa.simu_len
        pa.simu_len = pa.simu_len + self.rnn.SEQ_LEN

    if nw_len_seqs is None or nw_size_seqs is None:
        # generate new work
        self.nw_len_seqs, self.nw_size_seqs = \
            job_distribution.generate_sequence_work(pa, seed=None)

        self.workload = np.zeros(pa.num_res)
        for i in xrange(pa.num_res):
            self.workload[i] = \
                np.sum(np.reshape(self.nw_size_seqs[:, :, i],
                                  self.pa.simu_len * self.pa.num_ex) *
                       np.reshape(self.nw_len_seqs[:, :],
                                  self.pa.simu_len * self.pa.num_ex)) / \
                float(pa.res_slot) / \
                float(len(self.nw_len_seqs))
            print("Load on # " + str(i) + " resource dimension is " + str(self.workload[i]))

        self.nw_len_seqs = np.reshape(self.nw_len_seqs,
                                      [self.pa.num_ex, self.pa.simu_len])
        self.nw_size_seqs = np.reshape(self.nw_size_seqs,
                                       [self.pa.num_ex, self.pa.simu_len, self.pa.num_res])
    else:
        self.nw_len_seqs = nw_len_seqs
        self.nw_size_seqs = nw_size_seqs

    if self.pa.rnn:
        print(self.nw_size_seqs.shape)
        print(self.nw_len_seqs)
        # reserve the first SEQ_LEN jobs of each sequence as seeds for the rnn
        self.len_seeds_for_rnn = self.nw_len_seqs[:, :self.rnn.SEQ_LEN]
        self.res_seeds_for_rnn = self.nw_size_seqs[:, :self.rnn.SEQ_LEN, :]
        self.nw_len_seqs = self.nw_len_seqs[:, self.rnn.SEQ_LEN:]
        self.nw_size_seqs = self.nw_size_seqs[:, self.rnn.SEQ_LEN:, :]
        pa.simu_len = ori_simu_len

    self.seq_no = 0  # which example sequence
    self.seq_idx = 0  # index in that sequence

    # initialize system
    self.machine = Machine(pa)
    self.job_slot = JobSlot(pa)
    self.job_backlog = JobBacklog(pa)
    self.job_record = JobRecord()
    self.extra_info = ExtraInfo(pa)

    if self.pa.rnn:
        self.seed_rnn()
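
# Usage sketch for this environment (assumptions: a parameters.Parameters object
# with the pa.rnn / pa.unseen flags referenced above, as in DeepRM-style repos,
# and the step/observe interface used by the launch functions in this file):
pa = parameters.Parameters()
env = Env(pa, seed=42, repre='image', end='no_new_job')
ob = env.observe()                 # current observation (image or compact)
ob, rew, done, info = env.step(0)  # try to schedule the job in slot 0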
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    pg_learners = []
    envs = []

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    for ex in xrange(pa.num_ex):  # number of sequences
        print "-prepare for env-", ex
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=True, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    for ex in xrange(pa.batch_size + 1):  # last worker is used for updating the parameters
        print "-prepare for worker-", ex
        pg_learner = pg_network.PGLearner(pa)
        startIndex = 0
        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = cPickle.load(net_handle)
            pg_learner.set_net_params(net_params)
            # resume the iteration counter from the number embedded in the checkpoint
            # file name (the original re.match(pg_resume, '\d+') had pattern and string
            # swapped and would anchor at the start of the name; re.search finds it)
            startIndex = int(re.search(r'\d+', pg_resume).group())

        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    # reference examples: get reference discounted rewards and reference slowdown
    # from the Random, SJF and Tetris algorithms
    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=True,
                                                            plot=False, repre=repre, end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in xrange(startIndex, pa.num_epochs):

        ps = []  # processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])

        ex_indices = range(pa.num_ex)
        np.random.shuffle(ex_indices)

        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []

        ex_counter = 0
        for ex in xrange(pa.num_ex):
            ex_idx = ex_indices[ex]

            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            ps.append(p)

            ex_counter += 1

            # accumulate processes in ps until a full batch (or the last example) is reached
            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

                print ex + 1, "out of", pa.num_ex

                ex_counter = 0

                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # do the policy gradient update step using the last agent;
                # put the new parameters in the last 'worker', then propagate the update at the end
                grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)  # (states, actions, values)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])
                eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths
                all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))
                all_entropy.extend(np.concatenate([r["all_entropy"] for r in result]))

        # assemble gradients
        grads = grads_all[0]
        for i in xrange(1, len(grads_all)):
            for j in xrange(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to the other workers
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)

        for i in xrange(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print "-----------------"
        print "Iteration: \t %i" % iteration
        print "NumTrajs: \t %i" % len(eprews)
        print "NumTimesteps: \t %i" % np.sum(eplens)
        # print "Loss: \t %s" % np.mean(loss_all)
        print "MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews])
        print "MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews))
        print "MeanSlowdown: \t %s" % np.mean(all_slowdown)
        print "MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens))
        print "MeanEntropy \t %s" % (np.mean(all_entropy))
        print "Elapsed time\t %s" % (timer_end - timer_start), "seconds"
        print "-----------------"

        f = open('log/re_log_' + datetime.now().strftime('%Y-%m-%d_%H:%M:%S'), 'w+')
        f.write("-----------------\n")
        f.write("Iteration: \t %i\n" % (iteration))
        f.write("NumTrajs: \t %i\n" % (len(eprews)))
        f.write("NumTimesteps: \t %i\n" % (np.sum(eplens)))
        # f.write("Loss: \t %s\n" % (np.mean(loss_all)))
        f.write("MaxRew: \t %s\n" % (np.average([np.max(rew) for rew in all_eprews])))
        f.write("MeanRew: \t %s +- %s\n" % (np.mean(eprews), np.std(eprews)))
        f.write("MeanSlowdown: \t %s\n" % (np.mean(all_slowdown)))
        f.write("MeanLen: \t %s +- %s\n" % (np.mean(eplens), np.std(eplens)))
        f.write("MeanEntropy \t %s\n" % ((np.mean(all_entropy))))
        f.write("Elapsed time\t %s seconds\n" % ((timer_end - timer_start)))
        f.write("-----------------\n")
        f.close()

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew) for rew in all_eprews]))
        mean_rew_lr_curve.append(np.mean(eprews))
        slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            param_file = open(pa.output_filename + '_' + str(iteration) + '.pkl', 'wb')
            cPickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            param_file.close()

            # added by wjchen, to record accuracy and rewards
            sample_file = h5py.File('log/re_record' + str(len(slow_down_lr_curve))
                                    + datetime.now().strftime('%Y-%m-%d_%H:%M') + '.h5', 'w')
            sample_file.create_dataset('max_rew_lr_curve', data=max_rew_lr_curve)
            sample_file.create_dataset('mean_rew_lr_curve', data=mean_rew_lr_curve)
            sample_file.create_dataset('slow_down_lr_curve', data=slow_down_lr_curve)

            ref_dr = sample_file.create_group('ref_discount_rews')
            for k, v in ref_discount_rews.items():
                ref_dr[k] = np.average(v)

            ref_sd = sample_file.create_group('ref_slow_down')
            for k, v in ref_slow_down.items():
                ref_sd[k] = np.average(np.concatenate(v))
            sample_file.close()

            # print ref_slow_down
            # print ref_discount_rews

            print '\n----Reference Slowdown----'
            for k, v in ref_slow_down.items():
                print "{}: {}".format(k, np.average(np.concatenate(v)))

            print '\n----Reference Discount Reward----'
            for k, v in ref_discount_rews.items():
                print "{}: {}".format(k, np.average(v))

            pa.unseen = True
            slow_down_cdf.launch(pa, pa.output_filename + '_' + str(iteration) + '.pkl',
                                 render=True, plot=True, repre=repre, end=end)
            pa.unseen = False  # test on unseen examples

            plot_lr_curve(pa.output_filename,
                          max_rew_lr_curve, mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
            # plots the averages of ref_discount_rews and ref_slow_down
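
# Usage sketch for inspecting the records written above (the file name below is
# illustrative; use the actual timestamped path produced at write time):
import h5py

with h5py.File('log/re_record1' + '2024-01-01_12:00' + '.h5', 'r') as f:
    max_rew = f['max_rew_lr_curve'][:]          # learning curves as numpy arrays
    slowdown = f['slow_down_lr_curve'][:]
    for k in f['ref_slow_down']:
        print("%s: %s" % (k, f['ref_slow_down'][k][()]))  # scalar per reference policy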
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    envs = []

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    print("-prepare for worker-")
    sess = tf.Session()
    actor = actor_critic_brain.Actor(sess,
                                     n_features=pa.network_input_height * pa.network_input_width,
                                     n_actions=pa.network_output_dim,
                                     lr=0.001)
    critic = actor_critic_brain.Critic(sess,
                                       n_features=pa.network_input_height * pa.network_input_width,
                                       lr=0.01)
    sess.run(tf.global_variables_initializer())

    if pg_resume is not None:
        pass
        # rl.load_data(pg_resume)

    # accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False,
                                                            plot=False, repre=repre, end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in range(1, pa.num_epochs):

        ex_indices = list(range(pa.num_ex))
        np.random.shuffle(ex_indices)

        eprewlist = []
        eplenlist = []
        slowdownlist = []

        ex_counter = 0
        for ex in range(pa.num_ex):
            ex_idx = ex_indices[ex]

            eprew, eplen, slowdown = get_traj_worker(actor, critic, envs[ex_idx], pa)
            eprewlist.append(eprew)
            eplenlist.append(eplen)
            slowdownlist.append(slowdown)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:
                print("\n\n")
                ex_counter = 0

        timer_end = time.time()

        print("-----------------")
        print("Iteration: \t %i" % iteration)
        # print("NumTrajs: \t %i" % len(eprewlist))
        print("NumTimesteps: \t %i" % np.sum(eplenlist))
        # print("MaxRew: \t %s" % np.average([np.max(rew) for rew in eprewlist]))
        # print("MeanRew: \t %s +- %s" % (np.mean(eprewlist), np.std(eprewlist)))
        print("MeanSlowdown: \t %s" % np.mean([np.mean(sd) for sd in slowdownlist]))
        print("MeanLen: \t %s +- %s" % (np.mean(eplenlist), np.std(eplenlist)))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew) for rew in eprewlist]))
        mean_rew_lr_curve.append(np.mean(eprewlist))
        slow_down_lr_curve.append(np.mean([np.mean(sd) for sd in slowdownlist]))

        if iteration % pa.output_freq == 0:
            # rl.save_data(pa.output_filename + '_' + str(iteration))

            pa.unseen = True
            # slow_down_cdf.launch(pa, pa.output_filename + '_' + str(iteration) + '.ckpt',
            #                      render=False, plot=True, repre=repre, end=end)
            pa.unseen = False  # test on unseen examples

            plot_lr_curve(pa.output_filename,
                          max_rew_lr_curve, mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
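
# A hedged sketch of the per-step update that get_traj_worker presumably performs
# with this Actor/Critic pair. The learn() signatures follow the common TF1
# actor-critic pattern and are assumptions, not confirmed from actor_critic_brain:
def td_actor_critic_step_sketch(actor, critic, s, a, r, s_):
    # assumed: critic.learn fits V(s) and returns the TD error r + gamma * V(s_) - V(s)
    td_error = critic.learn(s, r, s_)
    # assumed: actor.learn ascends grad log pi(a|s) weighted by the TD error
    actor.learn(s, a, td_error)
    return td_error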