# NOTE: this section assumes the module-level imports used throughout, e.g.:
#   import os, sys, time, pickle
#   import numpy as np
#   from multiprocessing import Process, Manager

def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # env = environment.Env(pa, render=False, repre=repre, end=end)

    pg_learner = policy_network.PGLearner(pa)

    if pg_resume is not None:
        # Checkpoints are pickled in binary mode, so read with 'rb'
        # (and use `pickle`, consistent with the rest of this file).
        net_handle = open(pg_resume, 'rb')
        net_params = pickle.load(net_handle)
        pg_learner.set_net_params(net_params)

    # if pa.evaluate_policy_name == "SJF":
    #     evaluate_policy = other_agents.get_sjf_action
    # elif pa.evaluate_policy_name == "PACKER":
    #     evaluate_policy = other_agents.get_packer_action
    # else:
    #     print("Panic: no policy known to evaluate.")
    #     exit(1)

    # ----------------------------
    print("Preparing for data...")
    # ----------------------------

    # nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)
    # print('nw_time_seqs=', nw_len_seqs)
    # print('nw_size_seqs=', nw_size_seqs)

    # mem_alloc = 4
    # X = np.zeros([pa.simu_len * pa.num_ex * mem_alloc, 1,
    #               pa.network_input_height, pa.network_input_width],
    #              dtype=theano.config.floatX)
    # y = np.zeros(pa.simu_len * pa.num_ex * mem_alloc,
    #              dtype='int32')

    # print('network_input_height=', pa.network_input_height)
    # print('network_input_width=', pa.network_input_width)

    counter = 0

    # for train_ex in range(pa.num_ex):
    #     env.reset()
    #     for _ in range(pa.episode_max_length):
    #         # ---- get current state ----
    #         ob = env.observe()
    #         a = evaluate_policy(env.machine, env.job_slot)
    #         if counter < pa.simu_len * pa.num_ex * mem_alloc:
    #             add_sample(X, y, counter, ob, a)
    #             counter += 1
    #         ob, rew, done, info = env.step(a, repeat=True)
    #         if done:  # hit void action, exit
    #             break
    #     # roll to next example
    #     env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    # num_train = int(0.8 * counter)
    # num_test = int(0.2 * counter)
    # X_train, X_test = X[:num_train], X[num_train: num_train + num_test]
    # y_train, y_test = y[:num_train], y[num_train: num_train + num_test]

    # Normalization; make sure nothing becomes NaN
    # X_mean = np.average(X[:num_train + num_test], axis=0)
    # X_std = np.std(X[:num_train + num_test], axis=0)
    #
    # X_train = (X_train - X_mean) / X_std
    # X_test = (X_test - X_mean) / X_std

    with open('/home/temp_siplab/shanka/temporary/rl_sudoku/sudoku4_trainx_easy_med.pkl', 'rb') as fp:
        X = pickle.load(fp)
    with open('/home/temp_siplab/shanka/temporary/rl_sudoku/sudoku4_trainy_easy_med.pkl', 'rb') as fp:
        y = pickle.load(fp).astype(np.int32)

    num_train = int(0.8 * X.shape[0])
    num_test = int(0.2 * X.shape[0])

    X_train, X_test = X[:num_train], X[num_train:num_train + num_test]
    y_train, y_test = y[:num_train], y[num_train:num_train + num_test]

    print('Number of training samples>>>', X_train.shape[0])
    print('Number of testing samples>>>', X_test.shape[0])

    # ----------------------------
    print("Start training...")
    # ----------------------------

    for epoch in range(pa.num_su_epochs):

        # In each epoch, do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(X_train, y_train, pa.batch_size, shuffle=True):
            inputs, targets = batch
            err, prob_act = pg_learner.su_train(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            train_err += err
            train_acc += np.sum(pg_act == targets)
            train_batches += 1

        # ... and a full pass over the test data:
        test_err = 0
        test_acc = 0
        test_batches = 0

        for batch in iterate_minibatches(X_test, y_test, pa.batch_size, shuffle=False):
            inputs, targets = batch
            err, prob_act = pg_learner.su_test(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            test_err += err
            test_acc += np.sum(pg_act == targets)
            test_batches += 1

        # Then print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, pa.num_su_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc / float(num_train) * 100))
        print("  test loss:\t\t{:.6f}".format(test_err / test_batches))
        print("  test accuracy:\t\t{:.2f} %".format(test_acc / float(num_test) * 100))

        sys.stdout.flush()

        if epoch % pa.output_freq == 0:
            net_file = open(pa.output_filename + '4x4_easy_med_' + str(epoch) + '.pkl', 'wb')
            pickle.dump(pg_learner.return_net_params(), net_file, -1)
            net_file.close()

    print("done")
def test2(pa):

    def ex_test(pg_learner, env, pa, result):
        env.reset()
        env.generate_workload()
        ob = env.observe()

        acts = []
        probs = []
        cpu_crs = [0] * pa.num_machines
        cpu_crs_max = [0] * pa.num_machines
        mem_crs = [0] * pa.num_machines
        mem_crs_max = [0] * pa.num_machines
        rews = []
        c_utils = ''
        m_utils = ''
        suffer = []
        finished_episode_len = 0
        logline = ''

        for _ in range(pa.episode_max_length):
            act_prob = pg_learner.get_one_act_prob(ob)
            csprob_n = np.cumsum(act_prob)
            a = np.argmax(act_prob)

            ob, rews, done, status = env.step(a, _, rews)
            acts.append(a)
            probs.append(act_prob)

            if status == 'Allocation_Success':
                finished_episode_len = _ + 1

            if done:
                break

            # ---- per-timestep logs ----
            c_util = ''
            m_util = ''
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        c_util += str(machine.total_cpus - machine.cpus_left) + ','
                    else:
                        c_util += str(machine.total_cpus) + ','
                        suffer.append(abs(machine.cpus_left))
                else:
                    c_util += str(0) + ','

            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.mems_left >= 0:
                        m_util += str(machine.total_mems - machine.mems_left) + ','
                    else:
                        m_util += str(machine.total_mems) + ','
                        suffer.append(abs(machine.mems_left))
                else:
                    m_util += str(0) + ','

                # Pairwise CPU interference penalty on this machine.
                cpu_crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j and len(task_i.cpu_util) > 0 and len(task_j.cpu_util) > 0:
                            cpu_crs[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                            cpu_crs_this_time[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                cpu_crs_max[k] = max(cpu_crs_max[k], cpu_crs_this_time[k])

                # Pairwise memory interference penalty on this machine.
                mem_crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j and len(task_i.mem_util) > 0 and len(task_j.mem_util) > 0:
                            mem_crs[k] += pa.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
                            mem_crs_this_time[k] += pa.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
                mem_crs_max[k] = max(mem_crs_max[k], mem_crs_this_time[k])

            c_utils += c_util + '|'
            m_utils += m_util + '|'

        # ---- episode summary ----
        logline += str(str(_ - 1) + '|' + str(c_utils) + str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(sum(suffer)) + '\n'
        logline += str(m_utils) + '\n'
        for i in range(len(env.machines)):
            logline += str(cpu_crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(cpu_crs_max[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(mem_crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(mem_crs_max[i]) + ','
        logline = logline[:-1] + '\n'

        result.append(logline)

    pg_learners = []
    envs = []

    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = Env(0, 1)
        env.workload_seq = workloads[ex]
        envs.append(env)

    for ex in range(pa.batch_size):
        print("-prepare for worker-", ex)
        pg_learner = policy_network.PGLearner(pa)
        pg_learners.append(pg_learner)

    logs = open('/home/dell/logs_cpu_mem_aischedule', 'a')
    loglines = ''

    for it in range(1300, 1320, 20):
        if it % 10:
            print('Iteration : ', it)
        pg_resume = '/home/dell/testing_part2/' + str(it) + '.pkl_'

        net_handle = open(pg_resume, 'rb')
        net_params = pickle.load(net_handle)
        for ex in range(pa.batch_size):
            pg_learners[ex].set_net_params(net_params)

        ps = []  # worker processes (not threads)
        manager = Manager()  # manages returned results
        manager_result = manager.list([])
        ex_counter = 0
        loglines += str(it) + '\n'

        for ex in range(pa.num_ex):
            p = Process(target=ex_test,
                        args=(pg_learners[ex_counter], envs[ex], pa, manager_result, ))
            ps.append(p)
            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_test_ex - 1:
                ex_counter = 0
                for p in ps:
                    p.start()
                for p in ps:
                    p.join()

                # convert list from shared memory
                for r in manager_result:
                    loglines += r

                ps = []
                manager_result = manager.list([])

    logs.write(loglines)
    logs.close()
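# The evaluation above fans work out in groups of `pa.batch_size` processes and
# drains a Manager-backed list between groups. A self-contained sketch of that
# pattern (the `_demo_*` names are illustrative, not part of the project):
def _demo_worker(idx, result):
    # Each worker appends its own output to the shared list.
    result.append('result-from-worker-%d' % idx)

def _demo_fanout(n_workers=4):
    manager = Manager()
    shared = manager.list([])
    ps = [Process(target=_demo_worker, args=(i, shared)) for i in range(n_workers)]
    for p in ps:
        p.start()
    for p in ps:
        p.join()
    # Copy results out of shared memory before the list is replaced or reused.
    return list(shared)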
def launch(pa, pg_resume=None, save_freq=50, render=False, repre='image', end='no_new_job', test_only=False):

    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    if test_only:
        test(0, pa, pg_resume, workloads)
        return

    pg_learners = []
    envs = []

    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = Env(0, 1)
        env.workload_seq = workloads[ex]
        envs.append(env)

    for ex in range(pa.batch_size + 1):  # last worker is used for updating the parameters
        print("-prepare for worker-", ex)
        pg_learner = policy_network.PGLearner(pa)
        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)
        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    print('Preparing for Training from Scratch...')
    # ref_discount_rews = slow_down_cdf.launch(pa, pg_resume=None, render=False, repre=repre, end=end)

    all_test_rews = []
    timer_start = time.time()

    logs = open('/tmp/logs', 'a')
    loglines = ''

    for iteration in range(1, pa.num_epochs + 1):

        ps = []  # worker processes
        manager = Manager()  # manages returned results
        manager_result = manager.list([])

        ex_indices = list(range(pa.num_ex))
        # np.random.shuffle(ex_indices)

        all_ob = []
        all_action = []
        grads_all = []
        eprews = []
        eplens = []
        all_adv = []
        all_eprews = []
        all_eplens = []

        ex_counter = 0
        for ex in range(pa.num_ex):
            ex_idx = ex_indices[ex]
            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            ps.append(p)
            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:
                print(ex + 1, "out of", pa.num_ex)

                ex_counter = 0

                for p in ps:
                    p.start()
                for p in ps:
                    p.join()

                # convert list from shared memory
                result = []
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # Do the policy gradient update step using the last agent:
                # put the new parameters in the last 'worker', then propagate the update at the end.
                grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])

                eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths

        # assemble gradients
        grads = grads_all[0]
        for i in range(1, len(grads_all)):
            for j in range(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to the other workers
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)

        for i in range(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print("-----------------")
        print("Iteration: \t %i" % iteration)
        print("NumTrajs: \t %i" % len(eprews))
        print("NumTimesteps: \t %i" % np.sum(eplens))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")

        # time.sleep(5)

        pg_resume = '/home/dell/testing_part2/%s_10ex.pkl_' % str(iteration)

        if iteration % 10 == 0:
            param_file = open(pg_resume, 'wb')
            pickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            param_file.close()

        if iteration % 20 == 0:
            logline = test(iteration, pa, pg_resume, workloads, pg_learners[pa.batch_size])
            loglines += logline

            logs.write(loglines)
            logs.flush()
            os.fsync(logs.fileno())
            loglines = ''

    logs.close()
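# `init_accums` and `rmsprop_updates_outside` are called above but defined
# elsewhere. A minimal sketch, assuming the manual RMSProp used in the original
# deeprm codebase (one numpy accumulator per parameter tensor, updated outside
# the network graph); the project's actual version may differ.
def init_accums(pg_learner):
    # One zero-initialized squared-gradient accumulator per network parameter.
    accums = []
    for param in pg_learner.get_params():
        accums.append(np.zeros(param.shape, dtype=param.dtype))
    return accums

def rmsprop_updates_outside(grads, params, accums, stepsize, rho=0.9, epsilon=1e-9):
    # In-place RMSProp ascent step: accumulate squared gradients, then scale
    # the step (params are moved along the gradient, which here points toward
    # higher expected reward).
    assert len(grads) == len(params) == len(accums)
    for i in range(len(grads)):
        accums[i] = rho * accums[i] + (1 - rho) * grads[i] ** 2
        params[i] += stepsize * grads[i] / np.sqrt(accums[i] + epsilon)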
def test(it, pa, pg_resume, workloads, pg_learner=None, episode_max_length=200):

    if pg_learner is None:
        pg_learner = policy_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)

    env = Env(0, 1)
    flag = 1

    logline = str(it) + '\n'

    for ex in range(pa.num_ex):
        env.reset()
        env.workload_seq = workloads[ex]
        env.generate_workload()
        print('Testing : ', env.workload_seq)
        ob = env.observe()

        acts = []
        probs = []
        crs = [0] * pa.num_machines
        crs_max = [0] * pa.num_machines
        rews = []
        final_obs = []
        final_acts = []
        final_rews = []
        indices = []
        json_array = []
        utils = 0
        suffer = []
        finished_episode_len = 0

        for _ in range(episode_max_length):
            act_prob = pg_learner.get_one_act_prob(ob)
            csprob_n = np.cumsum(act_prob)
            a = np.argmax(act_prob)

            # ---------------- JSON state logging (disabled) ----------------
            # json_all_machines = []
            # for k, machine in enumerate(env.machines):
            #     # print(k)
            #     json_machine_array = []
            #     for task in machine.running_tasks:
            #         json_task = {}
            #         json_task['name'] = task.service
            #         if len(task.cpu_util) < pa.hist_wind_len:
            #             json_task['util'] = [0 for x in range(pa.hist_wind_len)]
            #             json_task['util'][-len(task.cpu_util):] = task.cpu_util[-len(task.cpu_util):]
            #             # print(len(json_task['util']))
            #         else:
            #             json_task['util'] = task.cpu_util[-pa.hist_wind_len:]
            #         json_machine_array.append(json_task)
            #     json_all_machines.append(json_machine_array)

            # json_incoming_tasks = []
            # x = []
            # for task in env.waiting_tasks:
            #     x.append(task.service)
            # json_all_machines.append(x)
            # json_all_machines.append(str(a))

            # if len(json_array) > 0:
            #     if status == 'Allocation_Success':
            #         x = []
            #         z = env.waiting_tasks[-len(prev_waiting_tasks) + 1:]
            #         for task in z:
            #             x.append(task.service)
            #         json_array[-1].append(x)
            #     else:
            #         x = []
            #         z = env.waiting_tasks[-len(prev_waiting_tasks):]
            #         for task in z:
            #             x.append(task.service)
            #         json_array[-1].append(x)

            # json_array.append(json_all_machines)
            # prev_waiting_tasks = env.waiting_tasks
            # ----------------------------------------------------------------

            # plt1 = visualize_state(ob, pa, '/tmp/trajs/' + str(_) + '.jpg')
            # if _ < sum([len(i) for i in workloads[0]]):
            #     print('Agent action: ', a)

            # Interactive override: the action actually taken comes from stdin,
            # not from the policy's argmax above.
            man_act = input('Manual Action : ')
            a = int(man_act)

            ob, rews, done, status = env.step(a, _, rews)
            acts.append(a)
            probs.append(act_prob)

            if status == 'Allocation_Success':
                finished_episode_len = _ + 1
            if status != 'Backlog_Empty':
                indices.append(_)

            if done:
                break

            # ---- per-timestep logs ----
            util = []
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        util.append(machine.total_cpus - machine.cpus_left)
                    else:
                        util.append(machine.total_cpus)
                        suffer.append(abs(machine.cpus_left))

                # Pairwise CPU interference penalty on this machine.
                crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j and len(task_i.cpu_util) > 0 and len(task_j.cpu_util) > 0:
                            crs[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                            crs_this_time[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                crs_max[k] = max(crs_max[k], crs_this_time[k])

            if util:  # guard against timesteps where no machine runs any task
                utils += sum(util) / len(util)

        for c in indices:
            final_acts.append(acts[c])
            final_rews.append(rews[c])

        for i in range(len(env.machines)):
            logline += str(crs[i]) + ','
        logline += str(sum(rews)) + '\n' + str(utils) + '\n' + str(sum(suffer)) + '\n' + str(finished_episode_len) + '\n'
        for i in range(len(env.machines)):
            logline += str(crs_max[i]) + ','
        logline += '\n'

        if it % 20 == 0:
            print('Test Actions: ', final_acts)
            print(probs[:finished_episode_len])
            print('Reward : ', final_rews)
            print('Full Reward: ', rews)
            print('Reward : ', sum(rews))

        # with open('/home/rnehra/json_logs/' + str(ex) + '.json', 'w') as json_file:
        #     json.dump(json_array, json_file)

    return logline
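# The pairwise interference penalty above appears as the same double loop in
# three places (CPU and memory in test2, CPU in test). A hedged refactoring
# into a single helper (illustrative only; `attr` selects which utilization
# history to read):
def pairwise_interference(machine, penalty, attr='cpu_util'):
    # Sum -penalty * u_i * u_j over all distinct pairs of co-located tasks,
    # using each task's most recent utilization sample.
    total = 0.0
    tasks = machine.running_tasks
    for i in range(len(tasks)):
        for j in range(i + 1, len(tasks)):
            u_i, u_j = getattr(tasks[i], attr), getattr(tasks[j], attr)
            if len(u_i) > 0 and len(u_j) > 0:
                total -= penalty * u_i[-1] * u_j[-1]
    return total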
def test(it, pa, pg_resume, pg_learner=None, episode_max_length=200):

    if pg_learner is None:
        pg_learner = policy_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)

    accuracy = 0.
    # logline = str(it) + '\n'

    for ex in range(pa.num_test_ex):
        env = Environment(ex + pa.num_ex)
        ob = env.current_grid
        print(sudoku.unflatten(ob))
        print('Testing : ')

        acts = []
        probs = []
        rews = []
        final_obs = []
        final_acts = []
        final_rews = []
        indices = []
        json_array = []
        utils = 0
        suffer = []

        for _ in range(pa.episode_max_length):
            act_prob = pg_learner.get_one_act_prob(ob)
            csprob_n = np.cumsum(act_prob)
            a = np.argmax(act_prob)

            # plt1 = visualize_state(ob, pa, '/tmp/trajs/' + str(_) + '.jpg')
            # if _ < sum([len(i) for i in workloads[0]]):
            #     print('Agent action: ', a)
            # man_act = input('Manual Action : ')
            # if man_act:
            #     a = int(man_act)

            ob, rews, mistake, done = env.act(a)
            acts.append(a)
            probs.append(act_prob)
            final_rews.append(rews)

            if done:
                break

        # ---- logs ----
        # An episode with zero total reward means the puzzle was solved
        # without mistakes; count it toward accuracy.
        if sum(final_rews) == 0:
            accuracy += 1

        if it % 20 == 0:
            print('Test Actions: ', acts)
            # print(probs)
            print('Reward : ', sum(final_rews))
            print('Full Reward: ', final_rews)

    print('Accuracy:', accuracy / pa.num_test_ex)
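# In all of the test loops above, `csprob_n = np.cumsum(act_prob)` is a leftover
# from stochastic action selection: at test time the code acts greedily via
# argmax instead. For reference, a sketch of the sampling variant that the
# cumsum supports (the usual way policy-gradient training draws actions):
def sample_action(act_prob):
    # Draw an action index from the categorical distribution `act_prob`.
    csprob_n = np.cumsum(act_prob)
    return (csprob_n > np.random.rand()).argmax()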