# Fragment: the enclosing definition and the body of the final `if use_cuda:` lie outside this view.
# Visible steps, in order:
#   1. Queue (sample, args.num_procs, args.use_deadline) for every item of `test_dataset`, run `wrap`
#      over the queue with `executor.map`, store each result in `res_opa` and print their sum as the
#      "OPA generates" count.
#   2. Load the saved "globalRL-p%d-t%d-d%d-l" model, move it to CUDA, and copy its state dict into a
#      freshly constructed `Solver`.
#   3. Set requires_grad=False on every parameter whose second dotted-name component is "init_w",
#      "embedding" or "mha".
#   4. Put the model in eval mode and start iterating `eval_loader`.
# NOTE(review): `for i, sample in test_dataset` unpacks each dataset item as a pair, and `i` is then
# reused as the enumerate index - presumably items are (index, sample); confirm.
# NOTE(review): `torch.load(...).cuda()` and `use_cuda=True` are unconditional here, while the later
# `.cuda()` call is guarded by `args.use_cuda` - confirm whether CPU-only runs are meant to work.
res_opa = np.zeros(len(test_dataset), dtype=int).tolist() for i, sample in test_dataset: inputs.append((sample, args.num_procs, args.use_deadline)) for i, ret in tqdm(enumerate(executor.map(wrap, inputs))): res_opa[i] = ret opares = np.sum(res_opa) print("[before training][OPA generates %d]" % opares) load_fname = "globalRL-p%d-t%d-d%d-l" % (args.num_procs, args.num_tasks, args.use_deadline) tmp = torch.load("../Pandamodels/globalrlmodels/" + load_fname + ".torchmodel").cuda() rl_model = Solver(args.num_procs, args.embedding_size, args.hidden_size, args.num_tasks, use_deadline=False, use_cuda=True) rl_model.load_state_dict(tmp.state_dict()) if args.use_cuda: rl_model.cuda() """Freeze the weight of the global reinforcement model""" freezing_param_name = ["init_w", "embedding", "mha"] for name, param in rl_model.named_parameters(): if name.split(".")[1] in freezing_param_name: param.requires_grad = False """Evaluate global model before the training""" rl_model.eval() ret = [] for i, batch in eval_loader: if use_cuda:
# Fragment: the statements before `if __name__ == "__main__":` belong to an evaluation/logging
# section whose enclosing loop and function start above this view. They print the mean
# log-probability and, per epoch, how many entries of `R`, `heuristic_distance` and `liu_boundary`
# are > 0, then switch `model` back to training mode.
# Script entry point: sets `use_pin_memory` from `args.use_cuda`, builds one `Solver`, and for each
# `level` builds fresh train/test `SchedSingleDataset` objects and calls `run(...)` with the same model.
# NOTE(review): `level` is never used - both datasets are always created with
# util_range=(0.6, 0.95); confirm whether the range was meant to depend on `level`.
print("log_probability\t", log_prob.detach().numpy().mean()) print( "[at epoch %d][RL model generates %d][heuristic generates %d][liu generates %d]" % (epoch, (R > 0).sum().detach().numpy(), (heuristic_distance > 0).sum().numpy(), (liu_boundary > 0).sum().numpy())) #print("AVG R", R.float().mean().detach().numpy()) model.train() #if (R > 0).sum() >= len(test_dataset) // 2: # break if __name__ == "__main__": if args.use_cuda: use_pin_memory = True else: use_pin_memory = False model = Solver(args.num_procs, 2, args.embedding_size, args.hidden_size, args.seq_len) for level in [0.5, 0.7, 0.8, 0.9, 0.95]: train_dataset = SchedSingleDataset(args.num_procs, args.seq_len, args.num_tr_dataset, util_range=(0.6, 0.95)) test_dataset = SchedSingleDataset(args.num_procs, args.seq_len, args.num_te_dataset, util_range=(0.6, 0.95)) run(train_dataset, test_dataset, model)
# Fragment: the line below opens with the tail of a call that starts above this view
# (`pin_memory=use_pin_memory)`), and the final `for` loop body may continue past it.
# Visible steps, in order:
#   1. Build a shuffled `test_data_loader` and an unshuffled `eval_loader` over `test_dataset`; the
#      eval loader's batch size is `args.num_test_dataset`.
#   2. Construct a `Solver` and move it to CUDA when `args.use_cuda` is set.
#   3. Allocate `moving_avg`, a zero tensor of length `args.num_train_dataset` (on CUDA when enabled).
#   4. Start iterating `train_data_loader` ("generating first baseline"), moving each batch to CUDA
#      when enabled.
pin_memory=use_pin_memory) test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True, pin_memory=use_pin_memory) eval_loader = DataLoader(test_dataset, batch_size=args.num_test_dataset, shuffle=False) # Calculating heuristics model = Solver(args.num_procs, args.embedding_size, args.hidden_size, args.num_tasks, use_deadline=use_deadline) if args.use_cuda: model = model.cuda() # Train loop moving_avg = torch.zeros(args.num_train_dataset) if args.use_cuda: moving_avg = moving_avg.cuda() #generating first baseline cc = 1 for (indices, sample_batch) in tqdm(train_data_loader): if args.use_cuda: sample_batch = sample_batch.cuda()
def _mask_and_renormalize(distribution, chosen):
    """Return a copy of ``distribution`` with entry ``chosen`` zeroed and the rest rescaled to sum to 1."""
    masked = distribution.clone()  # leave the model's own output tensor untouched
    masked[chosen] = 0
    return masked / torch.sum(masked)


def _stepwise_divergence(actions, distributions, n_step, num_tasks):
    """Average gap between a masked/renormalised policy at step ``t`` and a later policy.

    For every start step ``t`` the distribution ``distributions[t]`` has the
    tasks picked at the following steps removed (zeroed and renormalised) and
    is compared with the distribution the model emitted at a later step.

    The three modes reproduce the original branches exactly:

    * ``n_step == 1``: mask 1 task, compare with step ``t + 1`` using KLDivLoss.
    * ``n_step == 3``: mask 2 tasks, compare with step ``t + 2`` using the
      summed absolute difference (not KL).
    * anything else:   mask 5 tasks, compare with step ``t + 5`` using KLDivLoss.

    Args:
        actions: per-step chosen task indices (already squeezed by the caller).
        distributions: per-step policy outputs, indexed as ``distributions[t]``.
        n_step: look-ahead mode, see above.
        num_tasks: number of scheduling steps in one rollout.

    Returns:
        The accumulated divergence divided by ``num_tasks - horizon``.

    Raises:
        ZeroDivisionError: if ``num_tasks`` equals the horizon (as in the original).
    """
    # (horizon, tasks masked out, offset of the comparison step, use L1 instead of KL)
    if n_step == 1:
        horizon, n_masked, target_offset, use_l1 = 1, 1, 1, False
    elif n_step == 3:
        # NOTE(review): this branch averages over num_tasks - 3 steps but only masks
        # two tasks, compares against step t + 2 and uses an L1 distance. That is
        # what the original code did (its third sampled action was read but never
        # applied); confirm whether a true 3-step KL was intended.
        horizon, n_masked, target_offset, use_l1 = 3, 2, 2, True
    else:
        horizon, n_masked, target_offset, use_l1 = 5, 5, 5, False

    kl_loss = torch.nn.KLDivLoss(reduction="batchmean")
    n_terms = num_tasks - horizon
    total = 0
    for t in range(n_terms):
        predicted = distributions[t].squeeze()
        for k in range(n_masked):
            predicted = _mask_and_renormalize(predicted, actions[t + k])
        target = distributions[t + target_offset]
        if use_l1:
            total += np.sum(
                np.abs(predicted.detach().cpu().numpy() - target.detach().cpu().numpy())
            )
        else:
            # KLDivLoss takes log-probabilities as input and probabilities as target.
            total += kl_loss(torch.log(predicted), target)
    return total / n_terms


def kl_div(n_step):
    """Probe how strongly the local RL policy depends on the guide order.

    Loads the training set for utilisation ``args.range_l`` and the matching
    pre-trained local model, runs the first two samples of the first training
    batch through the model with two opposite guide orders (ascending and
    descending), prints half the L1 distance between ``distributions[0][5]``
    and ``distributions[1][5]`` (the total-variation distance if both are
    probability vectors) and terminates the process.

    The n-step divergence that gives this function its name sits behind that
    early exit and is therefore not executed at the moment; it is kept in
    ``_stepwise_divergence`` so it can be re-enabled by deleting the
    ``sys.exit(0)`` line.

    Differences from the previous implementation (none affect the printed
    value on a normal run):

    * the test split and the two loaders built from it were never used and are
      no longer loaded;
    * the first batch is fetched explicitly instead of through a nested
      ``for ... break`` that left ``distributions`` unbound when
      ``args.num_epochs`` was 0 or the loader was empty;
    * inference runs under ``torch.no_grad()``;
    * ``sys.exit`` replaces the interactive ``exit`` helper.

    Args:
        n_step: look-ahead mode (1, 3, otherwise 5) for the divergence; unused
            while the early exit is in place.

    Returns:
        The averaged divergence - only reachable once the early exit is removed.

    Raises:
        RuntimeError: if the training loader yields no batch.
        SystemExit: always, after printing the probe value.
    """
    import sys  # local import: this block cannot see the file's top-level imports

    # ---- training data -------------------------------------------------
    # NOTE(review): the original reset its `on` flag on every iteration, so only
    # the split whose utilisation equals args.range_l was ever loaded, although
    # the checkpoint name and the break at range_r suggest the whole
    # [range_l, range_r] span was intended. That behaviour is preserved here.
    # NOTE: pickle executes code from the file it loads; only use trusted data.
    train_sets = []
    for util in get_util_range(args.num_procs):
        if util == args.range_l:
            if positive:
                path_template = "../Pandadata/tr/%d-%d/positive/%s"
            else:
                path_template = "../Pandadata/tr/%d-%d/%s"
            with open(path_template % (args.num_procs, args.num_tasks, util), "rb") as f:
                train_sets.append(pickle.load(f))
        if util == args.range_r:
            break

    train_dataset = Datasets(train_sets)
    train_dataset.setlen(args.num_train_dataset)
    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        pin_memory=True,
    )

    # ---- model ---------------------------------------------------------
    checkpoint_name = "localRL-p%d-t%d-d%d-l[%s, %s].torchmodel" % (
        args.num_procs,
        args.num_tasks,
        int(use_deadline),
        args.range_l,
        args.range_r,
    )
    # NOTE(review): this .cuda() (and the one on `guide` below) is unconditional,
    # so the `use_cuda` flag cannot actually select a CPU-only run.
    trained = torch.load("../Pandamodels/localrlmodels/" + checkpoint_name).cuda()
    rl_model = Solver(
        args.num_procs,
        args.embedding_size,
        args.hidden_size,
        args.num_tasks,
        use_deadline=False,
        use_cuda=True,
        ret_embedded_vector=True,
    )
    rl_model.load_state_dict(trained.state_dict())
    if use_cuda:
        rl_model = rl_model.cuda()
    rl_model = rl_model.eval()
    if use_cuda:
        rl_model = rl_model.to("cuda:0")

    # ---- two opposite guide orders --------------------------------------
    # NOTE(review): the length 32 is hard-coded; presumably it should equal
    # args.num_tasks - confirm before running with another task count.
    guide_len = 32
    ascending = np.arange(guide_len)
    guide = torch.LongTensor(
        np.array([ascending, ascending[::-1]], dtype=np.int32)
    ).cuda()

    # ---- single guided forward pass on the first two samples -------------
    first_batch = next(iter(train_loader), None)
    if first_batch is None:
        raise RuntimeError("kl_div: the training loader produced no batches")
    _, sample_batch = first_batch
    sample_batch = sample_batch[:2, :, :]
    with torch.no_grad():
        _, actions, distributions = rl_model(sample_batch, guide=guide)

    # ---- probe: compare the two rollouts at index 5 -----------------------
    a = distributions[0][5].detach().cpu().numpy()
    b = distributions[1][5].detach().cpu().numpy()
    print(0.5 * np.sum(np.abs(a - b)))
    # Debug short-circuit kept from the original; everything below is
    # intentionally unreachable until this line is removed.
    sys.exit(0)

    # NOTE(review): the probe above indexes `distributions[sample][step]`, while
    # the divergence indexes `distributions[step]`; the layouts must be
    # reconciled before this path is re-enabled.
    return _stepwise_divergence(actions.squeeze(), distributions, n_step, args.num_tasks)