def extract_feat(resource, work_packages):
    """Build the pairwise GNN feature tensor for one resource and its packages.

    Returns a float tensor of shape (1, P+1, P+1, 4) where P is the configured
    'package_num'; the extra last row/column holds resource<->package travel
    features.  Channels: 0/1 = urgency of the row/column package, 2 = travel
    time (plus destination working time for package->package and
    resource->package edges), 3 = edge-present flag.
    """
    speed = define.get_value('speed')
    size = define.get_value('package_num') + 1
    rx, ry, rid = resource.getPosition()
    # Flatten each package into [x, y, urgency, working_time, urgency, id].
    rows = [[p.getX(), p.getY(), p.getUrgency(), p.getWorkingTime(),
             p.getUrgency(), p.getId()] for p in work_packages]
    feat = np.zeros((size, size, 4))
    for src in rows:
        i = src[5]
        for dst in rows:
            j = dst[5]
            feat[i, j, 0] = src[2]
            feat[i, j, 1] = dst[2]
            # Travel time from src to dst plus dst's working time.
            feat[i, j, 2] = define.dis(src[0], dst[0], src[1], dst[1]) / speed + dst[3]
            feat[i, j, 3] = 1
    # Resource <-> package edges live in the extra last row/column.
    for row in rows:
        i = row[5]
        travel = define.dis(row[0], rx, row[1], ry) / speed
        feat[-1, i, 2] = travel + row[3]
        feat[i, -1, 2] = travel
    return torch.from_numpy(feat).unsqueeze(0).float()
def __init__(self, id=None, x=None, y=None, timeLimit=None, speed=None):
    """Initialise a resource at position (x, y).

    timeLimit / speed fall back to the global config values ('time_limit',
    'speed') when not supplied.  The initial position is remembered
    separately from the current one; __pid starts at -1 (no package).
    """
    # Resolve configurable defaults first.
    if timeLimit is None:
        timeLimit = define.get_value('time_limit')
    if speed is None:
        speed = define.get_value('speed')
    self.__id = id
    self.__currentX = x
    self.__currentY = y
    self.__initialX = x
    self.__initialY = y
    self.__workingTime = 0
    self.__timeLimit = timeLimit
    self.__speed = speed
    self.__pid = -1
def wrapper(idx):
    """Turn pre-generated sample `data[idx]` into (packages, resource) objects.

    `data[idx]` unpacks to a resource position, package locations, and
    per-package urgencies; working time is fixed at 0 here.
    """
    r_pos, locs, ps = data[idx]
    resource = [Resource("resource",
                         r_pos[0],
                         r_pos[1],
                         timeLimit=define.get_value('time_limit'),
                         speed=define.get_value('speed'))]
    packages = [WorkPackage(i, loc[0], loc[1], p, 0)
                for i, (loc, p) in enumerate(zip(locs, ps))]
    return packages, resource
def data_wrapper(package_num, seed):
    """Generate a (packages, resources) pair via the configured generator.

    Dispatches through func_dict on the global 'func_type', then wraps the
    raw tuples into WorkPackage / Resource objects.
    """
    resource, packages = func_dict[define.get_value('func_type')](package_num, seed)
    ret_resource = [Resource("resource",
                             resource[0],
                             resource[1],
                             timeLimit=define.get_value('time_limit'),
                             speed=define.get_value('speed'))]
    # Raw package tuples are (x, y, urgency, working_time).
    ret_packages = [WorkPackage(i, p[0], p[1], p[2], p[3])
                    for i, p in enumerate(packages)]
    return ret_packages, ret_resource
def __init__(self, resource, packages, net=None, is_dqn=None, device='cpu', base=None, time_limit=None, dis_matrix=None, feature=None):
    """Initialise the path/schedule state for one resource over `packages`.

    When a net is supplied and no precomputed `dis_matrix` is given, builds
    the pairwise cost matrix dis(i, j)/speed + working_time(j) and extracts
    the GNN features once up front.

    Fix: the original assigned self.q / self.score / self.base twice and set
    self.dis_matrix to None before immediately overwriting it; the dead
    duplicate assignments are removed (final state is unchanged).
    """
    self.__resource = copy.deepcopy(resource)
    self.__workPackages = []
    self.__totalUrgency = 0
    self.__existWorkPackages = {}
    self.__is_dqn = is_dqn
    self.__net = net
    self.__packages = packages
    self.device = device
    self.dis_matrix = dis_matrix
    self.q = None
    self.score = None
    self.base = base
    self.feature = feature
    self.all_urgency = np.array([wk.getUrgency() for wk in packages])
    self.package_num = define.get_value('package_num')
    self.time_limit = define.get_value('time_limit')
    self.speed = define.get_value('speed')
    if self.__net is not None and dis_matrix is None:
        # Broadcast package coordinates into (P, P, 2) grids so that
        # tmp_X - tmp_Y yields every pairwise displacement in one shot.
        tmp_X = np.zeros((self.package_num, self.package_num, 2))
        tmp_Y = np.zeros((self.package_num, self.package_num, 2))
        tmp_time = np.zeros((self.package_num, self.package_num))
        for wk in packages:
            tmp_X[wk.getId(), :, 0] = wk.getX()
            tmp_X[wk.getId(), :, 1] = wk.getY()
            tmp_Y[:, wk.getId(), 0] = wk.getX()
            tmp_Y[:, wk.getId(), 1] = wk.getY()
            tmp_time[:, wk.getId()] = wk.getWorkingTime()
        # cost[i, j] = travel time i -> j plus working time at j.
        self.dis_matrix = np.sqrt(np.sum(np.square(tmp_X - tmp_Y), axis=2)) / self.speed + tmp_time
        self.feature = feature_extractor.feature_extractor2(
            [[resource.getInitialX(), resource.getInitialY()],
             packages,
             self.time_limit,
             [resource.getInitialX(), resource.getInitialY()]])
def step(self, action):
    """Apply `action` (a package index) to the environment.

    Returns (reward, done, mask) as tensors on self.device.  Picking an
    already-masked package or exceeding the time limit ends the episode
    with zero reward; otherwise the package's urgency is the reward and the
    path/mask state is advanced.

    Fix: removed a leftover debug print ("seed") on the masked-action branch.
    """
    if self.mask[0, action] == 1:
        # Action already taken / invalid: terminate with no reward.
        done = 1
        reward = 0
    else:
        package = self.packages[action]
        self.times = self.path.getResourceNeedTime(package) + self.path.getResourceWorkingTime()
        # Episode ends if already done, or if serving this package and
        # returning would exceed the global time limit.
        if self.done == 1 or self.times + self.path.getReturnTime(package) > define.get_value('time_limit'):
            done = 1
            self.done = 1
            reward = 0
        else:
            done = 0
            self.mask[0, action] = 1
            reward = package.getUrgency()
            self.path.addWorkPackage(package)
            self.path.setResourceWorkingTime(self.times)
            self.path.setResourcePosition(package.getX(), package.getY(), package.getId())
    return (torch.FloatTensor([reward]).to(self.device),
            torch.FloatTensor([done]).to(self.device),
            self.mask.to(self.device))
def gen_random_data(package_num, seed):
    """Generate a seeded random instance: one resource and `package_num` packages.

    Coordinates are uniform in [0, 1), urgency in {0.01, ..., 1.00}, working
    time uniform in [0, 0.1).  The RNG call order (resource first, then per
    package x, y, urgency, working_time) is fixed so results are reproducible.
    """
    random.seed(seed)
    resources = [Resource("resource",
                          random.random(),
                          random.random(),
                          timeLimit=define.get_value('time_limit'),
                          speed=define.get_value('speed'))]
    # Arguments evaluate left-to-right, preserving the original RNG stream.
    workpackages = [WorkPackage(i,
                                random.random(),
                                random.random(),
                                random.randint(1, 100) / 100,
                                random.random() * 0.1)
                    for i in range(package_num)]
    return workpackages, resources
def reset(self, i):
    """Reset the environment to the i-th seed within [range_start, range_end).

    Regenerates packages/resources for that seed, clears the action mask
    (reserving the last slot as the already-taken depot entry), and rebuilds
    the path state.
    """
    span = self.range_end - self.range_start
    self.seed = self.range_start + i % span
    self.count = (self.count + 1) % span
    self.packages, self.resources = gen_data.wrapper(self.seed)
    # One mask slot per package plus a trailing slot that starts taken.
    self.mask = torch.zeros(1, define.get_value('package_num') + 1)
    self.mask[0, -1] = 1
    self.path = path_obj.Path(self.resources[0], self.packages, None, False, self.device)
    self.reward = 0
    self.times = 0
def beam_search(encoder, decoder, beam_size):
    """Run beam search over 10000 environments and print running averages.

    For each instance: encode the initial state once, then repeatedly expand
    every beam entry with the decoder's action probabilities, keep the
    `beam_size` highest-probability candidates, and step cloned environments.
    Prints index, mean best reward so far, and mean time per instance.
    """
    import copy
    total_baselines = 0
    time_start = time.time()
    for _index in range(0, 10000):
        env = Env(_index, _index + 1)
        # Beam entries are [env, cumulative probability, last action, cumulative reward].
        beam_list = [[env, 1, 0, 0]]
        max_rewards = 0
        state = env.reset(0)
        envs = Envs([env for env, prob, action, reward in beam_list])
        emb = encoder(state)  # encoded once; expanded to the beam width below
        first = True
        while True:
            if first:
                # First step: no previous action to condition on.
                first = False
                prob = decoder(emb.expand(len(envs.envs), -1, -1),
                               envs.masks().to(device),
                               envs.times().to(device),
                               None)
            else:
                prob = decoder(emb.expand(len(envs.envs), -1, -1),
                               envs.masks().to(device),
                               envs.times().to(device),
                               torch.LongTensor([a for e, p, a, r in beam_list]).to(device))
            prob = prob.cpu().detach().numpy()
            # Expand every beam entry by every non-negligible action.
            tmp_list = []
            for i in range(len(beam_list)):
                for j in range(define.get_value('package_num')):
                    if prob[i, j] < 1e-10:
                        continue
                    tmp_list.append([beam_list[i][0],
                                     beam_list[i][1] * prob[i, j],
                                     j,
                                     beam_list[i][3]])
            # Keep the beam_size most probable candidates.
            tmp_list.sort(key=lambda x: x[1], reverse=True)
            tmp_list = tmp_list[:beam_size]
            # Clone environments so each candidate steps independently.
            envs = Envs([copy.deepcopy(env) for env, prob, action, r in tmp_list])
            action = [a for env, prob, a, r in tmp_list]
            rewards, dones, masks, all_done = envs.step(action)
            beam_list.clear()
            rewards = rewards.cpu().numpy()
            for i, (env, prob, a, r) in enumerate(tmp_list):
                r = r + rewards[i]
                max_rewards = max(max_rewards, r)
                beam_list.append([copy.deepcopy(envs.envs[i]), prob, a, r])
            # Stop once every beam environment reports done.
            if np.sum(1 - dones.cpu().numpy()) == 0:
                break
        total_baselines += max_rewards
        print('{} {} {}'.format(_index,
                                total_baselines / (_index + 1),
                                (time.time() - time_start) / (_index + 1)))
def feature_extractor2(sample):
    """Build the (1, P, P, 8) pairwise feature tensor and (1, P, P, 1) mask.

    `sample` unpacks to (resource position, work packages, total time budget,
    end position).  Channels per (i, j) edge: 0/1 urgency of i/j; 2/3 travel
    time from the end position to i/j; 4 travel i->j plus j's working time;
    5/6 travel from the resource to i/j plus working time; 7 the time budget.
    mask[i, j] = 1 for every existing package pair.
    """
    resource, work_packages, total_time, end_axis = sample
    # Flatten each package into [x, y, urgency, working_time, urgency, id].
    # NOTE(review): urgency appears at both index 2 and index 4; only index 2
    # is read below — index 4 is presumably a leftover slot, confirm intent.
    work_packages = [[
        p.getX(), p.getY(), p.getUrgency(), p.getWorkingTime(),
        p.getUrgency(), p.getId()
    ] for p in work_packages]
    gnn_feature = np.zeros(
        (define.get_value('package_num'), define.get_value('package_num'), 8))
    mask = np.zeros(
        (define.get_value('package_num'), define.get_value('package_num'), 1))
    for p_i in work_packages:
        i = p_i[5]
        for p_j in work_packages:
            j = p_j[5]
            gnn_feature[i, j, 0] = p_i[2]
            gnn_feature[i, j, 1] = p_j[2]
            # Travel time from the end position to each endpoint.
            gnn_feature[i, j, 2] = define.dis(end_axis[0], p_i[0],
                                              end_axis[1], p_i[1]) / define.get_value('speed')
            gnn_feature[i, j, 3] = define.dis(end_axis[0], p_j[0],
                                              end_axis[1], p_j[1]) / define.get_value('speed')
            # Travel i -> j plus working time at j.
            gnn_feature[i, j, 4] = define.dis(p_i[0], p_j[0],
                                              p_i[1], p_j[1]) / define.get_value('speed') + p_j[3]
            # Travel resource -> endpoint plus that endpoint's working time.
            gnn_feature[i, j, 5] = define.dis(resource[0], p_i[0],
                                              resource[1], p_i[1]) / define.get_value('speed') + p_i[3]
            gnn_feature[i, j, 6] = define.dis(resource[0], p_j[0],
                                              resource[1], p_j[1]) / define.get_value('speed') + p_j[3]
            gnn_feature[i, j, 7] = total_time
            mask[i, j] = 1
    # Leading batch dimension of 1 is added for the network.
    return np.expand_dims(gnn_feature, axis=0), np.expand_dims(mask, axis=0)
def step(self, action, reset=True):
    """Apply `action` (a package index); return (state, reward, done) tensors.

    The episode ends with zero reward when it is already done or serving the
    package plus the return trip would exceed the time limit; in that case
    the environment is reset when `reset` is true.  Otherwise the package's
    urgency is the reward and the path advances; if every package has been
    scheduled the episode ends and the environment resets unconditionally.
    """
    package = self.packages[action]
    times = self.path.getResourceNeedTime(package) + self.path.getResourceWorkingTime()
    if self.done == 1 or times + self.path.getReturnTime(package) > define.get_value('time_limit'):
        self.done = 1
        done = 1
        reward = 0
        if reset:
            # NOTE(review): called with no arguments here — confirm this
            # class's reset() signature matches (a sibling reset takes an
            # index argument).
            self.reset()
    else:
        self.done = 0
        done = 0
        reward = package.getUrgency()
        self.path.addWorkPackage(package)
        self.path.setResourceWorkingTime(times)
        self.path.setResourcePosition(package.getX(), package.getY(), package.getId())
        # All packages scheduled: terminal state, reset unconditionally.
        if self.path.getWorkPackageSize() >= define.get_value('package_num'):
            self.done = 1
            done = 1
            self.reset()
    return (self.path.to_state(),
            torch.FloatTensor([reward]).to(self.device),
            torch.FloatTensor([done]).to(self.device))
def dqn_schedule(model, data, device, plan_limit=None, time_limit=None, time_interval=None):
    """Run one greedy DQN-guided schedule and update shared progress counters.

    `data` is a pair (data0, data1) where data0 unpacks to (workpackages,
    resources) and data1 is passed through to SchedulePolicy.  Limits fall
    back to the global config when not supplied.  Returns (total urgency,
    chosen path indices).  Aggregates timing/score into multiprocessing
    shared values under `lock` and prints a running average.
    """
    plan_limit = define.get_value('plan_limit') if plan_limit is None else plan_limit
    time_limit = define.get_value('time_limit') if time_limit is None else time_limit
    time_interval = define.get_value('time_interval') if time_interval is None else time_interval
    data0, data1 = data
    workpackages, resources = data0
    time0 = time.time()
    schedule = SchedulePolicy(resources,
                              workpackages,
                              None,
                              model,
                              is_dqn=True,
                              time_limit=time_limit,
                              plan_limit=plan_limit,
                              time_interval=time_interval,
                              device=device,
                              batch_size=args.batch_size,
                              data=data1)
    schedule.greedySchedule()
    ret = schedule.get_urgency()
    # Shared multiprocessing state: guard all updates with the lock.
    global lock, counter, timer, values
    with lock:
        timer.value += time.time() - time0
        values.value += ret
        counter.value += 1
        # NOTE(review): `% 1 == 0` is always true, so this prints every call —
        # possibly a leftover from a larger print interval (e.g. % 10).
        if counter.value % 1 == 0:
            sys.stdout.write('\r{} {:.4f} {:.4f}'.format(
                counter.value, values.value / counter.value, timer.value / counter.value))
            sys.stdout.flush()
    return ret, schedule.path_idx
def no_dqn_schedule(data, plan_limit=None, time_limit=None, time_interval=None):
    """Run one greedy schedule without a DQN and return (urgency, path indices).

    Any limit left as None falls back to the corresponding global config
    value ('plan_limit', 'time_limit', 'time_interval').
    """
    if plan_limit is None:
        plan_limit = define.get_value('plan_limit')
    if time_limit is None:
        time_limit = define.get_value('time_limit')
    if time_interval is None:
        time_interval = define.get_value('time_interval')
    workpackages, resources = data
    schedule = Schedule(resources,
                        workpackages,
                        replay_memory=None,
                        net=None,
                        is_dqn=False,
                        time_limit=time_limit,
                        plan_limit=plan_limit,
                        time_interval=time_interval)
    schedule.greedySchedule()
    return schedule.get_urgency(), schedule.path_idx
def beam_search(model, beam_size):
    """Beam search with a single actor-critic model over 10000 environments.

    For each instance: repeatedly query the model for action probabilities
    over the current beam, expand every entry by every non-negligible
    action, keep the `beam_size` most probable candidates, and step cloned
    environments until all are done.  Prints index, mean best reward so far,
    and mean time per instance.
    """
    import copy
    total_baselines = 0
    time_start = time.time()
    for _index in range(0, 10000):
        env = Env(_index, _index + 1)
        # Beam entries are [env, cumulative probability, last action, cumulative reward].
        beam_list = [[env, 1, 0, 0]]
        max_rewards = 0
        state = env.reset()
        envs = Envs([env for env, prob, action, reward in beam_list])
        while True:
            state = envs.to_state()
            prob, value = model(*state)  # value head unused here
            prob = prob.cpu().detach().numpy()
            # Expand every beam entry by every non-negligible action.
            tmp_list = []
            for i in range(len(beam_list)):
                for j in range(define.get_value('package_num')):
                    if prob[i, j] < 1e-10:
                        continue
                    tmp_list.append([beam_list[i][0],
                                     beam_list[i][1] * prob[i, j],
                                     j,
                                     beam_list[i][3]])
            # Keep the beam_size most probable candidates.
            tmp_list.sort(key=lambda x: x[1], reverse=True)
            tmp_list = tmp_list[:beam_size]
            # Clone environments so each candidate steps independently;
            # reset=False keeps terminal states intact for scoring.
            envs = Envs([copy.deepcopy(env) for env, prob, action, r in tmp_list])
            action = [a for env, prob, a, r in tmp_list]
            next_state, reward, done = envs.step(action, False)
            beam_list.clear()
            reward = reward.cpu().numpy()
            for i, (env, prob, a, r) in enumerate(tmp_list):
                r = r + reward[i]
                max_rewards = max(max_rewards, r)
                beam_list.append([copy.deepcopy(envs.envs[i]), prob, a, r])
            # Stop once every beam environment reports done.
            if np.sum(1 - done.cpu().numpy()) == 0:
                break
        total_baselines += max_rewards
        print('{} {} {}'.format(_index,
                                total_baselines / (_index + 1),
                                (time.time() - time_start) / (_index + 1)))
def evaluate(model, seed):
    """Greedily roll out `model` on the instance for `seed`; return total urgency.

    Repeatedly picks the argmax action until serving the chosen package plus
    the return trip would exceed the time limit, then returns the reward
    accumulated so far.

    Fix: removed the unreachable `return returns` after the `while True`
    loop — the in-loop return is the only exit.
    NOTE(review): nothing masks previously-taken actions here, so a model
    that keeps choosing the same package could loop forever — presumably
    the time-limit check eventually triggers; confirm.
    """
    returns = 0
    packages, resources = gen_data.wrapper(seed)
    path = path_obj.Path(resources[0], packages, lambda x: 0, True, device)
    while True:
        state = path.to_state()
        prob, value = model(*state)
        action = torch.argmax(prob, dim=1)
        action = action.cpu().numpy()[0]
        package = packages[action]
        times = path.getResourceNeedTime(package) + path.getResourceWorkingTime()
        # Stop when this package plus the return trip would bust the budget.
        if times + path.getReturnTime(package) > define.get_value('time_limit'):
            return returns
        reward = package.getUrgency()
        path.addWorkPackage(package)
        path.setResourceWorkingTime(times)
        path.setResourcePosition(package.getX(), package.getY(), package.getId())
        returns += reward
# --- CLI setup and global config ---
parser.add_argument("--beam", action='store_true', help='generate RL sample or IL sample')
parser.add_argument("--func-type", type=str, help='generate RL sample or IL sample')
args = parser.parse_args()
define.init()
define.set_value('package_num', args.package_num)
define.set_value('time_limit', args.time_limit)
define.set_value('func_type', args.func_type)
# Pre-generate the training data pool.
gen_data.generate_data(100000, args.package_num, args.func_type)
device = torch.device(args.device)
encoder = GraphNet(hidden_size=args.hidden_size,
                   n_head=args.nhead,
                   nlayers=args.nlayer).to(device)
decoder = GraphNetDecoder(hidden_size=args.hidden_size).to(device)
if args.beam:
    # Load the pointer-network checkpoints and evaluate with beam search
    # on a fresh 10000-instance pool.
    encoder.load_state_dict(torch.load('model/model_encoder_pn_{}_{}.ckpt'.format(
        define.get_value('package_num'), args.func_type), map_location=device))
    decoder.load_state_dict(torch.load('model/model_decoder_pn_{}_{}.ckpt'.format(
        define.get_value('package_num'), args.func_type), map_location=device))
    encoder.eval()
    decoder.eval()
    print('load successfully')
    gen_data.generate_data(10000, args.package_num, args.func_type)
    beam_search(encoder, decoder, 100)
if args.test:
    # Test mode reloads the same checkpoints (suite continues beyond this chunk).
    encoder.load_state_dict(torch.load('model/model_encoder_pn_{}_{}.ckpt'.format(
        define.get_value('package_num'), args.func_type), map_location=device))
    decoder.load_state_dict(torch.load('model/model_decoder_pn_{}_{}.ckpt'.format(
        define.get_value('package_num'), args.func_type), map_location=device))
# --- CLI setup and global config ---
parser.add_argument('--beam', action='store_true', help='beam search or not')
args = parser.parse_args()
define.init()
define.set_value('package_num', args.package_num)
define.set_value('time_limit', args.time_limit)
define.set_value('func_type', args.func_type)
# CPU runs use many intra-op threads; GPU runs pin to one.
if args.device == 'cpu':
    torch.set_num_threads(58)
else:
    torch.set_num_threads(1)
if args.beam:
    # Load the DQN checkpoint (name encodes all hyperparameters) and run
    # beam search on a fresh 10000-instance pool, then exit.
    device = torch.device(args.device)
    model = GraphNet(hidden_size=args.hidden_size,
                     n_head=args.nhead,
                     nlayers=args.nlayer,
                     duel_dqn=args.duel_dqn)
    model.load_state_dict(torch.load('model/model_dqn{}_{}_{}_{}_{}_{}_{}_{}_{}.ckpt'.format(
        args.fn, define.get_value('package_num'), args.func_type, args.num_env,
        args.hidden_size, args.nhead, args.nlayer,
        'double' if args.double_dqn else 'vanilla',
        'duel' if args.duel_dqn else 'vanilla'), map_location=torch.device('cpu')))
    model = model.to(device)
    model.eval()
    gen_data.generate_data(10000, args.package_num, args.func_type)
    beam_search(model, 100)
    exit()
if args.path:
    # Path mode: same checkpoint load (suite continues beyond this chunk).
    device = torch.device(args.device)
    model = GraphNet(hidden_size=args.hidden_size,
                     n_head=args.nhead,
                     nlayers=args.nlayer,
                     duel_dqn=args.duel_dqn)
    model.load_state_dict(torch.load('model/model_dqn{}_{}_{}_{}_{}_{}_{}_{}_{}.ckpt'.format(
        args.fn, define.get_value('package_num'), args.func_type, args.num_env,
        args.hidden_size, args.nhead, args.nlayer,
        'double' if args.double_dqn else 'vanilla',
        'duel' if args.duel_dqn else 'vanilla'), map_location=torch.device('cpu')))
define.init() define.set_value('package_num', args.package_num) define.set_value('time_limit', args.time_limit) define.set_value('func_type', args.func_type) gen_data.generate_data(100000, args.package_num, args.func_type) device = torch.device(args.device) encoder = GraphNet(hidden_size=args.hidden_size, n_head=args.nhead, nlayers=args.nlayer).to(device) decoder = GraphNetDecoder(hidden_size=args.hidden_size).to(device) if args.beam: encoder.load_state_dict( torch.load('model/model_att_encoder_{}_{}.ckpt'.format( define.get_value('package_num'), args.func_type), map_location=device)) decoder.load_state_dict( torch.load('model/model_att_decoder_{}_{}.ckpt'.format( define.get_value('package_num'), args.func_type), map_location=device)) encoder.eval() decoder.eval() gen_data.generate_data(10000, args.package_num, args.func_type) beam_search(encoder, decoder, 100) if args.test: encoder.load_state_dict( torch.load('model/model_att_encoder_{}_{}.ckpt'.format( define.get_value('package_num'), args.func_type),