def reset(self, i):
    """Reload the problem instance for index ``i`` and clear episode state.

    The seed is ``range_start`` plus ``i`` wrapped into the width of the
    ``[range_start, range_end)`` interval; ``count`` is advanced by one and
    wrapped into the same width. Packages and resources are then fetched
    from ``gen_data.wrapper`` for that seed, a fresh mask and ``Path`` are
    built, and the ``reward`` / ``times`` accumulators are zeroed.

    Args:
        i: Integer index used to derive the seed.
    """
    # Width of the seed interval; both the seed and the counter wrap on it.
    span = self.range_end - self.range_start
    self.seed = self.range_start + i % span
    self.count = (self.count + 1) % span

    self.packages, self.resources = gen_data.wrapper(self.seed)

    # One row with ``package_num + 1`` columns, all zero except the last.
    # NOTE(review): the extra trailing column presumably stands for a
    # non-package action (e.g. "finish") -- confirm against the consumer.
    column_count = define.get_value('package_num') + 1
    mask = torch.zeros(1, column_count)
    mask[0, -1] = 1
    self.mask = mask

    # Only the first resource is handed to the path object.
    first_resource = self.resources[0]
    self.path = path_obj.Path(
        first_resource, self.packages, None, False, self.device
    )

    self.reward = 0
    self.times = 0
def evaluate(model, seed):
    """Greedily roll out ``model`` on the instance for ``seed``.

    At every step the model is called on ``path.to_state()`` and the index
    with the highest value in its first output is taken as the next package.
    The rollout stops as soon as serving the chosen package and then
    returning would exceed the configured ``'time_limit'``; the package that
    triggers the stop is not counted.

    Args:
        model: Callable invoked as ``model(*state)`` that returns a pair
            whose first element is a 2-D tensor scored along ``dim=1``.
        seed: Instance identifier forwarded to ``gen_data.wrapper``.

    Returns:
        The sum of ``getUrgency()`` over all packages that were accepted
        before the time limit was hit.
    """
    returns = 0
    packages, resources = gen_data.wrapper(seed)
    # Only the first resource is used; ``device`` is a module-level name.
    path = path_obj.Path(resources[0], packages, lambda x: 0, True, device)

    # Nothing here is back-propagated, so skip building the autograd graph.
    with torch.no_grad():
        while True:
            state = path.to_state()
            # The second model output is not needed for a greedy rollout.
            prob, _ = model(*state)
            # Greedy choice for the first (only) row, as a plain Python int.
            action = torch.argmax(prob, dim=1)[0].item()
            package = packages[action]

            # Working time after serving this package.
            times = (
                path.getResourceNeedTime(package)
                + path.getResourceWorkingTime()
            )
            # The time limit is the loop's only exit.
            # NOTE(review): if the model can keep choosing packages whose
            # cost never pushes the total past the limit, this does not
            # terminate -- confirm that Path/the model prevent that.
            if times + path.getReturnTime(package) > define.get_value('time_limit'):
                return returns

            reward = package.getUrgency()
            path.addWorkPackage(package)
            path.setResourceWorkingTime(times)
            path.setResourcePosition(
                package.getX(), package.getY(), package.getId()
            )
            returns += reward
def reset(self):
    """Load the next problem instance and return its initial state.

    The instance is fetched from ``gen_data.wrapper`` using
    ``range_start + count``; ``count`` is then advanced by one and wrapped
    into the width of ``[range_start, range_end)``. A fresh ``Path`` is
    built on the first resource and ``done`` is cleared.

    Returns:
        Whatever ``self.path.to_state()`` yields for the new path.
    """
    # Fetch the data before touching the counter, so a failed load leaves
    # ``count`` unchanged.
    instance_id = self.range_start + self.count
    self.packages, self.resources = gen_data.wrapper(instance_id)

    span = self.range_end - self.range_start
    self.count = (self.count + 1) % span

    first_resource = self.resources[0]
    self.path = path_obj.Path(
        first_resource, self.packages, lambda x: 0, True, self.device
    )
    self.done = 0

    initial_state = self.path.to_state()
    return initial_state
# model.load_state_dict() # model = model.to(device) # model.eval() # torch.no_grad() define.init() define.set_value('package_num', args.package_num) define.set_value('time_limit', args.time_limit) define.set_value('time_interval', args.time_interval) define.set_value('func_type', args.func_type) gen_data.generate_data(10000, args.package_num, args.func_type) if args.mode == 'no': pool = Pool(args.pool_num) result = pool.map(f, [gen_data.wrapper(i) for i in range(0, args.span)]) print(result) p = [r[0][0] for r in result] s = [r[0][1] for r in result] print(np.mean(p, axis=0)) np.save('output/output_uniform_{}_{}'.format(args.seed, args.span), p) pickle.dump([r[0] for r in result], open('solution/no_dqn.pkl', 'wb')) elif args.mode == 'dqn': state_dict = torch.load('model/model_{}.ckpt'.format(args.model), map_location=torch.device('cpu')) datas = [gen_data.wrapper(i) for i in range(0, args.span)] data2s = [gen_data.get_data(i) for i in range(0, args.span)] print(len(datas), args.span) if args.debug: result = [] for line in [[