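# ---------------------------------------------------------------------------
# Illustrative sketch only. PriortizedReplay (constructed below) is this
# repo's own class and its implementation is not shown in this excerpt.
# Assuming it is a standard proportional prioritized experience replay buffer
# (Schaul et al., 2016) where beta is the importance-sampling exponent, a
# minimal version could look like the class below. The class name, the alpha
# default, and the method names here are assumptions for illustration; the
# meaning of the second positional argument (10) passed to PriortizedReplay
# is not visible in this excerpt and is not modeled here.
# ---------------------------------------------------------------------------
import numpy as np


class _SketchPrioritizedReplay:
    """Minimal proportional prioritized replay (sketch, not the repo's class)."""

    def __init__(self, capacity, alpha=0.6, beta=0.6):
        self.capacity = capacity
        self.alpha = alpha    # how strongly priority shapes the sampling distribution
        self.beta = beta      # importance-sampling correction exponent
        self.data = []
        self.priorities = np.zeros(capacity, dtype=np.float32)
        self.pos = 0

    def add(self, transition):
        # New transitions get the current max priority so each is sampled at least once.
        max_p = self.priorities[:len(self.data)].max() if self.data else 1.0
        if len(self.data) < self.capacity:
            self.data.append(transition)
        else:
            self.data[self.pos] = transition
        self.priorities[self.pos] = max_p
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size):
        # Sample indices proportionally to priority^alpha.
        p = self.priorities[:len(self.data)] ** self.alpha
        p = p / p.sum()
        idx = np.random.choice(len(self.data), batch_size, p=p)
        # Importance weights undo the bias introduced by non-uniform sampling.
        w = (len(self.data) * p[idx]) ** (-self.beta)
        w = w / w.max()
        return [self.data[i] for i in idx], idx, w

    def update_priorities(self, idx, td_errors, eps=1e-6):
        # Refresh priorities with the latest absolute TD errors.
        self.priorities[idx] = np.abs(td_errors) + eps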
# Tail of the preceding per-graph change evaluation: record and log the mean result.
ch.append(np.mean(rs))
print("Change for %s is %f" % (gp, ch[-1]))
logging.info('Change Results:' + str(obj1) + ' ' + str(S1))

final_goal = 0
# Allow the command line to override the objective.
if args.obj is not None:
    obj = args.obj

# One standard environment per training graph.
envs = []
for g, seeds in zip(graphs, e_seeds_list):
    env = NetworkEnv(fullGraph=g, seeds=seeds, opt_reward=0, nop_r=args.nop_reward,
                     times_mean=args.times_mean_env, bad_reward=args.bad_reward,
                     clip_max=args.max_reward, clip_min=args.min_reward,
                     normalize=args.norm_reward)
    envs.append(env)

# Prioritized replay buffers: one for ordinary transitions and one (replay_her,
# presumably for hindsight-relabeled transitions, given the name) for the
# goal-conditioned environments below.
replay = PriortizedReplay(BUFF_SIZE, 10, beta=0.6)
# BUFF_SIZE2 = 80
replay_her = PriortizedReplay(BUFF_SIZE, 10, beta=0.6)

# Goal-conditioned (budget-constrained) counterparts of the environments above.
goal_envs = []
for g, seeds in zip(graphs, e_seeds_list):
    env = NetworkEnv2(fullGraph=g, seeds=seeds, opt_reward=0, nop_r=args.nop_reward,
                      times_mean=args.times_mean_env, bad_reward=args.bad_reward,
                      clip_max=args.max_reward, clip_min=args.min_reward,
                      normalize=args.norm_reward, budget=budget)
    goal_envs.append(env)

# State and action spaces share a dimension here (state_dim=action_dim below).
logging.info('State Dimensions: ' + str(action_dim))
logging.info('Action Dimensions: ' + str(action_dim))

acmodel = DQNTrainer(input_dim=input_dim, state_dim=action_dim, action_dim=action_dim,
                     replayBuff=replay, lr=LR, use_cuda=use_cuda, gamma=args.gamma,