def exp_b(net, name, change, device, flag=False):
    """Build one single-step experience source per environment of ``net``.

    Creates ``net.num_envs`` environments with ``make_env``, registers them
    on the network through ``net.set_envs``, and wraps every environment in
    a ``ptan.experience.ExperienceSource`` that shares a single
    ``Model.AgentA2C`` agent.

    Args:
        net: Object exposing ``num_envs`` and ``set_envs``; it is also passed
            to ``make_env`` for each environment.
        name: Forwarded unchanged to ``net.set_envs``.
        change: Forwarded unchanged to ``net.set_envs``.
        device: Passed to ``Model.AgentA2C`` as its ``device`` argument.
        flag: Forwarded unchanged to ``make_env``. Defaults to ``False``.

    Returns:
        A list of ``ExperienceSource`` objects (``steps_count=1``), one per
        created environment, in the same order as the environments.
    """
    envs = [make_env(net, idx, flag) for idx in range(net.num_envs)]
    net.set_envs(envs, name, change)

    # NOTE(review): ``net_act`` is not a parameter of this function; it is
    # resolved from the enclosing/module scope. Confirm that this is the
    # intended network and not a leftover that should read ``net``.
    agent = Model.AgentA2C(net_act, device=device)

    # Iterate over the environments that were actually created instead of
    # indexing with the global NUM_ENVS: the list has ``net.num_envs``
    # entries, so a differing NUM_ENVS would either raise IndexError or
    # silently leave some environments without an experience source.
    return [
        ptan.experience.ExperienceSource(env, agent, steps_count=1)
        for env in envs
    ]
def exp_b(net_1, name, change, device, flag=False):
    """Build per-environment experience sources and the agent driving them.

    Creates ``net_1.num_envs`` environments with ``make_env``, registers
    them on the network through ``net_1.set_envs``, builds a single
    ``Model.AgentA2C`` agent around ``net_1``, and wraps every environment
    in a ``ptan.experience.ExperienceSource`` that uses that agent.

    Args:
        net_1: Object exposing ``num_envs`` and ``set_envs``; it is also
            passed to ``make_env`` and to ``Model.AgentA2C``.
        name: Forwarded unchanged to ``net_1.set_envs``.
        change: Forwarded unchanged to ``net_1.set_envs``.
        device: Passed to ``Model.AgentA2C`` as its ``device`` argument.
        flag: Forwarded unchanged to ``make_env``. Defaults to ``False``.

    Returns:
        A ``(sources, agent)`` tuple: ``sources`` is a list of
        ``ExperienceSource`` objects (``steps_count=1``), one per created
        environment in creation order, and ``agent`` is the shared
        ``Model.AgentA2C`` instance.
    """
    envs = [make_env(net_1, idx, flag) for idx in range(net_1.num_envs)]
    net_1.set_envs(envs, name, change)

    agent = Model.AgentA2C(net_1, device=device)

    # Iterate over the environments that were actually created instead of
    # indexing with the global NUM_ENVS: the list has ``net_1.num_envs``
    # entries, so a differing NUM_ENVS would either raise IndexError or
    # silently leave some environments without an experience source.
    sources = [
        ptan.experience.ExperienceSource(env, agent, steps_count=1)
        for env in envs
    ]
    return sources, agent