def test_readme(self):
    environment = UnittestEnvironment(
        states=dict(type='float', shape=(10,)),
        actions=dict(type='int', num_values=5)
    )

    def get_current_state():
        return environment.reset()

    def execute_decision(x):
        return environment.execute(actions=x)[2]

    # Instantiate a Tensorforce agent
    agent = PPOAgent(
        states=dict(type='float', shape=(10,)),
        actions=dict(type='int', num_values=5),
        memory=10000,
        network='auto',
        update_mode=dict(unit='episodes', batch_size=10),
        step_optimizer=dict(type='adam', learning_rate=1e-4)
    )

    # Initialize the agent
    agent.initialize()

    # Retrieve the latest (observable) environment state
    state = get_current_state()  # (float array of shape [10])

    # Query the agent for its action decision
    action = agent.act(states=state)  # (scalar between 0 and 4)

    # Execute the decision and retrieve the current performance score
    reward = execute_decision(action)  # (any scalar float)

    # Pass feedback about performance (and termination) to the agent
    agent.observe(reward=reward, terminal=False)

    agent.close()
    environment.close()

    self.assertTrue(expr=True)
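# The test above performs one act/observe step; in a full episode the same
# calls repeat until the environment signals termination. A minimal sketch,
# assuming an `environment` and `agent` like the ones above and the
# (states, terminal, reward) return order implied by
# environment.execute(actions=x)[2]. The run_episode helper is illustrative,
# not part of the test suite:

def run_episode(agent, environment):
    # Drive one episode with the act/observe cycle from the README example
    states = environment.reset()
    terminal = False
    episode_reward = 0.0
    while not terminal:
        action = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=action)
        agent.observe(reward=reward, terminal=terminal)
        episode_reward += reward
    return episode_reward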
        actions={
            "up": dict(type="float", min_value=0.0, max_value=1.0),
            "down": dict(type="float", min_value=0.0, max_value=1.0),
            "left": dict(type="float", min_value=0.0, max_value=1.0),
            "right": dict(type="float", min_value=0.0, max_value=1.0),
        },
        network='auto',
        memory=10000,
    )
else:
    print("Available agents: vpg, ppo, dqn")
    exit()

print("agent ready", agent)

agent.initialize()  # Set up base of agent

try:
    # Looks to see if a saved model is available and loads it
    lastEpoch = int(os.listdir(tmp + "/saved/player_pun/" + args.agent)[2].split("-")[0])
    agent.restore(directory=tmp + "/saved/player_pun/" + args.agent)
    print("restored")
except Exception:
    # Starts fresh if no saved model is available
    print("DID NOT RESTORE")
    lastEpoch = 0

epochs = 2000000
for epoch in tqdm(range(lastEpoch, epochs + 1)):
    # print(epoch)
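    # Sketch (an assumption, not from the original script): the restore block
    # above parses lastEpoch out of a checkpoint filename of the form
    # "<epoch>-...", so the loop body presumably checkpoints under that naming
    # scheme. agent.save is assumed to accept directory/filename keywords
    # mirroring agent.restore; exact signatures vary across Tensorforce
    # versions, and the save interval here is arbitrary.
    if epoch % 1000 == 0:
        agent.save(directory=tmp + "/saved/player_pun/" + args.agent,
                   filename=str(epoch))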
"user": dict(type="int", num_values=G.graph.shape[0]), "item": dict(type="int", num_values=G.graph.shape[1]) }, network=[ dict(type='flatten'), dict(type="dense", size=32), ], memory=10000, ) print("agent ready", agent) if args.process == "train": new_agent = copy.deepcopy(agent) agent.initialize() try: lastEpoch = int(os.listdir("saved/" + args.agent)[2].split("-")[0]) agent.restore(directory="saved/" + args.agent + "/" + args.contrarian) print("restored") except: lastEpoch = 0 epochs = 100000 cluster_vals = [] for epoch in tqdm(range(lastEpoch, epochs)): G = Audience(20, 15) #20 reccomendations for every user
class SerpentPPO:

    def __init__(self, frame_shape=None, game_inputs=None):
        if frame_shape is None:
            raise SerpentError("A 'frame_shape' tuple kwarg is required...")

        states_spec = {"type": "float", "shape": frame_shape}

        if game_inputs is None:
            raise SerpentError("A 'game_inputs' dict kwarg is required...")

        self.game_inputs = game_inputs
        self.game_inputs_mapping = self._generate_game_inputs_mapping()
        print('game inputs mapping:')
        print(self.game_inputs_mapping)

        actions_spec = {"type": "int", "num_values": len(self.game_inputs)}

        summary_spec = {
            "directory": "./board/",
            "steps": 50,
            "labels": [
                "configuration", "gradients_scalar", "regularization",
                "inputs", "losses", "variables"
            ]
        }

        network_spec = [
            {"type": "conv2d", "size": 16, "window": 8, "stride": 4},
            {"type": "conv2d", "size": 32, "window": 4, "stride": 2},
            {"type": "conv2d", "size": 32, "window": 3, "stride": 1},
            {"type": "flatten"},
            {"type": "dense", "size": 64}
        ]

        baseline_spec = {
            "type": "cnn",
            "conv_sizes": [32, 32],
            "dense_sizes": [32]
        }

        saver_spec = {
            "directory": os.path.join(os.getcwd(), "datasets", "t4androidmodel"),
            "seconds": 120
        }

        # memory_spec = {'type': 'latest', 'include_next_states': False, 'capacity': 1000 * 1000}

        self.agent = PPOAgent(
            states=states_spec,
            actions=actions_spec,
            network=network_spec,
            # baseline_mode='states',
            # baseline=baseline_spec,
            summarizer=summary_spec,
            memory=10,
            update_mode=dict(unit='timesteps', batch_size=2),
            discount=0.97,
            saver=saver_spec
        )

        self.agent.initialize()

        # Leftover kwargs from an older version of this PPOAgent call, kept
        # commented out for reference:
        # batched_observe=2560,
        # scope="ppo",
        # summarizer=summary_spec,
        # network=network_spec,
        # device=None,
        # session_config=None,
        # saver_spec=None,
        # distributed_spec=None,
        # discount=0.97,
        # variable_noise=None,
        # states_preprocessing_spec=None,
        # explorations_spec=None,
        # reward_preprocessing_spec=None,
        # distributions_spec=None,
        # entropy_regularization=0.01,
        # batch_size=2560,
        # keep_last_timestep=True,
        # baseline_mode=None,
        # baseline=None,
        # baseline_optimizer=None,
        # gae_lambda=None,
        # likelihood_ratio_clipping=None,
        # step_optimizer=None,
        # optimization_steps=10

    def generate_action(self, game_frame_buffer):
        # Stack the buffered frames along the channel axis and query the agent
        states = np.stack(game_frame_buffer, axis=2)
        action = self.agent.act(states)
        label = self.game_inputs_mapping[action]
        return action, label, self.game_inputs[label]

    def observe(self, reward=0, terminal=False):
        self.agent.observe(reward=reward, terminal=terminal)

    def _generate_game_inputs_mapping(self):
        # Map consecutive action indices to the game input labels
        mapping = dict()
        for index, key in enumerate(self.game_inputs):
            mapping[index] = key
        return mapping
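# A minimal usage sketch for SerpentPPO. Everything below is illustrative:
# FRAME_SHAPE, GAME_INPUTS, and the zero-filled frame buffer are hypothetical
# stand-ins for whatever the surrounding Serpent.AI game agent plugin provides.

import numpy as np

FRAME_SHAPE = (100, 100, 4)                    # hypothetical: 4 stacked 100x100 frames
GAME_INPUTS = {"JUMP": ["SPACE"], "NOOP": []}  # hypothetical label -> key list mapping

ppo = SerpentPPO(frame_shape=FRAME_SHAPE, game_inputs=GAME_INPUTS)

# generate_action stacks the buffer along axis 2, so 4 frames of shape
# (100, 100) match FRAME_SHAPE above
frame_buffer = [np.zeros((100, 100)) for _ in range(4)]
action, label, game_input = ppo.generate_action(frame_buffer)
# ... send `game_input` to the game, measure a reward from the outcome ...
ppo.observe(reward=0.0, terminal=False)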