def test_multi(self):
    passed = 0

    def network_builder(inputs):
        layer = layers['dense']
        state0 = layer(x=layer(x=inputs['state0'], size=32), size=32)
        state1 = layer(x=layer(x=inputs['state1'], size=32), size=32)
        state2 = layer(x=layer(x=inputs['state2'], size=32), size=32)
        return state0 * state1 * state2

    for _ in xrange(5):
        environment = MinimalTest(definition=[False, (False, 2), (False, (1, 2))])
        config = Configuration(
            batch_size=8,
            learning_rate=0.001,
            memory_capacity=800,
            first_update=80,
            target_update_frequency=20,
            demo_memory_capacity=100,
            demo_sampling_ratio=0.2,
            states=environment.states,
            actions=environment.actions,
            network=network_builder
        )
        agent = DQFDAgent(config=config)

        # First generate demonstration data and pretrain
        demonstrations = list()
        terminal = True
        for n in xrange(50):
            if terminal:
                state = environment.reset()
            action = dict(action0=1, action1=(1, 1), action2=((1, 1),))
            state, reward, terminal = environment.execute(action=action)
            demonstration = dict(state=state, action=action, reward=reward, terminal=terminal, internal=[])
            demonstrations.append(demonstration)
        agent.import_demonstrations(demonstrations)
        agent.pretrain(steps=1000)

        # Normal training
        runner = Runner(agent=agent, environment=environment)

        def episode_finished(r):
            return r.episode < 50 or not all(x >= 1.0 for x in r.episode_rewards[-50:])

        runner.run(episodes=1000, episode_finished=episode_finished)
        print('DQFD agent (multi-state/action): ' + str(runner.episode))
        if runner.episode < 1000:
            passed += 1

    print('DQFD agent (multi-state/action) passed = {}'.format(passed))
    self.assertTrue(passed >= 4)
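# The test snippets here assume imports along the following lines. The module
# paths follow the TensorForce 0.2.x layout and are an assumption; they may
# differ in other versions. `xrange` comes from `six` for Python 2/3
# compatibility.
from six.moves import xrange
from tensorforce import Configuration
from tensorforce.agents import DQFDAgent
from tensorforce.core.networks import layered_network_builder, layers
from tensorforce.environments.minimal_test import MinimalTest
from tensorforce.execution import Runner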
def test_discrete(self):
    passed = 0

    for _ in xrange(5):
        environment = MinimalTest(definition=False)
        config = Configuration(
            batch_size=8,
            learning_rate=0.001,
            memory_capacity=800,
            first_update=80,
            target_update_frequency=20,
            demo_memory_capacity=100,
            demo_sampling_ratio=0.2,
            memory=dict(type='replay', random_sampling=True),
            states=environment.states,
            actions=environment.actions,
            network=layered_network_builder([
                dict(type='dense', size=32),
                dict(type='dense', size=32)
            ])
        )
        agent = DQFDAgent(config=config)

        # First generate demonstration data and pretrain
        demonstrations = list()
        terminal = True
        for n in xrange(50):
            if terminal:
                state = environment.reset()
            action = 1
            state, reward, terminal = environment.execute(action=action)
            demonstration = dict(state=state, action=action, reward=reward, terminal=terminal, internal=[])
            demonstrations.append(demonstration)
        agent.import_demonstrations(demonstrations)
        agent.pretrain(steps=1000)

        # Normal training
        runner = Runner(agent=agent, environment=environment)

        def episode_finished(r):
            return r.episode < 100 or not all(
                x / l >= reward_threshold
                for x, l in zip(r.episode_rewards[-100:], r.episode_lengths[-100:])
            )

        runner.run(episodes=1000, episode_finished=episode_finished)
        print('DQFD agent: ' + str(runner.episode))
        if runner.episode < 1000:
            passed += 1

    print('DQFD agent passed = {}'.format(passed))
    self.assertTrue(passed >= 4)
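# `reward_threshold` is not defined inside the snippet above; the original
# test module presumably defines it at file level. A hypothetical stand-in if
# you want to run the test in isolation (the value is an assumption, not the
# original constant):
reward_threshold = 0.9  # average per-step reward required to count an episode as solved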
def test_dqfd_agent(self):
    passed = 0

    for _ in xrange(5):
        environment = MinimalTest(continuous=False)
        config = Configuration(
            batch_size=16,
            learning_rate=0.001,
            memory_capacity=800,
            first_update=80,
            repeat_update=4,
            target_update_frequency=20,
            demo_memory_capacity=100,
            demo_sampling_ratio=0.1,
            states=environment.states,
            actions=environment.actions,
            network=layered_network_builder(layers_config=[
                dict(type='dense', size=32, l2_regularization=0.0001)
            ])
        )
        agent = DQFDAgent(config=config)

        # First generate demonstration data and pretrain
        demonstrations = list()
        terminal = True
        for n in xrange(50):
            if terminal:
                state = environment.reset()
            action = 1
            state, reward, terminal = environment.execute(action=action)
            demonstration = dict(state=state, action=action, reward=reward, terminal=terminal, internal=[])
            demonstrations.append(demonstration)
        agent.import_demonstrations(demonstrations)
        agent.pretrain(steps=1000)

        # Normal training
        runner = Runner(agent=agent, environment=environment)

        def episode_finished(r):
            return r.episode < 100 or not all(x >= 1.0 for x in r.episode_rewards[-100:])

        runner.run(episodes=1000, episode_finished=episode_finished)
        print('DQFD Agent: ' + str(runner.episode))
        if runner.episode < 1000:
            passed += 1

    print('DQFD Agent passed = {}'.format(passed))
    self.assertTrue(passed >= 4)
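# How `demo_sampling_ratio` maps to a per-update demonstration batch in the
# TensorForce 0.x DQFDAgent (formula from that implementation; treat it as an
# assumption for other versions):
#
#   demo_batch_size = int(demo_sampling_ratio * batch_size / (1.0 - demo_sampling_ratio))
#
# With demo_sampling_ratio=0.1 and batch_size=16 this yields int(1.6 / 0.9) = 1,
# i.e. one demonstration transition is replayed alongside each online batch.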
def get_dqfd_agent(environment, bootstrap, *args, **kwargs):
    with open('config/cnn_network.json', 'r') as infile:
        network = json.load(infile)

    agent = DQFDAgent(
        states=environment.states,
        actions=environment.actions,
        network=network,
        memory={
            "type": "replay",
            "capacity": 32000,
            "include_next_states": True,
        },
        saver={
            "directory": "checkpoint/dqfd",
            "seconds": 1800,
        },
    )

    if bootstrap:
        internals = agent.current_internals
        for demo in load_demos():
            states = demo['states']
            moves = demo['moves']
            demonstrations = [
                {
                    "states": state,
                    "internals": internals,
                    "actions": move,
                    "terminal": False,
                    "reward": 1,
                }
                for state, move in zip(states, moves)
            ]
            demonstrations[-1]['terminal'] = True
            demonstrations[-1]['reward'] = -1
            agent.import_demonstrations(demonstrations)

        print("Pretraining agent network")
        agent.pretrain(steps=15000)
        print("Saving trained network")
        agent.model.save()
    else:
        agent.model.restore()

    return agent
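# A minimal usage sketch for the factory above, assuming a TensorForce 0.x
# Environment whose execute() returns (state, reward, terminal) as in the
# tests earlier in this file (newer versions return (state, terminal, reward)
# instead). The wrapper function and episode count are illustrative
# assumptions, not part of the original code.
def run_episodes(environment, num_episodes=10, bootstrap=True):
    agent = get_dqfd_agent(environment, bootstrap)
    for _ in range(num_episodes):
        state = environment.reset()
        agent.reset()
        terminal = False
        while not terminal:
            # act/observe is the standard TensorForce 0.x agent interface
            action = agent.act(states=state)
            state, reward, terminal = environment.execute(action=action)
            agent.observe(terminal=terminal, reward=reward)
    return agent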
def createDQFDAgent(states, actions, rewards, terminals):
    # `env`, `networkFirstLayer` and `networkLastLayer` are assumed to be
    # defined at module level in the original script.
    agent = DQFDAgent(
        states=env.states,
        actions=env.actions,
        network=[
            dict(type='dense', size=networkFirstLayer),
            # geometric mean of the first and last layer sizes
            dict(type='dense', size=int((networkFirstLayer * networkLastLayer) ** 0.5)),
            dict(type='dense', size=networkLastLayer),
        ],
        optimizer=dict(type='adam', learning_rate=1e-4),
        target_sync_frequency=10,
    )

    demonstrations = dict(
        states=states,
        # internals=internals,
        actions=actions,
        terminal=terminals,  # fixed: the parameter is named `terminals`
        reward=rewards,      # fixed: the parameter is named `rewards`
    )
    agent.import_demonstrations(demonstrations=demonstrations)
    agent.pretrain(steps=24 * 10)
    return agent
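# Hypothetical call site for the helper above. The demonstration arrays are
# illustrative placeholders; in practice each list holds one entry per
# recorded expert step, with all four lists the same length.
demo_states = [env.reset()]     # recorded observations (assumes `env` exists)
demo_actions = [0]              # expert actions taken in those states
demo_rewards = [1.0]            # rewards observed after each action
demo_terminals = [True]         # True only on episode-ending steps
agent = createDQFDAgent(demo_states, demo_actions, demo_rewards, demo_terminals)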