def test_3_alphago_value(self):
    """Train an AlphaGo value agent from stored RL experience and serialize it."""
    print("TEST 3\n=====================================================")
    # Pin TensorFlow to the first visible GPU and let memory grow on demand.
    available_gpus = tf.config.experimental.list_physical_devices('GPU')
    if available_gpus:
        try:
            first_gpu = available_gpus[0]
            tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
            tf.config.experimental.set_memory_growth(first_gpu, True)
            tf.config.set_soft_device_placement(True)
        except RuntimeError as err:
            # Device configuration fails if GPUs were already initialized.
            print(err)

    board_rows, board_cols = 19, 19
    go_encoder = AlphaGoEncoder()
    planes_shape = (go_encoder.num_planes, board_rows, board_cols)

    value_net = alphago_model(planes_shape)
    value_agent = ValueAgent(value_net, go_encoder)

    # Train on the experience buffer produced by the earlier RL test.
    exp_buffer = load_experience(
        h5py.File('test_alphago_rl_experience.h5', 'r'))
    value_agent.train(exp_buffer)

    with h5py.File('test_alphago_value.h5', 'w') as value_agent_out:
        value_agent.serialize(value_agent_out)
def test_1_supervised_learning(self):
    """Build an SL policy agent, sanity-check its output shape, and serialize it."""
    print("TEST 1\n=====================================================")
    # Restrict TensorFlow to the first GPU with on-demand memory growth.
    available_gpus = tf.config.experimental.list_physical_devices('GPU')
    if available_gpus:
        try:
            first_gpu = available_gpus[0]
            tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
            tf.config.experimental.set_memory_growth(first_gpu, True)
            tf.config.set_soft_device_placement(True)
        except RuntimeError as err:
            # Device configuration fails if GPUs were already initialized.
            print(err)

    board_rows, board_cols = 19, 19
    go_encoder = AlphaGoEncoder()
    planes_shape = (go_encoder.num_planes, board_rows, board_cols)

    sl_policy = alphago_model(planes_shape, is_policy_net=True)
    sl_policy.compile('sgd', 'categorical_crossentropy', metrics=['accuracy'])
    sl_agent = DeepLearningAgent(sl_policy, go_encoder)

    # A batch of 10 dummy boards must yield 10 distributions over 19*19 = 361 points.
    dummy_batch = np.ones((10, ) + planes_shape)
    predictions = sl_policy.predict(dummy_batch)
    assert (predictions.shape == (10, 361))

    with h5py.File('test_alphago_sl_policy.h5', 'w') as sl_agent_out:
        sl_agent.serialize(sl_agent_out)
def main():
    """Resume supervised training of the AlphaGo SL policy and serialize the agent.

    Loads weights from 'alphago/alpha_sl_policy_e13_1k.h5', fine-tunes on SGF
    game data, writes per-epoch checkpoints, serializes the resulting agent,
    and finally evaluates on held-out games.
    """
    # SL data: the parallel processor yields batch generators.
    encoder = AlphaGoEncoder()
    processor = GoDataProcessor(encoder=encoder.name())
    generator = processor.load_go_data('train', NUM_GAMES, use_generator=True)
    test_generator = processor.load_go_data('test', NUM_GAMES, use_generator=True)
    # TODO: the non-parallel GoDataProcessor lacks use_generator support.

    # SL policy model, resumed from an earlier training run.
    input_shape = (encoder.num_planes, ROWS, COLS)
    alphago_sl_policy = alphago_model(input_shape=input_shape, is_policy_net=True)
    bot_filepath = 'alphago/alpha_sl_policy_e13_1k.h5'
    alphago_sl_policy.load_weights(bot_filepath)
    alphago_sl_policy.compile(optimizer='sgd',
                              loss='categorical_crossentropy',
                              metrics=['accuracy'])

    # Train. FIX: step counts use floor division — the original `/` produced
    # floats, but Keras expects integer steps_per_epoch/validation_steps.
    epochs = 200
    batch_size = 128
    alphago_sl_policy.fit_generator(
        generator=generator.generate(batch_size, NUM_CLASSES),
        epochs=epochs,
        steps_per_epoch=generator.get_num_samples() // batch_size,
        validation_data=test_generator.generate(batch_size, NUM_CLASSES),
        validation_steps=test_generator.get_num_samples() // batch_size,
        callbacks=[ModelCheckpoint('alphago_sl_policy_load_train_{epoch}.h5')])
    alphago_sl_agent = DeepLearningAgent(alphago_sl_policy, encoder)

    # Persist the trained agent.
    with h5py.File('alphago_sl_policy_load_train.h5', 'w') as sl_agent_out:
        alphago_sl_agent.serialize(sl_agent_out)

    # Evaluate on the held-out test games.
    alphago_sl_policy.evaluate_generator(
        generator=test_generator.generate(batch_size, NUM_CLASSES),
        steps=test_generator.get_num_samples() // batch_size)
def test_3_alphago_value(self):
    """Train a value agent from saved RL experience and serialize it to HDF5."""
    board_rows, board_cols = 19, 19
    go_encoder = AlphaGoEncoder()
    planes_shape = (go_encoder.num_planes, board_rows, board_cols)

    value_net = alphago_model(planes_shape)
    value_agent = ValueAgent(value_net, go_encoder)

    # Experience buffer written by the earlier RL test.
    exp_buffer = load_experience(
        h5py.File('test_alphago_rl_experience.h5', 'r'))
    value_agent.train(exp_buffer)

    with h5py.File('test_alphago_value.h5', 'w') as value_agent_out:
        value_agent.serialize(value_agent_out)
def test_1_supervised_learning(self):
    """Build an SL policy agent, check its prediction shape, and serialize it."""
    board_rows, board_cols = 19, 19
    go_encoder = AlphaGoEncoder()
    planes_shape = (go_encoder.num_planes, board_rows, board_cols)

    sl_policy = alphago_model(planes_shape, is_policy_net=True)
    sl_policy.compile('sgd', 'categorical_crossentropy', metrics=['accuracy'])
    sl_agent = DeepLearningAgent(sl_policy, go_encoder)

    # 10 dummy boards should map to 10 distributions over 361 board points.
    dummy_batch = np.ones((10, ) + planes_shape)
    predictions = sl_policy.predict(dummy_batch)
    assert (predictions.shape == (10, 361))

    with h5py.File('test_alphago_sl_policy.h5', 'w') as sl_agent_out:
        sl_agent.serialize(sl_agent_out)
# Script: build the AlphaGo value network, train it on stored RL experience,
# and serialize the resulting agent. (tag comments mark book listings.)

# tag::init_value[]
from dlgo.networks.alphago import alphago_model
from dlgo.encoders.alphago import AlphaGoEncoder
from dlgo.rl import ValueAgent, load_experience
import h5py

rows, cols = 19, 19
encoder = AlphaGoEncoder()
input_shape = (encoder.num_planes, rows, cols)

alphago_value_network = alphago_model(input_shape)
alphago_value = ValueAgent(alphago_value_network, encoder)
# end::init_value[]

# tag::train_value[]
experience = load_experience(h5py.File('alphago_rl_experience.h5', 'r'))
alphago_value.train(experience)

with h5py.File('alphago_value.h5', 'w') as value_agent_out:
    alphago_value.serialize(value_agent_out)
# end::train_value[]
# Script: train the AlphaGo supervised-learning policy network from SGF game
# data and wrap it in a DeepLearningAgent.
from dlgo.data.parallel_processor import GoDataProcessor
from dlgo.encoders.alphago import AlphaGoEncoder
from dlgo.agent.predict import DeepLearningAgent
from dlgo.networks.alphago import alphago_model

rows, cols = 19, 19
num_classes = rows * cols
num_games = 10000

# Encode SGF game records into training/validation batch generators.
encoder = AlphaGoEncoder()
processor = GoDataProcessor(encoder=encoder.name())
generator = processor.load_go_data('train', num_games, use_generator=True)
test_generator = processor.load_go_data('test', num_games, use_generator=True)

input_shape = (encoder.num_planes, rows, cols)
alphago_sl_policy = alphago_model(input_shape, is_policy_net=True)
alphago_sl_policy.compile('sgd', 'categorical_crossentropy', metrics=['accuracy'])

epochs = 200
batch_size = 128
# FIX: the original read `alphago_sl_policy = fit_generator(...)`, which calls
# an undefined module-level `fit_generator` (NameError) and rebinds the model
# name; training is a method on the compiled model. Step counts use floor
# division because Keras expects integer steps, not floats.
# NOTE(review): ModelCheckpoint (keras.callbacks) is referenced but not
# imported in this chunk — confirm it is imported elsewhere in the full file.
alphago_sl_policy.fit_generator(
    generator=generator.generate(batch_size, num_classes),
    epochs=epochs,
    steps_per_epoch=generator.get_num_samples() // batch_size,
    validation_data=test_generator.generate(batch_size, num_classes),
    validation_steps=test_generator.get_num_samples() // batch_size,
    callbacks=[ModelCheckpoint('alphago_sl_policy_{epoch}.h5')])

alphago_sl_agent = DeepLearningAgent(alphago_sl_policy, encoder)