def generate_game(board_size, rounds, max_moves, temperature):
    boards, moves = [], []
    encoder = get_encoder_by_name('oneplane', board_size)
    game = goboard.GameState.new_game(board_size)
    bot = mcts.MCTSAgent(rounds, temperature)
    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        move = bot.select_move(game)
        if move.is_play:
            boards.append(encoder.encode(game))
            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            moves.append(move_one_hot)
        print_move(game.next_player, move)
        game = game.apply_move(move)
        num_moves += 1
        if num_moves > max_moves:
            break
    return np.array(boards), np.array(moves)  # <10>
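# Hedged usage sketch (not part of the original snippet): generate one
# self-play game on a small board and save the encoded positions and
# one-hot moves with NumPy; the file names here are illustrative.
if __name__ == '__main__':
    xs, ys = generate_game(board_size=9, rounds=500, max_moves=60, temperature=0.8)
    np.save('features.npy', xs)  # encoded board states, one per recorded move
    np.save('labels.npy', ys)    # one-hot encoded moves, shape (n, 81) on 9x9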
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    policy_hidden_layer = Dense(args.hidden_size, activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(args.hidden_size, activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
def main():
    workdir = r'/media/nail/SSD_Disk/Models/'
    board_size = 19
    network = 'large'
    hidden_size = 512
    output_file = workdir + 'ac_agent.h5'

    encoder = encoders.get_encoder_by_name('simple', board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    processed_board = board_input
    network = getattr(dlgo.networks, network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    policy_hidden_layer = Dense(hidden_size, activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(hidden_size, activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=[board_input], outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('--output_file', '-o', type=str, default='./agents/q_agent_0.h5')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(),), name='action_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    board_plus_action = concatenate([action_input, processed_board])
    hidden_layer = Dense(args.hidden_size, activation='relu')(board_plus_action)
    value_output = Dense(1, activation='sigmoid')(hidden_layer)

    model = Model(inputs=[board_input, action_input], outputs=value_output)

    new_agent = rl.QAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--output-file', required=True)
    args = parser.parse_args()
    board_size = args.board_size
    output_file = args.output_file

    encoder = encoders.get_encoder_by_name('simple', board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(),), name='action_input')

    conv1a = ZeroPadding2D((2, 2))(board_input)
    conv1b = Conv2D(64, (5, 5), activation='relu')(conv1a)
    conv2a = ZeroPadding2D((1, 1))(conv1b)
    conv2b = Conv2D(64, (3, 3), activation='relu')(conv2a)
    flat = Flatten()(conv2b)
    processed_board = Dense(512)(flat)

    board_and_action = concatenate([action_input, processed_board])
    hidden_layer = Dense(256, activation='relu')(board_and_action)
    value_output = Dense(1, activation='tanh')(hidden_layer)

    model = Model(inputs=[board_input, action_input], outputs=value_output)

    new_agent = rl.QAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
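# Illustrative sketch (an assumption, not from the source): the two-input
# model above scores (board, action) pairs, so an agent can evaluate every
# board point in one batched predict call and pick the highest-valued one.
# `model` and `encoder` are the objects built in main(); `game_state` is a
# hypothetical dlgo GameState.
def select_greedy_move(model, encoder, game_state):
    board_tensor = encoder.encode(game_state)
    num_points = encoder.num_points()
    board_batch = np.repeat(board_tensor[np.newaxis], num_points, axis=0)
    action_batch = np.eye(num_points)  # one one-hot action per row
    values = model.predict([board_batch, action_batch])[:, 0]
    return encoder.decode_point_index(int(np.argmax(values)))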
def generate_games(board_size, rounds, max_moves, temperature):
    # In boards you store the encoded board states; moves is for encoded moves
    boards, moves = [], []
    # Initialize a OnePlaneEncoder by name with the given board size
    encoder = get_encoder_by_name('oneplane', board_size)
    # A new game of size board_size is instantiated
    game = goboard.GameState.new_game(board_size)
    bot = mcts.MCTSAgent(rounds, temperature)
    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        move = bot.select_move(game)
        if move.is_play:
            boards.append(encoder.encode(game))
            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            moves.append(move_one_hot)
        print_move(game.next_player, move)
        game = game.apply_move(move)
        num_moves += 1
        if num_moves > max_moves:
            break
    return np.array(boards), np.array(moves)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--output-file')
    args = parser.parse_args()
    board_size = args.board_size
    output_file = args.output_file

    encoder = encoders.get_encoder_by_name('simple', board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')

    conv1 = Conv2D(64, (3, 3), padding='same', activation='relu')(board_input)
    conv2 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv1)
    conv3 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv2)
    flat = Flatten()(conv3)
    processed_board = Dense(512)(flat)

    policy_hidden_layer = Dense(512, activation='relu')(processed_board)
    policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

    value_hidden_layer = Dense(512, activation='relu')(processed_board)
    value_output = Dense(1, activation='tanh')(value_hidden_layer)

    model = Model(inputs=board_input, outputs=[policy_output, value_output])

    new_agent = rl.ACAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
def create_v_model(pth="//home//nail//Code_Go//checkpoints//", board_size=19,
                   network='large', hidden_size=512, lr=0.01):
    output_file = pth + 'v_model' + '.h5'
    encoder = encoders.get_encoder_by_name('simple', board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    action_input = Input(shape=(encoder.num_points(),), name='action_input')

    processed_board = board_input
    network = getattr(dlgo.networks, network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    board_plus_action = concatenate([action_input, processed_board])
    hidden_layer = Dense(hidden_size, activation='relu')(board_plus_action)
    value_output = Dense(1, activation='sigmoid')(hidden_layer)

    model = Model(inputs=[board_input, action_input], outputs=value_output)
    opt = SGD(lr=lr)
    model.compile(loss='mse', optimizer=opt)

    new_agent = rl.ValueAgent(model, encoder)
    with h5py.File(output_file, 'w') as outf:
        new_agent.serialize(outf)
    return new_agent
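# Hedged usage sketch: with the defaults above this builds and compiles the
# two-input model, writes v_model.h5 into the checkpoint directory, and
# returns the wrapped agent for immediate use.
value_agent = create_v_model(board_size=19, network='large', lr=0.01)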
def load_policy_agent(h5file):
    model = kerasutil.load_model_from_hdf5_group(h5file['model'])
    encoder_name = h5file['encoder'].attrs['name']
    # Note: under Python 3, h5py may return this attribute as bytes;
    # the variants below decode it explicitly before the lookup.
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    encoder = encoders.get_encoder_by_name(
        encoder_name, (board_width, board_height))
    return PolicyAgent(model, encoder)
def load_prediction_agent(h5file):
    model = kerasutil.load_model_from_hdf5_group(h5file['model'])
    encoder_name = h5file['encoder'].attrs['name']
    if not isinstance(encoder_name, str):
        encoder_name = encoder_name.decode('ascii')
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    encoder = encoders.get_encoder_by_name(
        encoder_name, (board_width, board_height))
    return DeepLearningAgent(model, encoder)
def load_policy_agent(h5file):
    model = kerasutil.load_model_from_hdf5_group(
        h5file['model'],
        custom_objects={'policy_gradient_loss': policy_gradient_loss})
    encoder_name = h5file['encoder'].attrs['name']
    if not isinstance(encoder_name, str):
        encoder_name = encoder_name.decode('ascii')
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    encoder = encoders.get_encoder_by_name(
        encoder_name, (board_width, board_height))
    return PolicyAgent(model, encoder)
def load_policy_agent(h5file):
    # Uses built-in Keras functions to load the model structure and weights
    model = kerasutil.load_model_from_hdf5_group(h5file['model'])
    # Recovers the board encoder
    encoder_name = h5file['encoder'].attrs['name']
    board_width = h5file['encoder'].attrs['board_width']
    board_height = h5file['encoder'].attrs['board_height']
    if type(encoder_name) == bytes:
        encoder_name = encoder_name.decode()
    encoder = encoders.get_encoder_by_name(
        encoder_name, (board_width, board_height))
    return PolicyAgent(model, encoder)  # Reconstructs the agent
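# Hedged usage sketch: read a serialized agent back from disk; the file name
# is illustrative, and h5py is assumed imported as in the save paths above.
with h5py.File('policy_agent.h5', 'r') as inf:
    agent = load_policy_agent(inf)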
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    # parser.add_argument('--network', default='large')
    # parser.add_argument('--hidden-size', type=int, default=512)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('zero', args.board_size)
    model = networks.dual_residual_network(input_shape=encoder.shape(), blocks=8)
    model.summary()

    new_agent = zero.ZeroAgent(model, encoder, rounds_per_move=1000, c=2.0)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)

    model = Sequential()
    for layer in dlgo.networks.leaky.layers(encoder.shape()):
        model.add(layer)
    model.add(Dense(encoder.num_points()))
    model.add(Activation('softmax'))
    opt = SGD(lr=0.02)
    model.compile(loss=agent.policy_gradient_loss, optimizer=opt)

    new_agent = agent.PolicyAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
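# Note (sketch): because this model is compiled with the custom
# policy_gradient_loss, deserializing it later needs that loss passed back in
# via custom_objects, as one of the load_policy_agent variants above does:
#     kerasutil.load_model_from_hdf5_group(
#         h5file['model'],
#         custom_objects={'policy_gradient_loss': policy_gradient_loss})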
def generate_game(board_size, rounds, max_moves, temperature):
    # boards stores encoded board states; moves stores encoded moves
    boards, moves = [], []  # <1>
    # Initialize a OnePlaneEncoder with the given board size
    encoder = get_encoder_by_name('oneplane', board_size)  # <2>
    # A new game of size board_size is instantiated
    game = goboard.GameState.new_game(board_size)  # <3>
    # A Monte Carlo tree search agent with the given rounds and temperature is the bot
    bot = mcts.MCTSAgent(rounds, temperature)  # <4>
    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        # The next move is selected by the bot
        move = bot.select_move(game)  # <5>
        if move.is_play:
            # The encoded board state is appended to boards
            boards.append(encoder.encode(game))  # <6>
            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            # The one-hot encoded next move is appended to moves
            moves.append(move_one_hot)  # <7>
        print_move(game.next_player, move)
        # Afterward the bot's move is applied to the board
        game = game.apply_move(move)  # <8>
        num_moves += 1
        # Continue with the next move unless the maximum number of moves is reached
        if num_moves > max_moves:  # <9>
            break
    return np.array(boards), np.array(moves)  # <10>
def generate_game(board_size, rounds, max_moves, temperature):
    # Initialize encoded board states and encoded moves
    boards, moves = [], []
    # Initialize a OnePlaneEncoder by name with the given board size
    encoder = get_encoder_by_name('oneplane', board_size)
    # Instantiate a new game with board_size
    game = goboard.GameState.new_game(board_size)
    # MCTS agent bot with the specified rounds and temperature
    bot = mcts.MCTSAgent(rounds, temperature)
    num_moves = 0
    while not game.is_over():
        print_board(game.board)
        # The bot picks the next move
        move = bot.select_move(game)
        if move.is_play:
            # Append the encoded board to boards
            boards.append(encoder.encode(game))
            # The one-hot-encoded next move is appended to moves
            move_one_hot = np.zeros(encoder.num_points())
            move_one_hot[encoder.encode_point(move.point)] = 1
            moves.append(move_one_hot)
        # Apply the bot's move to the board
        print_move(game.next_player, move)
        game = game.apply_move(move)
        num_moves += 1
        # Keep going until the maximum number of moves is reached
        if num_moves > max_moves:
            break
    return np.array(boards), np.array(moves)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--board-size', type=int, default=19)
    parser.add_argument('--network', default='large')
    parser.add_argument('output_file')
    args = parser.parse_args()

    encoder = encoders.get_encoder_by_name('simple', args.board_size)
    board_input = Input(shape=encoder.shape(), name='board_input')
    # Note: action_input is defined but never wired into this model,
    # which takes only the board as input.
    action_input = Input(shape=(encoder.num_points(),), name='action_input')

    processed_board = board_input
    network = getattr(dlgo.networks, args.network)
    for layer in network.layers(encoder.shape()):
        processed_board = layer(processed_board)

    value_output = Dense(1, activation='sigmoid')(processed_board)

    model = Model(inputs=board_input, outputs=value_output)

    new_agent = rl.ValueAgent(model, encoder)
    with h5py.File(args.output_file, 'w') as outf:
        new_agent.serialize(outf)
def main():
    pth = "//home//nail//Code_Go//checkpoints//"
    pth_experience = '//home//nail//Experience//'
    board_size = 19
    network = 'small'
    hidden_size = 512
    learning_agent = input('Agent to train for "action value": ')
    num_games = int(input('Number of games for generating training data = '))
    delta_games = int(input('Increment in the number of games = '))
    learning_agent = pth + learning_agent + '.h5'  # An agent either from policy gradients (chapter 10) or from chapter 7
    output_file = pth + 'new_value_model.h5'  # This will be an agent with two inputs for the action value
    current_agent = pth + 'current_model.h5'  # The agent currently being trained
    lr = 0.0001
    temp_decay = 0.98
    min_temp = 0.00001
    try:
        temperature = float(input('Temperature = '))
    except ValueError:
        temperature = min_temp
    try:
        batch_size = int(input("batch_size = "))
    except ValueError:
        batch_size = 512
    try:
        epochs = int(input('Epochs = '))
    except ValueError:
        epochs = 1
    log_file = input('Processing log: ')
    log_file = pth_experience + log_file + '.txt'
    logf = open(log_file, 'a')
    logf.write('----------------------\n')
    logf.write('Started training agent %s at %s\n'
               % (learning_agent, datetime.datetime.now()))
    # Build the action-value model: two inputs, one output.
    # Compile the model if no action-value model exists yet;
    # otherwise load the existing model.
    if 'value_model' in learning_agent and os.path.isfile(learning_agent):
        New_QAgent = False  # The model already exists; continue training it
        encoder = ''  # Placeholders to avoid "possibly undefined" warnings below
        model = ''
    else:
        # The two-input model does not exist yet, so build it.
        New_QAgent = True
        encoder = encoders.get_encoder_by_name('simple', board_size)
        board_input = Input(shape=encoder.shape(), name='board_input')
        action_input = Input(shape=(encoder.num_points(),), name='action_input')
        processed_board = board_input
        network = getattr(dlgo.networks, network)
        for layer in network.layers(encoder.shape()):
            processed_board = layer(processed_board)
        board_plus_action = concatenate([action_input, processed_board])
        hidden_layer = Dense(hidden_size, activation='relu')(board_plus_action)
        value_output = Dense(1, activation='sigmoid')(hidden_layer)
        model = Model(inputs=[board_input, action_input], outputs=value_output)
        opt = SGD(lr=lr)
        model.compile(loss='mse', optimizer=opt)
    # "Fill" the training model with data from games
    experience = []
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Pattern for selecting training files: ')
    if len(pattern) == 0:
        pattern = "exp*.h5"
    # ==============================================================
    # Build the list of files with experience (game) data
    for entry in lst_files:
        if fnmatch.fnmatch(entry, pattern):
            experience.append(entry)
    # Got the list of game files for training
    exp_filename = ''
    if len(experience) > 0:
        experience.sort()
        exp_filename = experience[0]  # Only one file is needed
    else:
        print('No files in the training folder!')
        exit(2)
    # ==============================================================
    # callback_list = [ModelCheckpoint(pth, monitor='val_accuracy',
    #                                  save_best_only=True)]
    total_work = 0  # Counter of training passes
    exp_buffer = 'empty'  # Buffer with game data
    while True:  # Training can always be interrupted and resumed later.
        if New_QAgent == False:
            q_agent = load_agent(learning_agent)  # The model being trained already exists
            model = q_agent.model  # Load the model
            encoder = q_agent.encoder
            # temperature = q_agent.temperature
        logf.write('Pass = %d\n' % total_work)
        print(50 * '=')
        print('File with games for training: %s...' % exp_filename)
        print(50 * '=')
        if exp_buffer == 'empty':
            exp_buffer = rl.load_experience(h5py.File(exp_filename, "r"))
        # Feed the compiled model with training data from the loaded game buffer.
        n = exp_buffer.states.shape[0]
        num_moves = encoder.num_points()
        y = np.zeros((n,))
        actions = np.zeros((n, num_moves))
        for i in range(n):
            action = exp_buffer.actions[i]
            reward = exp_buffer.rewards[i]
            actions[i][action] = 1
            y[i] = 1 if reward > 0 else -1  # was 0
        # Train the model
        model.fit([exp_buffer.states, actions], y,
                  batch_size=batch_size, epochs=epochs)
        if total_work == 0:  # There is no newly trained agent to compare against yet.
            print('Updating the agent! This is the first updated agent.')
            logf.write('First initial update of the trained agent\n')
            # Save the trained agent
            # output_file = output_file + '_' + str(total_work) + '.h5'
            new_agent = rl.QAgent(model, encoder)
            with h5py.File(current_agent, 'w') as outf:
                new_agent.serialize(outf)
            # os.chdir(pth_experience)
            # lst_files = os.listdir(pth_experience)
            # next_filename = 'exp_q_' + str(total_work) + '.h5'
            # for entry in lst_files:
            #     if fnmatch.fnmatch(entry, "exp*"):
            #         shutil.move(exp_filename, pth_experience + 'Exp_Save//' + next_filename)
            #         # os.remove('//home//nail//Experience//' + entry)  # Clean out the "old" agent's game data
            # # Generate new game data with the new agent.
            # exp_filename = pth_experience + next_filename
            # do_self_play(19, output_file, output_file, num_games=num_games,
            #              temperature=temperature, experience_filename=exp_filename)
            # total_work += 1
        if New_QAgent == True:  # There was no two-input agent before.
            total_work += 1
            New_QAgent = False  # A two-input model has now been saved.
            learning_agent = current_agent  # Train the newly created two-input agent from here on.
            new_agent = rl.QAgent(model, encoder)
            with h5py.File(current_agent, 'w') as outf:  # Save the agent as the current one
                new_agent.serialize(outf)
            continue  # Nothing to compare against yet; keep the old game data.
        new_agent = rl.QAgent(model, encoder)
        with h5py.File(current_agent, 'w') as outf:  # Save the agent as the current one
            new_agent.serialize(outf)
        # Compare the new current agent's play against the "old" agent.
        wins = eval(current_agent, learning_agent, num_games=num_games)  # eval here is the script's own evaluation helper
        print('Won %d / %s games (%.3f)' %
              (wins, str(num_games), float(wins) / float(num_games)))
        logf.write('Won %d / %s games (%.3f)\n' %
                   (wins, str(num_games), float(wins) / float(num_games)))
        bt = binom_test(wins, num_games, 0.5) * 100
        print('Binomial test = ', bt, '%')
        logf.write('Binomial test = %f\n' % bt)
        if bt <= 5 and wins > num_games / 2 + num_games / 10:
            # At least 95% confidence that the new bot plays better than the previous one
            print('Updating the agent!')
            # Save the trained agent
            new_agent = rl.QAgent(model, encoder)
            with h5py.File(output_file, 'w') as outf:
                new_agent.serialize(outf)
            logf.write('Agent updated after successful training pass %d at %s\n' %
                       (total_work, datetime.datetime.now()))
            logf.write('New agent: %s\n' % output_file)
            # os.remove('//home//nail//Experience//*')  # Clean out the "old" agent's game data
            next_filename = 'exp_q_' + str(total_work) + '.h5'
            shutil.move(exp_filename, pth_experience + 'Exp_Save//' + next_filename)
            # Generate new game data with the new agent.
            exp_filename = pth_experience + next_filename
            temperature = max(min_temp, temp_decay * temperature)
            do_self_play(19, output_file, output_file, num_games=num_games,
                         temperature=temperature, experience_filename=exp_filename)
            logf.write('New "temperature" = %f\n' % temperature)
        else:
            print('Keeping the agent; the game data also stays the same\n')
        total_work += 1
        print('Number of completed passes = ', total_work)
        logf.write('Completed pass %d at %s\n' %
                   (total_work, datetime.datetime.now()))
        # Generate new training data.
        # num_games += delta_games  # Increase the number of games for training.
        # temperature = max(min_temp, temp_decay * temperature)
        # next_filename = 'exp_q_' + str(total_work) + '.h5'
        # shutil.move(exp_filename, pth_experience + 'Exp_Save//' + next_filename)
        # exp_filename = pth_experience + next_filename
        # do_self_play(19, current_agent, current_agent, num_games=num_games,
        #              temperature=0, experience_filename=exp_filename)
        # exp_buffer = rl.load_experience(h5py.File(exp_filename, "r"))  # Load the new games file into the buffer.
        learning_agent = current_agent  # Update the "previous" trained agent
        logf.flush()
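# Worked example of the promotion test above (assuming scipy.stats'
# two-sided binom_test, which this script appears to use): with 100 games,
# binom_test(60, 100, 0.5) is roughly 0.057, so bt ≈ 5.7 > 5 and 60 wins
# would not trigger promotion; 62 wins gives roughly 0.021 (bt ≈ 2.1) and
# also clears the second condition, wins > 100/2 + 100/10 = 60.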
def main():
    board_size = 19
    hidden_size = 1024
    workdir = '//media//nail//SSD_Disk//Models//'
    output_file = workdir + 'q_agent.h5'
    lr = 0.01
    batch_size = 512
    pth = "//media//nail//SSD_Disk//Models//"
    pth_experience = '//media//nail//SSD_Disk//Experience//'
    experience_filename = pth_experience + 'exp'
    only_form_games = input('Only generate new game data with the existing model? (Y/N) ').lower()
    # "Fill" the training model with data from games
    experience = []
    os.chdir(pth_experience)
    lst_files = os.listdir(pth_experience)
    pattern = input('Pattern for selecting training files: ')
    if len(pattern) == 0:
        pattern = "exp*.h5"
    if only_form_games != 'y':
        # ==============================================================
        # Build the list of files with experience (game) data
        new_form_games = input('Generate new game data with the newly created model? (Y/N) ').lower()
        count_exp = int(input('How many files to take for initial training? '))
        len_lst_files = len(lst_files)
        if count_exp <= len_lst_files:
            for entry in lst_files[:count_exp]:
                if fnmatch.fnmatch(entry, pattern):
                    experience.append(entry)
        else:
            for entry in lst_files:
                if fnmatch.fnmatch(entry, pattern):
                    experience.append(entry)
        # Got the list of game files for training; sort it for convenience.
        if len(experience) > 0:
            experience.sort()
        else:
            print('No files in the training folder!')
            exit(2)

        encoder = encoders.get_encoder_by_name('simple', board_size)
        board_input = Input(shape=encoder.shape(), name='board_input')
        action_input = Input(shape=(encoder.num_points(),), name='action_input')
        # =============================================================
        # The network is partly the same as the book authors' original "large"
        # =============================================================
        conv_0a = Conv2D(64, (7, 7), padding='same', activation='relu')(board_input)
        # BatchNormalization()
        conv_1a = Conv2D(64, (5, 5), padding='same', activation='relu')(conv_0a)
        # BatchNormalization()
        conv_2a = Conv2D(48, (5, 5), padding='same', activation='relu')(conv_1a)
        # BatchNormalization()
        conv_3a = Conv2D(48, (5, 5), padding='same', activation='relu')(conv_2a)
        # BatchNormalization()
        conv_4a = Conv2D(32, (5, 5), padding='same', activation='relu')(conv_3a)
        # BatchNormalization()
        flat = Flatten()(conv_4a)
        processed_board = Dense(1024)(flat)

        board_plus_action = concatenate([action_input, processed_board])
        hidden_layer = Dense(hidden_size, activation='relu')(board_plus_action)
        value_output = Dense(1, activation='sigmoid')(hidden_layer)
        model = Model(inputs=[board_input, action_input], outputs=value_output)
        opt = SGD(lr=lr)
        model.compile(loss='mse', optimizer=opt)

        # Train the model with fit_generator
        model.fit_generator(
            generator=generator_q(experience=experience, num_moves=361,
                                  batch_size=batch_size),
            steps_per_epoch=get_num_samples(experience=experience, num_moves=361,
                                            batch_size=batch_size) / batch_size,
            verbose=1,
            epochs=1,
            initial_epoch=0)
        # Went through all the files
        new_agent = rl.QAgent(model, encoder)
        with h5py.File(output_file, 'w') as outf:
            new_agent.serialize(outf)

        if new_form_games == 'y':
            # Build the list of experience files with the newly trained two-input agent.
            num_games = int(input('Number of games to generate = '))
            chunk = int(input('Number of games per chunk file = '))
            experience = []
            os.chdir(pth_experience)
            lst_files = os.listdir(pth_experience)
            for entry in lst_files:
                # if fnmatch.fnmatch(entry, 'exp*'):  # logs also go to the save folder; everything gets cleaned out.
                if os.path.isfile(entry):
                    experience.append(entry)
            for filename in experience:
                shutil.move(filename, pth_experience + 'Exp_Save//' + filename)
            do_self_play(19, output_file, output_file, num_games=num_games,
                         temperature=0, experience_filename=experience_filename,
                         chunk=chunk)
    else:
        model_file = input('File with the existing model = ')
        num_games = int(input('Number of games to generate = '))
        chunk = int(input('Number of games per chunk file = '))
        model_file = workdir + model_file + '.h5'
        do_self_play(19, model_file, model_file, num_games=num_games,
                     temperature=0, experience_filename=experience_filename,
                     chunk=chunk)
from keras.models import Model
from keras.layers import Conv2D, Dense, Flatten, Input

from dlgo import encoders

board_size = 19
encoder = encoders.get_encoder_by_name('simple', board_size)

board_input = Input(shape=encoder.shape(), name='board_input')
conv1 = Conv2D(64, (3, 3), padding='same', activation='relu')(board_input)
conv2 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv1)
conv3 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv2)
flat = Flatten()(conv3)
processed_board = Dense(512)(flat)

policy_hidden_layer = Dense(512, activation='relu')(processed_board)
policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

value_hidden_layer = Dense(512, activation='relu')(processed_board)
value_output = Dense(1, activation='tanh')(value_hidden_layer)

model = Model(inputs=board_input, outputs=[policy_output, value_output])
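# A minimal compile sketch (the loss choices and weights here are
# illustrative assumptions, not prescribed by the snippet above): the policy
# head is a softmax over board points, so categorical cross-entropy fits it;
# the value head is a tanh scalar, so mean squared error fits it. Keras
# matches the loss list to the outputs in order.
model.compile(
    optimizer='sgd',
    loss=['categorical_crossentropy', 'mse'],
    loss_weights=[1.0, 0.5])  # hypothetical relative weighting of the heads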
def main(): """ board_input = Input(shape=encoder.shape(), name='board_input') # Add as many convolutional layers as you like conv1 = Conv2D(64, (3, 3), padding='same', activation='relu')(board_input) conv2 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv1) conv3 = Conv2D(64, (3, 3), padding='same', activation='relu')(conv2) flat = Flatten()(conv3) # This example uses hidden layers of size 512. # Experiment to find the best size. # The three hidden layers don't need to be the same size processed_board = Dense(512)(flat) # This output yields the policy function policy_hidden_layer = Dense(512, activation='relu')(processed_board) policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer) # This output yields the value function value_hidden_layer = Dense(512, activation='relu')(processed_board) value_output = Dense(1, activation='tanh')(value_hidden_layer) model = Model(inputs=board_input, outputs=[policy_output, value_output]) """ # added from gh repo parser = argparse.ArgumentParser() parser.add_argument('--board-size', type=int, default=19) parser.add_argument('--network', default='large') parser.add_argument('--hidden-size', type=int, default=512) parser.add_argument('output_file') args = parser.parse_args() encoder = encoders.get_encoder_by_name('sevenplane', args.board_size) board_input = Input(shape=encoder.shape(), name='board_input') processed_board = board_input network = getattr(dlgo.networks, args.network) for layer in network.layers(encoder.shape()): processed_board = layer(processed_board) policy_hidden_layer = Dense(args.hidden_size, activation='relu')(processed_board) policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer) value_hidden_layer = Dense(args.hidden_size, activation='relu')(processed_board) value_output = Dense(1, activation='tanh')(value_hidden_layer) model = Model(inputs=[board_input], outputs=[policy_output, value_output]) new_agent = rl.ACAgent(model, encoder) with h5py.File(args.output_file, 'w') as outf: new_agent.serialize(outf)