def test_model_on_static_examples(model_fname, training_examples_fname, m=0):
    """Interactively compare model predictions with stored training examples.

    Loads the weights from ``model_fname`` and the examples from
    ``training_examples_fname``.  For every inspected example it prints the
    position, the movement predicted by the model and the expected movement,
    then waits for Enter before moving on.

    Args:
        model_fname: file name handed to ``model.load``.
        training_examples_fname: file name handed to
            ``training.read_training_examples``.
        m: example count handed to ``training.read_training_examples``.
            ``0`` is passed through unchanged (presumably "read everything"
            -- confirm against the reader).
    """
    max_examples_to_show = 1000

    model_instance = model.load(model_fname)
    W = model_instance['W']
    b = model_instance['b']

    training_examples = training.read_training_examples(
        m, fname=training_examples_fname)
    X = training_examples['X']
    Y = training_examples['Y']

    # Examples are stored one per column.  Bound the walk by the number of
    # columns actually loaded: a fixed range(1000) indexes past the end and
    # raises IndexError for smaller example sets.
    available_examples = X.shape[1]
    for i in range(min(max_examples_to_show, available_examples)):
        x = X.T[i]
        # Round-trip through the board representation so the model receives
        # the vector in the layout produced by the position helpers.
        next_position = position.transform_vector_into_position(x)
        position.print_position(next_position)

        print(' predicted ')
        x = position.transform_position_into_vector(next_position)
        movement = model.predict(W, b, x)
        position.print_movement(movement)

        print(' expected ')
        y = Y.T[i]
        position.print_movement(y.reshape(9, 1))

        raw_input("Press Enter to continue...")
def predict3(W, b, x):
    """Pick the strongest network output for the position vector ``x``.

    Runs ``forward_propagation`` and marks the index of the largest
    activation in a one-hot ``(9, 1)`` vector.

    Returns:
        A tuple ``(y, top_activation, aL)``: the one-hot movement vector,
        ``aL`` indexed by its argmax, and the raw output of
        ``forward_propagation``.
    """
    for parameter in (W, b, x):
        assert isinstance(parameter, np.ndarray)
    assert training.is_proper_training_X_data(x)

    # Flip by hand to trace a single prediction step by step.
    verbose = False

    if verbose:
        board_in = position.transform_vector_into_position(x)
        print("received position:")
        position.print_position(board_in)

    activations, _ = forward_propagation(W, b, x)

    if verbose:
        print("received FP results:")
        print(activations)

    # The winner is taken unconditionally; there is no confidence threshold.
    best = activations.argmax()
    one_hot = np.zeros((9, 1))
    one_hot[best] = 1

    if verbose:
        board_out = position.transform_vector_into_position(one_hot)
        print("prediction:")
        position.print_position(board_out)
        raw_input("...")

    return (one_hot, activations[best], activations)
def spy_on_training_process(model_fname):
    """Train the model while watching how one position's output evolves.

    The first position of the first generated batch becomes the position
    under investigation.  Whenever a later batch starts with the same
    position, the network output for it is captured before and after the
    gradient step and printed side by side with the expected output.

    The model is written back to ``model_fname`` every 1000 iterations.

    Args:
        model_fname: file name used both for ``model.load`` and
            ``model.save``.
    """
    model_instance = model.load(model_fname)
    n = model_instance['n']
    W = model_instance['W']
    b = model_instance['b']

    alpha0 = 0.3
    beta = 0.9
    iterations = 100000
    # Learning-rate decay: alpha falls to roughly alpha0 / 5 by the last step.
    decay_rate = 4.0 / iterations

    # Momentum averages are maintained, but the update below applies the raw
    # gradients (dW, db), not these averages.
    vdW = np.zeros(W.shape)
    vdb = np.zeros(b.shape)

    pos_trains = 0
    x_to_investigate = None
    position_to_investigate = None

    for i in range(iterations):
        def make_movement_fn(x):
            return model.predict2(W, b, x)

        ex = make_training_examples(make_movement_fn)
        X = ex['X']
        Y = ex['Y']
        x = X[:, 0].reshape(9, 1)

        # Identity test on purpose: once an ndarray is stored here, `== None`
        # compares element-wise and the result has no single truth value, so
        # the `if` would raise ValueError from the second iteration on.
        if x_to_investigate is None:
            x_to_investigate = x
            position_to_investigate = x_to_investigate.reshape(3, 3)

        # Neither operand changes during the iteration, so one test serves
        # both the "before" and the "after" snapshot.
        is_tracked_position = (x == x_to_investigate).all()

        if is_tracked_position:
            position.print_position(position_to_investigate)
            (_, _, aLbefore) = model.predict3(W, b, x_to_investigate)

        (dW, db, _) = model.back_propagation(n, W, b, X, Y)

        alpha = alpha0 / (1.0 + decay_rate * i)
        vdW = beta * vdW + (1 - beta) * dW
        vdb = beta * vdb + (1 - beta) * db
        W = W - alpha * dW
        b = b - alpha * db

        if i > 0 and i % 1000 == 0:
            model.save(n, W, b, model_fname)
            print('========saved=======')

        if is_tracked_position:
            pos_trains += 1
            position.print_position(position_to_investigate)
            (_, _, aLafter) = model.predict3(W, b, x_to_investigate)
            print('\niteration: %d, position trained times: %d' % (i, pos_trains))
            y = Y[:, 0].reshape(9, 1)
            # Columns: output before the step, output after it, expected.
            table = np.concatenate((aLbefore, aLafter, y), axis=1)
            print(table)
def display_signle_training_example(x, y):
    """Print one training example and pause until Enter is pressed.

    Args:
        x: position vector; converted to a board with
            ``position.transform_vector_into_position`` before printing.
        y: expected movement; reshaped to ``(9, 1)`` before printing.
    """
    board = position.transform_vector_into_position(x)
    position.print_position(board)

    print(' expected ')
    expected_movement = y.reshape(9, 1)
    position.print_movement(expected_movement)

    raw_input("Press Enter to continue...")
def test_position(model_fname):
    """Show what the stored model answers for one hard-coded position.

    Prints the position, the raw output of ``model.forward_propagation``
    and the movement chosen by ``model.predict``.

    Args:
        model_fname: file name handed to ``model.load``.
    """
    print('\ntest model: %s' % model_fname)

    loaded = model.load(model_fname)
    weights = loaded['W']
    biases = loaded['b']

    # The probe position, written row by row and flattened to a column.
    probe_board = [
        [-1, -1, 0],
        [0, 1, 0],
        [0, 0, 0],
    ]
    x = np.array(probe_board).reshape(9, 1)

    print('\n')
    board = position.transform_vector_into_position(x)
    position.print_position(board)

    activations, _ = model.forward_propagation(weights, biases, x)
    print("aL")
    print(activations)

    chosen_movement = model.predict(weights, biases, x)
    position.print_movement(chosen_movement)
def train_model_scenario_4(model_fname, alpha0=1, iterations=500000, beta=0.9):
    """Train the stored model on freshly generated games.

    Every iteration generates a batch with
    ``training.make_training_examples`` (movements chosen by
    ``model.predict2`` with the current weights), runs
    ``model.back_propagation`` and applies a plain gradient step with a
    decaying learning rate.  The model is saved to ``model_fname`` every 50
    iterations and once more at the end.

    Args:
        model_fname: file name used for ``model.load`` and ``model.save``.
        alpha0: initial learning rate.
        iterations: number of training iterations.
        beta: decay factor of the momentum averages.  They are computed but
            the weight update uses the raw gradients.
    """
    debug = False

    loaded = model.load(model_fname)
    n = loaded['n']
    W = loaded['W']
    b = loaded['b']
    vdW = np.zeros(W.shape)
    vdb = np.zeros(b.shape)

    # With this rate the final alpha is 10 times smaller than alpha0.
    decay_rate = 9.0 / iterations

    for i in range(iterations):
        at_report_step = i % 1000 == 0
        if at_report_step:
            print('i: %d' % (i))

        def make_movement_fn(x):
            return model.predict2(W, b, x)

        ex = training.make_training_examples(make_movement_fn)
        X = ex['X']
        Y = ex['Y']

        if debug:
            print(X)
            print(Y)
            # Walk the examples from the last column back to the first.
            for j in reversed(range(len(X.T))):
                x = X.T[j]
                nextPosition = position.transform_vector_into_position(x)
                x = position.transform_position_into_vector(nextPosition)
                position.print_position(nextPosition)

                (aL, _) = model.forward_propagation(W, b, x)
                print("\naL")
                print(aL)

                print('\n predicted ')
                movement = model.predict(W, b, x)
                position.print_movement(movement)

                print(' expected ')
                y = Y.T[j]
                position.print_movement(y.reshape(9, 1))

                raw_input("Press Enter to continue...")

        (dW, db, _) = model.back_propagation(n, W, b, X, Y)

        # Periodic gradient check, debug runs only.
        if debug and i > 0 and i % 3000 == 0:
            if model.check_back_propagation(n, W, b, X, Y):
                print("BP is OK")
            else:
                print("BP is not correct")
                exit()

        alpha = alpha0 / (1.0 + decay_rate * i)
        if at_report_step:
            print("alpha:")
            print(alpha)

        # Momentum averages are kept up to date, yet the step below is taken
        # along the raw gradients.
        vdW = beta * vdW + (1 - beta) * dW
        vdb = beta * vdb + (1 - beta) * db
        W = W - alpha * dW
        b = b - alpha * db

        if i > 0 and i % 50 == 0:
            model.save(n, W, b, model_fname)
            if debug:
                print("model saved")

    print('------ end -------')
    model.save(n, W, b, model_fname)
def movement_matrix_in_vector(initial_position, movement_coords, result, final_position):
    """Build the target vector for one movement of a finished game.

    The cell at ``movement_coords`` receives a score that depends on
    ``result`` and on how many cells were filled between
    ``initial_position`` and ``final_position``.  Every other cell gets ``0``
    where ``initial_position`` holds ``0`` and ``0.001`` elsewhere.

    Args:
        initial_position: ``(3, 3)`` ndarray, the board before the movement.
        movement_coords: 2-tuple ``(row, column)`` of the movement.
        result: ``'win'``, ``'loss'`` or any other string (treated as the
            remaining outcome).
        final_position: ``(3, 3)`` ndarray, the board at the end of the game.

    Returns:
        The score matrix converted by
        ``position.transform_position_into_vector``.
    """
    assert isinstance(initial_position, np.ndarray)
    assert initial_position.shape == (3, 3)
    assert isinstance(movement_coords, tuple)
    assert len(movement_coords) == 2
    assert isinstance(result, str)
    assert isinstance(final_position, np.ndarray)
    assert final_position.shape == (3, 3)

    verbose = False
    move_row, move_col = movement_coords

    empty_cells_before = (initial_position == 0).astype(np.int8).sum()
    empty_cells_at_end = (final_position == 0).astype(np.int8).sum()

    # The reward halves for every further cell filled between this movement
    # and the end of the game.
    power = empty_cells_before - empty_cells_at_end - 1
    reward = 0.5 / (2**power)

    if result == 'win':
        move_value = 0.75 + 0.5 * reward
    elif result == 'loss':
        move_value = 0.5 - reward + 0.001
    else:
        move_value = 0.5 + 0.5 * reward

    # Mask of the single cell addressed by movement_coords.
    row_index, col_index = np.indices((3, 3))
    is_move_cell = (row_index == move_row) & (col_index == move_col)

    other_cells = np.where(initial_position == 0, 0.0, 0.001)
    movementMatrix = np.where(is_move_cell, move_value, other_cells)

    if verbose:
        print("\nresult:")
        print(result)
        print("initial position:")
        position.print_position(initial_position)
        print("final position:")
        position.print_position(final_position)
        print("power:")
        print(power)
        print("reward:")
        print(reward)
        print("movementMatrix:")
        print(movementMatrix)
        raw_input("Enter")

    return position.transform_position_into_vector(movementMatrix)
def make_single_training_example_for_main_player(position_before, movement, final_position, highest_al=0):
    """Turn one main-player movement of a finished game into an (x, y) pair.

    Args:
        position_before: board on which the movement was made.
        movement: mapping with the keys ``'coords'`` (2-tuple),
            ``'result_position'`` and ``'highest_al'``.
        final_position: board at the end of the game.
        highest_al: target value used when the game went on for more than
            one movement after this one.

    Returns:
        ``(x, y, al)``: the input vector, the target vector and
        ``movement['highest_al']``.
    """
    assert position.is_real_position(position_before)
    coords = movement['coords']
    assert isinstance(coords, tuple)
    assert len(coords) == 2
    assert position.is_final_position(final_position)

    verbose = False
    row, col = coords

    # Main player is not necessarily the one who starts the game!
    # When the final board holds something other than 1 on the moved cell,
    # the board is inverted before it is vectorised.
    if final_position[row][col] == 1:
        board_for_x = position_before
    else:
        board_for_x = position.invert_position(position_before)
    x = position.transform_position_into_vector(board_for_x)

    if verbose:
        print('position_before')
        position.print_position(position_before)
        print('x')
        print(x)

    result_position = movement['result_position']
    empty_after_movement = (result_position == 0).astype(np.int8).sum()
    empty_at_end = (final_position == 0).astype(np.int8).sum()
    movements_after_this_one = empty_after_movement - empty_at_end

    if movements_after_this_one in (0, 1):
        # 0: this movement ended the game, so a decisive result belongs to
        #    the player who made it.
        # 1: the reply ended the game, so a decisive result belongs to the
        #    other player.
        decisive_value = 1 if movements_after_this_one == 0 else 0.1
        if (position.is_win_position(final_position)
                or position.is_loss_position(final_position)):
            value = decisive_value
        elif position.is_draw_position(final_position):
            value = 0.5
        else:
            # must never happen
            assert False
    else:
        value = highest_al

    # Non-empty cells carry a small constant; the moved cell carries the value.
    y_as_position = (position_before != 0).astype(np.int8) * 0.001
    y_as_position[row][col] = value
    y = position.transform_position_into_vector(y_as_position)

    if verbose:
        print('y')
        print(y)

    assert is_proper_training_data(x, y)
    return (x, y, movement['highest_al'])
def make_training_examples_rec(initial_position, make_movement_fn, previous_movement_was_random=False):
    """Play a game from ``initial_position`` and collect training examples.

    One call plays a main-player movement followed by an opponent movement.
    If either ends the game, a single example is returned; otherwise the
    function recurses on the resulting board and appends the example for
    this movement after the ones produced by the deeper calls.

    Once a main-player movement is random, every later main-player movement
    in the same game is random too.

    Args:
        initial_position: ``(3, 3)`` ndarray accepted by
            ``position.is_real_position``.
        make_movement_fn: callable forwarded to ``make_movement``.
        previous_movement_was_random: forces a random main-player movement.

    Returns:
        A dict with the keys ``'X'``, ``'Y'``, ``'AL'`` and
        ``'final_position'``.
    """
    assert isinstance(initial_position, np.ndarray)
    assert initial_position.shape == (3, 3)
    assert position.is_real_position(initial_position)

    verbose = False

    # The draw is taken unconditionally, before the flag is consulted.
    main_player_roll = np.random.rand()
    do_random_movement = previous_movement_was_random or main_player_roll < 0.05

    # main player movement could be random
    main_player_movement = make_movement(initial_position, make_movement_fn, do_random_movement)

    def game_over_result(end_position):
        # Single example for a game that has just finished on end_position.
        (x_end, y_end, al_end) = make_single_training_example_for_main_player(
            initial_position, main_player_movement, end_position)
        return {
            'X': x_end,
            'Y': y_end,
            'AL': al_end,
            'final_position': end_position,
        }

    board = main_player_movement['result_position']
    if position.is_final_position(board):
        return game_over_result(board)

    # opponent movement could be random
    opponent_roll = np.random.rand()
    opponent_movement = make_movement(
        board, make_movement_fn, do_random_movement=opponent_roll < 0.01)

    board = opponent_movement['result_position']
    if position.is_final_position(board):
        return game_over_result(board)

    if verbose:
        print("initial_position")
        position.print_position(initial_position)
        print("main_player_movement")
        print(main_player_movement)
        print("opponent_movement")
        print(opponent_movement)
        print("opponent_movement_result_position")
        position.print_position(board)

    rest_of_game = make_training_examples_rec(board, make_movement_fn, do_random_movement)
    X = rest_of_game['X']
    Y = rest_of_game['Y']
    AL = rest_of_game['AL']
    final_position = rest_of_game['final_position']
    assert position.is_final_position(final_position)

    # The most recently collected activation becomes the target value of
    # this movement.
    highest_al = AL[len(AL) - 1]
    (x, y, al) = make_single_training_example_for_main_player(
        initial_position, main_player_movement, final_position, highest_al)

    X = np.append(X, x, axis=1)
    Y = np.append(Y, y, axis=1)
    AL = np.append(AL, al)

    if verbose:
        print('\nX')
        print(X)
        print('Y')
        print(Y)
        print('AL')
        print(AL)
        raw_input('...')

    return {
        'X': X,
        'Y': Y,
        'AL': AL,
        'final_position': final_position,
    }