示例#1
0
def test_model_on_static_examples(model_fname, training_examples_fname, m=0):
  """Interactively compare model predictions with stored training examples.

  Loads the model from ``model_fname`` and the examples from
  ``training_examples_fname``. For each inspected example it prints the
  position, the movement predicted by the model and the expected movement,
  then waits for Enter.

  Args:
    model_fname: passed to ``model.load``.
    training_examples_fname: passed to ``training.read_training_examples``
      as ``fname``.
    m: forwarded unchanged as the first argument of
      ``training.read_training_examples`` (presumably the number of
      examples to read -- confirm against that function).

  At most 1000 examples are shown, and never more than were loaded.
  """
  modelInstance = model.load(model_fname)

  W = modelInstance['W']
  b = modelInstance['b']

  trainingExamples = training.read_training_examples(m, fname=training_examples_fname)

  # The loop indexes the first axis of the transposed matrices, so their
  # lengths are the hard upper bound for the example index.
  examples_X = trainingExamples['X'].T
  examples_Y = trainingExamples['Y'].T

  # The previous fixed range(1000) raised IndexError as soon as fewer than
  # 1000 examples were available; stop at what is actually there instead.
  shown = min(1000, len(examples_X), len(examples_Y))

  for i in range(shown):
    x = examples_X[i]
    nextPosition = position.transform_vector_into_position(x)

    position.print_position(nextPosition)

    print(' predicted ')

    # Round-trip through the position helpers before handing x to the model.
    x = position.transform_position_into_vector(nextPosition)
    movement = model.predict(W, b, x)
    position.print_movement(movement)

    print(' expected ')

    y = examples_Y[i]
    position.print_movement(y.reshape(9, 1))

    # NOTE(review): raw_input only exists on Python 2 -- switch to input()
    # if this is meant to run on Python 3.
    raw_input("Press Enter to continue...")
示例#2
0
def predict3(W, b, x):
  """Pick the strongest network output for the input vector ``x``.

  Runs ``forward_propagation(W, b, x)`` and one-hot encodes the arg-max of
  its first result.

  Args:
    W, b, x: NumPy arrays; ``x`` must additionally pass
      ``training.is_proper_training_X_data``.

  Returns:
    A tuple ``(y, best, aL)`` where ``y`` is a (9, 1) array of zeros with a
    single 1 at the arg-max index, ``best`` is ``aL`` indexed at that same
    index, and ``aL`` is the first value returned by forward propagation.
  """
  for argument in (W, b, x):
    assert isinstance(argument, np.ndarray)

  assert training.is_proper_training_X_data(x)

  # Flip to True to trace the input, the raw outputs and the prediction.
  verbose = False

  if verbose:
    incoming = position.transform_vector_into_position(x)
    print("received position:")
    position.print_position(incoming)

  activations, _ = forward_propagation(W, b, x)

  if verbose:
    print("received FP results:")
    print(activations)

  # argmax() without an axis yields an index into the flattened output.
  best_index = activations.argmax()

  one_hot = np.zeros((9, 1))
  one_hot[best_index] = 1

  if verbose:
    predicted = position.transform_vector_into_position(one_hot)
    print("prediction:")
    position.print_position(predicted)

    raw_input("...")

  return (one_hot, activations[best_index], activations)
示例#3
0
def spy_on_training_process(model_fname):
  """Train the model while reporting how one tracked position evolves.

  The first column of the first generated batch becomes the tracked
  position. Whenever a later batch starts with the same position, the
  network output for it is captured before and after the weight update and
  printed side by side with the target column.

  The model is written back to ``model_fname`` every 1000 iterations
  (iteration 0 excluded).

  Args:
    model_fname: file the model is loaded from and saved to.
  """
  model_instance = model.load(model_fname)

  n = model_instance['n']
  W = model_instance['W']
  b = model_instance['b']

  vdW = np.zeros(W.shape)
  vdb = np.zeros(b.shape)

  alpha0 = 0.3
  beta = 0.9
  iterations = 100000
  # With this rate alpha decays to roughly alpha0 / 5 by the last iteration.
  decay_rate = 4.0 / iterations

  pos_trains = 0
  x_to_investigate = None
  position_to_investigate = None

  def make_movement_fn(x):
    # W and b are looked up when the function is called, so every batch is
    # generated with the most recent weights.
    return model.predict2(W, b, x)

  for i in range(iterations):
    ex = make_training_examples(make_movement_fn)

    X = ex['X']
    Y = ex['Y']

    x = X[:, 0].reshape(9, 1)

    # Must be an identity test: once x_to_investigate holds an ndarray,
    # `== None` compares element-wise and using the result in `if` raises
    # "truth value of an array ... is ambiguous" on the second iteration.
    if x_to_investigate is None:
      x_to_investigate = x
      position_to_investigate = x_to_investigate.reshape(3, 3)

    # Neither operand changes during the iteration, so test once.
    is_tracked_position = (x == x_to_investigate).all()

    if is_tracked_position:
      position.print_position(position_to_investigate)
      (_, _, aLbefore) = model.predict3(W, b, x_to_investigate)

    (dW, db, _) = model.back_propagation(n, W, b, X, Y)

    alpha = alpha0 / (1.0 + decay_rate * i)

    # NOTE(review): the momentum averages are maintained but the update
    # below applies the raw gradients -- confirm whether that is intended.
    vdW = beta * vdW + (1 - beta) * dW
    vdb = beta * vdb + (1 - beta) * db

    W = W - alpha * dW
    b = b - alpha * db

    if i > 0 and i % 1000 == 0:
      model.save(n, W, b, model_fname)
      print('========saved=======')

    if is_tracked_position:
      pos_trains += 1
      position.print_position(position_to_investigate)
      (_, _, aLafter) = model.predict3(W, b, x_to_investigate)
      print('\niteration: %d, position trained times: %d' % (i, pos_trains))
      y = Y[:, 0].reshape(9, 1)
      # Columns: output before the update, output after it, target.
      table = np.concatenate((aLbefore, aLafter, y), axis=1)
      print(table)
示例#4
0
def display_signle_training_example(x, y):
  """Print one training example and wait for the user to press Enter.

  Args:
    x: input vector, converted with
      ``position.transform_vector_into_position`` before printing.
    y: expected movement; reshaped to (9, 1) before printing.
  """
  board = position.transform_vector_into_position(x)
  position.print_position(board)

  print(' expected ')
  expected_movement = y.reshape(9, 1)
  position.print_movement(expected_movement)

  raw_input("Press Enter to continue...")
示例#5
0
def test_position(model_fname):
  """Print what the stored model answers for one hard-coded position.

  Loads the model from ``model_fname``, prints the fixed position, the raw
  forward-propagation output and the movement chosen by ``model.predict``.

  Args:
    model_fname: file passed to ``model.load``.
  """
  print('\ntest model: %s' % (model_fname))
  loaded = model.load(model_fname)
  weights = loaded['W']
  biases = loaded['b']

  # The position under test, written row by row and flattened to (9, 1).
  board = np.array([
    [-1, -1, 0],
    [0, 1, 0],
    [0, 0, 0],
  ])
  x = board.reshape(9, 1)

  print('\n')
  position.print_position(position.transform_vector_into_position(x))

  aL, _ = model.forward_propagation(weights, biases, x)
  print("aL")
  print(aL)

  position.print_movement(model.predict(weights, biases, x))
示例#6
0
def train_model_scenario_4(model_fname, alpha0=1, iterations=500000, beta=0.9):
  """Train the model stored in ``model_fname`` on freshly generated examples.

  Each iteration generates a batch with ``training.make_training_examples``
  (moves chosen by ``model.predict2`` with the current weights), runs back
  propagation and applies a gradient step with a decaying learning rate.
  The model is saved every 50 iterations (iteration 0 excluded) and once
  more at the end.

  Args:
    model_fname: file the model is loaded from and saved to.
    alpha0: initial learning rate.
    iterations: number of training iterations.
    beta: coefficient of the running gradient averages.
  """
  # Flip to True for interactive dumps of every batch and gradient checks.
  verbose = False

  stored = model.load(model_fname)
  n = stored['n']
  W = stored['W']
  b = stored['b']

  vdW = np.zeros(W.shape)
  vdb = np.zeros(b.shape)

  # Chosen so that alpha ends up about ten times smaller than alpha0.
  decay_rate = 9.0 / iterations

  def make_movement_fn(x):
    # W and b are read at call time, so the latest weights are always used.
    return model.predict2(W, b, x)

  for step in range(iterations):
    report_progress = step % 1000 == 0
    if report_progress:
      print('i: %d' % (step))

    examples = training.make_training_examples(make_movement_fn)
    X = examples['X']
    Y = examples['Y']

    if verbose:
      print(X)
      print(Y)
      # Walk the examples from the last column to the first.
      for col in reversed(range(len(X.T))):
        board = position.transform_vector_into_position(X.T[col])
        x = position.transform_position_into_vector(board)

        position.print_position(board)

        (aL, _) = model.forward_propagation(W, b, x)

        print("\naL")
        print(aL)

        print('\n predicted ')
        position.print_movement(model.predict(W, b, x))

        print(' expected ')
        position.print_movement(Y.T[col].reshape(9, 1))

        raw_input("Press Enter to continue...")

    (dW, db, _) = model.back_propagation(n, W, b, X, Y)

    if verbose and step > 0 and step % 3000 == 0:
      if model.check_back_propagation(n, W, b, X, Y):
        print("BP is OK")
      else:
        print("BP is not correct")
        exit()

    alpha = alpha0 / (1.0 + decay_rate * step)

    if report_progress:
      print("alpha:")
      print(alpha)

    # The running averages are maintained, but the step below is taken
    # along the raw gradients.
    vdW = beta * vdW + (1 - beta) * dW
    vdb = beta * vdb + (1 - beta) * db

    W = W - alpha * dW
    b = b - alpha * db

    if step > 0 and step % 50 == 0:
      model.save(n, W, b, model_fname)
      if verbose:
        print("model saved")

  print('------ end -------')

  model.save(n, W, b, model_fname)
示例#7
0
def movement_matrix_in_vector(initial_position, movement_coords, result,
                              final_position):
    """Build the target vector for one movement of a finished game.

    The cell at ``movement_coords`` receives a score that depends on
    ``result`` and on how many cells were filled between the position the
    movement was made in and the final position. Every other cell gets 0
    when it is empty (0) in ``initial_position`` and 0.001 otherwise.

    Args:
        initial_position: (3, 3) array, the position before the movement.
        movement_coords: ``(row, column)`` tuple of the movement.
        result: ``'win'``, ``'loss'`` or anything else (treated as the
            remaining, third outcome).
        final_position: (3, 3) array, the position the game ended in.

    Returns:
        ``position.transform_position_into_vector`` applied to the (3, 3)
        score matrix.
    """
    assert isinstance(initial_position, np.ndarray)
    assert initial_position.shape == (3, 3)

    assert isinstance(movement_coords, tuple)
    assert len(movement_coords) == 2

    assert isinstance(result, str)

    assert isinstance(final_position, np.ndarray)
    assert final_position.shape == (3, 3)

    debug = False

    mi, mj = movement_coords

    # The reward halves for every further cell filled after this movement.
    empty_at_start = (initial_position == 0).sum()
    empty_at_end = (final_position == 0).sum()
    power = empty_at_start - empty_at_end - 1
    reward = 0.5 / (2**power)

    if result == 'win':
        chosen_value = 0.75 + 0.5 * reward
    elif result == 'loss':
        chosen_value = 0.5 - reward + 0.001
    else:
        chosen_value = 0.5 + 0.5 * reward

    # Select the moved-to cell by comparison rather than by indexing, so
    # coordinates that match no cell simply leave the matrix untouched.
    rows, cols = np.indices((3, 3))
    is_chosen_cell = (rows == mi) & (cols == mj)
    other_cells = np.where(initial_position == 0, 0.0, 0.001)
    movementMatrix = np.where(is_chosen_cell, chosen_value, other_cells)

    if debug:
        print("\nresult:")
        print(result)
        print("initial position:")
        position.print_position(initial_position)
        print("final position:")
        position.print_position(final_position)
        print("power:")
        print(power)
        print("reward:")
        print(reward)
        print("movementMatrix:")
        print(movementMatrix)
        raw_input("Enter")

    return position.transform_position_into_vector(movementMatrix)
示例#8
0
def make_single_training_example_for_main_player(position_before,
                                                 movement,
                                                 final_position,
                                                 highest_al=0):
    """Turn one movement of a finished game into an ``(x, y, al)`` triple.

    Args:
        position_before: position the movement was made in.
        movement: dict providing ``'coords'`` (a 2-tuple),
            ``'result_position'`` and ``'highest_al'``.
        final_position: position the game ended in.
        highest_al: target used for the moved-to cell when the movement is
            neither the last nor the second-to-last one of the game.

    Returns:
        ``(x, y, al)``: ``x`` is the input vector (built from the inverted
        position when the moved-to cell of ``final_position`` is not 1),
        ``y`` is the target vector and ``al`` is ``movement['highest_al']``.
    """
    assert position.is_real_position(position_before)
    coords = movement['coords']
    assert isinstance(coords, tuple)
    assert len(coords) == 2
    assert position.is_final_position(final_position)

    debug = False

    (i, j) = coords
    # Main player is not necessarily the one who starts the game!
    main_player_started_the_game = final_position[i][j] == 1

    if main_player_started_the_game:
        board_for_x = position_before
    else:
        board_for_x = position.invert_position(position_before)
    x = position.transform_position_into_vector(board_for_x)

    if debug:
        print('position_before')
        position.print_position(position_before)
        print('x')
        print(x)

    # How many cells were filled after this movement until the game ended.
    result_position = movement['result_position']
    empty_after_movement = (result_position == 0).sum()
    empty_at_end = (final_position == 0).sum()
    movements_after = empty_after_movement - empty_at_end
    is_last_game_movement = movements_after == 0
    is_prelast_game_movement = movements_after == 1

    if is_last_game_movement or is_prelast_game_movement:
        # A decisive game scores 1 for the movement that ended it and 0.1
        # for the movement right before that; a draw scores 0.5 either way.
        decisive_value = 1 if is_last_game_movement else 0.1
        if (position.is_win_position(final_position)
                or position.is_loss_position(final_position)):
            value = decisive_value
        elif position.is_draw_position(final_position):
            value = 0.5
        else:
            # must never happen
            assert False
    else:
        value = highest_al

    # Occupied cells get 0.001, empty ones 0; the moved-to cell is then
    # overwritten with the value chosen above.
    y_as_position = (position_before != 0).astype(np.int8) * 0.001
    y_as_position[i][j] = value
    y = position.transform_position_into_vector(y_as_position)

    if debug:
        print('y')
        print(y)

    assert is_proper_training_data(x, y)

    return (x, y, movement['highest_al'])
示例#9
0
def make_training_examples_rec(initial_position,
                               make_movement_fn,
                               previous_movement_was_random=False):
    """Play a game out recursively and collect the main player's examples.

    One call plays one main-player movement and, unless the game ended, one
    opponent movement, then recurses on the resulting position. The example
    for this call's movement is appended after the ones from deeper calls.

    Args:
        initial_position: (3, 3) array the main player moves from.
        make_movement_fn: forwarded to ``make_movement``.
        previous_movement_was_random: when true the main player's movement
            is random in this call as well.

    Returns:
        A dict with keys ``'X'``, ``'Y'``, ``'AL'`` and ``'final_position'``.
    """
    assert isinstance(initial_position, np.ndarray)
    assert initial_position.shape == (3, 3)
    assert position.is_real_position(initial_position)

    debug = False

    # Drawn unconditionally, before the `or`, so the random stream advances
    # the same way whether or not randomness was inherited.
    main_player_roll = np.random.rand()
    do_random_movement = previous_movement_was_random or main_player_roll < 0.05

    # main player movement could be random
    main_player_movement = make_movement(initial_position, make_movement_fn,
                                         do_random_movement)

    def finished_game(end_position):
        # Package the single example produced when the game ends here.
        (x, y, al) = make_single_training_example_for_main_player(
            initial_position, main_player_movement, end_position)
        return {
            'X': x,
            'Y': y,
            'AL': al,
            'final_position': end_position,
        }

    board = main_player_movement['result_position']
    if position.is_final_position(board):
        return finished_game(board)

    # opponent movement could be random
    opponent_roll = np.random.rand()
    opponent_movement = make_movement(
        board,
        make_movement_fn,
        do_random_movement=opponent_roll < 0.01)

    board = opponent_movement['result_position']
    if position.is_final_position(board):
        return finished_game(board)

    if debug:
        print("initial_position")
        position.print_position(initial_position)
        print("main_player_movement")
        print(main_player_movement)
        print("opponent_movement")
        print(opponent_movement)
        print("opponent_movement_result_position")
        position.print_position(board)

    # The game goes on: let the deeper calls finish it first.
    later = make_training_examples_rec(board, make_movement_fn,
                                       do_random_movement)
    X = later['X']
    Y = later['Y']
    AL = later['AL']
    final_position = later['final_position']

    assert position.is_final_position(final_position)

    # The most recently appended entry of AL feeds this movement's target.
    highest_al = AL[-1]

    (x, y, al) = make_single_training_example_for_main_player(
        initial_position, main_player_movement, final_position, highest_al)

    X = np.append(X, x, axis=1)
    Y = np.append(Y, y, axis=1)
    AL = np.append(AL, al)

    if debug:
        print('\nX')
        print(X)
        print('Y')
        print(Y)
        print('AL')
        print(AL)
        raw_input('...')

    return {
        'X': X,
        'Y': Y,
        'AL': AL,
        'final_position': final_position,
    }