示例#1
0
# Create the Controller and build its internal policy network inside the
# policy_sess.as_default() context.
with policy_sess.as_default():
    controller = Controller(
        policy_sess,
        NUM_LAYERS,
        state_space,
        reg_param=REGULARIZATION,
        exploration=EXPLORATION,
        controller_cells=CONTROLLER_CELLS,
        embedding_dim=EMBEDDING_DIM,
        restore_controller=RESTORE_CONTROLLER,
    )

# Starting state for the search, used when the controller has to predict an
# action from an initial state. A fixed state is requested from state_space
# here; the random initialization is left disabled:
#   state = state_space.get_random_state_space(NUM_LAYERS)
state = state_space.get_state([3, "relu", 3, "linear"])

# NOTE(review): the label still says "Random" although the state above is
# hard-coded; the text is kept as-is so the program output does not change.
print(
    "Initial Random State : ",
    state_space.parse_state_space_list(state),
    end="\n\n",  # one trailing blank line, same as a separate print()
)

# Clear the previous files through the controller.
controller.remove_files()

# Marks the first trial: the loop below uses `state` as the actions for that
# trial instead of asking the controller.
isFirst = True
# train for the configured number of trials (MAX_TRIALS)
for trial in range(MAX_TRIALS):
    if isFirst:
        actions = state
        isFirst = False
    else:
        with policy_sess.as_default():
            K.set_session(policy_sess)
            actions = controller.get_action(
示例#2
0
# Create the Controller and build its internal policy network inside the
# policy_sess.as_default() context.
with policy_sess.as_default():
    controller = Controller(
        policy_sess,
        NUM_LAYERS,
        state_space,
        reg_param=REGULARIZATION,
        exploration=EXPLORATION,
        controller_cells=CONTROLLER_CELLS,
        embedding_dim=EMBEDDING_DIM,
        restore_controller=RESTORE_CONTROLLER,
    )

# Starting state for the search, used when the controller has to predict an
# action from an initial state. A fixed state is requested from state_space
# here; the random initialization is left disabled:
#   state = state_space.get_random_state_space(NUM_LAYERS)
state = state_space.get_state([1, "relu", 1, "linear"])

# NOTE(review): the label still says "Random" although the state above is
# hard-coded; the text is kept as-is so the program output does not change.
print(
    "Initial Random State : ",
    state_space.parse_state_space_list(state),
    end="\n\n",  # one trailing blank line, same as a separate print()
)

# Clear the previous files through the controller.
controller.remove_files()

# Marks the first trial: the loop below uses `state` as the actions for that
# trial instead of asking the controller.
isFirst = True
# train for the configured number of trials (MAX_TRIALS)
for trial in range(MAX_TRIALS):
    if isFirst:
        actions = state
        isFirst = False
    else:
        np.random.seed()
        with policy_sess.as_default():
            K.set_session(policy_sess)