Example #1
              steps_per_theta_update=None,
              verbose=1,
              norm_value=NORM_VALUE,
              independent=INDEPENDENT)
              # term_condition=lambda v1, v2:
              # increment_base_termination(v1,v2,2,tol=1e-2))
              #term_condition=lambda v1, v2: terminal_evaluation(v1,v2,1e-1))


def tmetric(theta):
    t = pbo.apply_bo(theta[0], n_times=STEPS_AHEAD)
    return q_regressor.get_k(t)


state, actions, reward, next_states, absorbing = split_dataset(
    dataset, state_dim=state_dim, action_dim=action_dim, reward_dim=reward_dim)

theta0 = np.array([6., 10.001], dtype='float32').reshape(1, -1)
# theta0 = np.array([16., 10.001], dtype='float32').reshape(1, -1)
history = pbo.fit(state, actions, next_states, reward, absorbing, theta0,
                  batch_size=10, nb_epoch=EPOCH,
                  theta_metrics={'k': tmetric})

##########################################
# Evaluate the final solution
initial_states = np.array([[1, 2, 5, 7, 10]]).T
values = evaluation.evaluate_policy(mdp, pbo, initial_states=initial_states)
print('Learned theta: {}'.format(pbo.learned_theta_value))
print('Final performance of PBO: {}'.format(values))
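The theta metric defined above can also be called directly, which gives a quick way to inspect the 'k' value outside of the fit loop. The call below is an illustrative addition, not part of the original script, and assumes the pbo and q_regressor objects above are already built.

# Illustrative only: evaluate the 'k' metric on the initial parameters.
print('k(theta0): {}'.format(tmetric(theta0)))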
Example #2
        'scale': True,
        'verbose': 1,
        'significance': args.significance
    }
    selector = IFS(**ifs_params)
    features_names = np.array(['S%s' % i for i in xrange(state_dim)] +
                              ['A%s' % i for i in xrange(action_dim)])
    rfs_params = {
        'feature_selector': selector,
        'features_names': features_names,
        'verbose': 1
    }
    fs = RFS(**rfs_params)

    # Split dataset for RFS
    state, actions, reward, next_states = split_dataset(
        dataset, state_dim, action_dim, reward_dim)

    # Run RFS
    fs.fit(state, actions, next_states, reward)

    # Reduce the dataset for FQI
    selected_states = []
    selected_actions = []
    for f in features_names[np.where(fs.get_support())]:
        if f.startswith('S'):
            selected_states.append(f)
        if f.startswith('A'):
            selected_actions.append(f)

    # TODO remove this once everything works
    assert len(selected_states) > 0, '### RFS fail ###'
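The snippet stops after collecting the selected names. A minimal sketch of the reduction step the comment refers to might look like the following; the index arithmetic assumes the 'S<i>'/'A<i>' naming used above and is illustrative, not the project's actual code.

    # Hypothetical continuation: map the selected names back to column
    # indices and keep only those columns for FQI.
    state_idx = [int(name[1:]) for name in selected_states]    # e.g. ['S0', 'S2'] -> [0, 2]
    action_idx = [int(name[1:]) for name in selected_actions]  # e.g. ['A0'] -> [0]
    reduced_state = state[:, state_idx]
    reduced_actions = actions[:, action_idx]
    reduced_next_states = next_states[:, state_idx]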
Example #3
File: fs_test.py  Project: teopir/ifqi
# dataset: s, a, r, s'
# dataset = evaluation.collect_episodes(mdp, n_episodes=50)
dataset = np.loadtxt('encoded_dataset.csv', skiprows=1, delimiter=',')
# check_dataset(dataset, state_dim, action_dim, reward_dim)

estimator = ExtraTreesRegressor(n_estimators=50, n_jobs=-1,
                                importance_criterion="gini")
# estimator = DecisionTreeRegressor(importance_criterion="gini")

selector = IFS(estimator=estimator,
               scale=True, verbose=1)
features_names = ['S%s' % i for i in xrange(state_dim)] + \
                 ['A%s' % i for i in xrange(action_dim)]
fs = RFS(feature_selector=selector,
         # features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']),
         features_names=np.array(features_names),
         verbose=1)

state, actions, reward, next_states = \
    split_dataset(dataset, state_dim, action_dim, reward_dim)

# Re-slice state, actions and reward directly from the raw columns
# (dataset layout: s, a, r, s'), overriding the split above.
state = dataset[:, 0:state_dim]
actions = dataset[:, state_dim:state_dim + action_dim]
reward = dataset[:, state_dim + action_dim]

# print(dataset[:10, :])

fs.fit(state, actions, next_states, reward)
print(fs.get_support())  # these are the selected features; expected: [S0, S2, A0]
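get_support() returns a boolean mask over the features; mapping it back to features_names (as the fs_test.py variant in Example #5 does) makes the expected [S0, S2, A0] easier to read:

# Map the boolean support mask back to the feature names.
selected = np.array(features_names)[fs.get_support()]
print('selected features: {}'.format(selected))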
Example #4
              update_theta_every=UPDATE_EVERY,
              steps_per_theta_update=None,
              verbose=1,
              norm_value=NORM_VALUE,
              independent=INDEPENDENT)
# term_condition=lambda v1, v2:
# increment_base_termination(v1,v2,2,tol=1e-2))
#term_condition=lambda v1, v2: terminal_evaluation(v1,v2,1e-1))


def tmetric(theta):
    t = pbo.apply_bo(theta[0], n_times=STEPS_AHEAD)
    return q_regressor.get_k(t)


state, actions, reward, next_states, absorbing = split_dataset(
    dataset, state_dim=state_dim, action_dim=action_dim, reward_dim=reward_dim)

theta0 = np.array([6., 10.001], dtype='float32').reshape(1, -1)
# theta0 = np.array([16., 10.001], dtype='float32').reshape(1, -1)
history = pbo.fit(state,
                  actions,
                  next_states,
                  reward,
                  absorbing,
                  theta0,
                  batch_size=10,
                  nb_epoch=EPOCH,
                  theta_metrics={'k': tmetric})

##########################################
# Evaluate the final solution
Example #5
# np.random.seed(3452)

mdp = env.SyntheticToyFS()
state_dim, action_dim, reward_dim = get_space_info(mdp)
nextstate_idx = state_dim + action_dim + reward_dim
reward_idx = action_dim + state_dim

# dataset: s, a, r, s'
dataset = evaluation.collect_episodes(mdp, n_episodes=50)
check_dataset(dataset, state_dim, action_dim, reward_dim)

selector = IFS(estimator=ExtraTreesRegressor(n_estimators=50),
               scale=True, verbose=1)
fs = RFS(feature_selector=selector,
         features_names=np.array(['S0', 'S1', 'S2', 'S3', 'A0', 'A1']),
         verbose=1)

state, actions, reward, next_states, absorbing = \
    split_dataset(dataset, state_dim, action_dim, reward_dim)

# print(dataset[:10, :])

fs.fit(state, actions, next_states, reward)
selected_features = fs.features_names[fs.get_support()]
print('selected features: {}'.format(selected_features))  # expected: [S0, S2, A0]
assert np.all(selected_features == ['S0', 'S2', 'A0'])

print(fs.nodes)
g = fs.export_graphviz()
g.view()
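Assuming export_graphviz() returns a standard graphviz object (the .view() call above suggests it does), the dependency graph can also be written to disk instead of being opened interactively:

# Illustrative: save the rendered graph to a file rather than opening a viewer.
g.render('rfs_graph', cleanup=True)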
Example #6
                            'n_jobs': args.njobs}
    ifs_params = {'estimator': ExtraTreesRegressor(**ifs_regressor_params),
                  'n_features_step': 1,
                  'cv': None,
                  'scale': True,
                  'verbose': 1,
                  'significance': args.significance}
    selector = IFS(**ifs_params)
    features_names = np.array(['S%s' % i for i in xrange(state_dim)] +
                              ['A%s' % i for i in xrange(action_dim)])
    rfs_params = {'feature_selector': selector,
                  'features_names': features_names,
                  'verbose': 1}
    fs = RFS(**rfs_params)

    # Split dataset for RFS
    state, actions, reward, next_states = split_dataset(dataset, state_dim, action_dim, reward_dim)

    # Run RFS
    fs.fit(state, actions, next_states, reward)

    # Reduce the dataset for FQI
    selected_states = []
    selected_actions = []
    for f in features_names[np.where(fs.get_support())]:
        if f.startswith('S'):
            selected_states.append(f)
        if f.startswith('A'):
            selected_actions.append(f)

    # TODO remove this once everything works
    assert len(selected_states) > 0, '### RFS fail ###'