Example #1 (file: dnn.py — project: shirisht/MDP-DP-RL)
            decay_param=gamma_lambda,
            hidden_activation_deriv=self.hidden_activation_deriv,
            output_activation_deriv=self.output_activation_deriv
        )


if __name__ == '__main__':
    # Smoke test for the DNN function approximator: build a tiny network
    # (one hidden layer of 2 ReLU neurons, identity output), evaluate it
    # on one point, then lay out a 3-d grid of sample points.
    # NOTE(review): this fragment is truncated below L33 — the grid `pts`
    # is presumably used for training/evaluation further down.
    this_dnn_obj = DNNSpec(
        neurons=[2],
        hidden_activation=DNNSpec.relu,
        hidden_activation_deriv=DNNSpec.relu_deriv,
        output_activation=DNNSpec.identity,
        output_activation_deriv=DNNSpec.identity_deriv
    )
    nn = DNN(
        # Identity features over a 3-dimensional input vector.
        feature_funcs=FuncApproxBase.get_identity_feature_funcs(3),
        dnn_obj=this_dnn_obj,
        reglr_coeff=0.,  # presumably disables regularization — confirm in DNN
        learning_rate=1.,
        adam=True,  # ADAM enabled with its customary decay rates
        adam_decay1=0.9,
        adam_decay2=0.999
    )
    # Evaluate the network on a single 3-d input point and print the result.
    init_eval = nn.get_func_eval((2.0, 3.0, -4.0))
    print(init_eval)

    # Cartesian grid of points over [-10.0, 10.0) in each of 3 dims,
    # step 0.5 (40 values per axis -> 64000 tuples).
    x_pts = np.arange(-10.0, 10.0, 0.5)
    y_pts = np.arange(-10.0, 10.0, 0.5)
    z_pts = np.arange(-10.0, 10.0, 0.5)
    pts = [(x, y, z) for x in x_pts for y in y_pts for z in z_pts]
Example #2
    # NOTE(review): fragment — `wg` (a grid-world problem spec, presumably a
    # WindyGrid, given the Move actions below) is constructed above this
    # excerpt, and the RunAllAlgorithms(...) call is cut off at the bottom.
    # NOTE(review): `valid` is never read in the visible code — confirm
    # whether validate_spec() is called for its side effects/assertion only.
    valid = wg.validate_spec()
    mdp_ref_obj = wg.get_mdp_refined()
    # Hyperparameters for the algorithm suite run below (names suggest:
    # DP convergence tolerance, first- vs every-visit MC, softmax vs
    # epsilon-greedy exploration, TD(lambda) settings — confirm against
    # RunAllAlgorithms' signature).
    this_tolerance = 1e-3
    this_first_visit_mc = True
    this_num_samples = 30
    this_softmax = False
    this_epsilon = 0.05
    this_epsilon_half_life = 100
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 1000
    this_max_steps = 1000
    this_td_offline = True
    # Function-approximation spec: indicator (one-hot) features over all
    # states and over the Move action names. dnn_spec=None presumably
    # selects a linear approximator — the commented-out DNNSpec is the
    # alternative nonlinear configuration.
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=FuncApproxBase.get_indicator_feature_funcs(
            mdp_ref_obj.all_states),
        action_feature_funcs=FuncApproxBase.get_indicator_feature_funcs(
            {m.name
             for m in Move}),
        dnn_spec=None
        # dnn_spec=DNNSpec(
        #     neurons=[2, 4],
        #     hidden_activation=DNNSpec.relu,
        #     hidden_activation_deriv=DNNSpec.relu_deriv,
        #     output_activation=DNNSpec.identity,
        #     output_activation_deriv=DNNSpec.identity_deriv
        # )
    )

    # Run every algorithm in the suite against the refined MDP (call
    # continues past this excerpt).
    raa = RunAllAlgorithms(mdp_refined=mdp_ref_obj,
                           tolerance=this_tolerance,
Example #3
    # NOTE(review): fragment — `ic` (an inventory-control problem,
    # presumably, given `lead_time` below) is constructed above this
    # excerpt, and the RunAllAlgorithms(...) call is cut off at the bottom.
    mdp_ref_obj = ic.get_mdp_refined()
    # Hyperparameters for the algorithm suite (softmax exploration here,
    # unlike the epsilon-greedy grid examples).
    this_tolerance = 1e-3
    this_first_visit_mc = True
    # NOTE(review): breaks the `this_*` naming convention used by every
    # other hyperparameter in this script — consider renaming for
    # consistency (left as-is: the truncated call below depends on it).
    num_samples = 30
    this_softmax = True
    this_epsilon = 0.05
    this_epsilon_half_life = 30
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 3000
    this_max_steps = 1000
    this_tdl_fa_offline = True
    # Identity features over the (lead_time + 1)-dimensional state and a
    # raw-identity action feature; a small 2x4 ReLU DNN as approximator.
    # NOTE(review): unlike the other examples, no output_activation /
    # output_activation_deriv are passed to DNNSpec — verify DNNSpec
    # provides defaults, or this raises at construction.
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=FuncApproxBase.get_identity_feature_funcs(
            ic.lead_time + 1
        ),
        action_feature_funcs=[lambda x: x],
        dnn_spec=DNNSpec(
            neurons=[2, 4],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv
        )
    )

    # Run the full algorithm suite (call continues past this excerpt).
    raa = RunAllAlgorithms(
        mdp_refined=mdp_ref_obj,
        tolerance=this_tolerance,
        first_visit_mc=this_first_visit_mc,
        num_samples=num_samples,
        softmax=this_softmax,
Example #4
 # NOTE(review): fragment with mangled (1-space) indentation from the
 # snippet extraction — preserved byte-for-byte here. `wg` is constructed
 # above this excerpt and the FuncApproxSpec(...) call is cut off below.
 mdp_ref_obj = wg.get_mdp_refined()
 # Hyperparameters; this variant adds exploring starts, batching, and
 # pure-greedy exploration (epsilon = 0.0) relative to Example #2.
 this_tolerance = 1e-3
 exploring_start = False
 this_first_visit_mc = True
 this_num_samples = 30
 this_softmax = False
 this_epsilon = 0.0
 this_epsilon_half_life = 100
 this_learning_rate = 0.1
 this_learning_rate_decay = 1e6
 this_lambd = 0.8
 this_num_episodes = 1000
 this_batch_size = 10
 this_max_steps = 1000
 this_td_offline = True
 # One-hot state features, and state-action features formed by applying
 # the state indicators to x[0] and the Move-name indicators to x[1].
 # The `f=f` default argument binds each feature function at definition
 # time (avoids the late-binding-closure pitfall).
 state_ffs = FuncApproxBase.get_indicator_feature_funcs(
     mdp_ref_obj.all_states)
 sa_ffs = [(lambda x, f=f: f(x[0])) for f in state_ffs] +\
     [(lambda x, f=f: f(x[1])) for f in FuncApproxBase.get_indicator_feature_funcs(
         {m.name for m in Move}
     )]
 # NOTE(review): uses `sa_feature_funcs` where Example #2 uses
 # `action_feature_funcs` — presumably a different FuncApproxSpec API
 # revision; confirm against the project's current signature.
 this_fa_spec = FuncApproxSpec(
     state_feature_funcs=state_ffs,
     sa_feature_funcs=sa_ffs,
     dnn_spec=None
     # dnn_spec=DNNSpec(
     #     neurons=[2, 4],
     #     hidden_activation=DNNSpec.relu,
     #     hidden_activation_deriv=DNNSpec.relu_deriv,
     #     output_activation=DNNSpec.identity,
     #     output_activation_deriv=DNNSpec.identity_deriv
     # )