decay_param=gamma_lambda, hidden_activation_deriv=self.hidden_activation_deriv, output_activation_deriv=self.output_activation_deriv ) if __name__ == '__main__': this_dnn_obj = DNNSpec( neurons=[2], hidden_activation=DNNSpec.relu, hidden_activation_deriv=DNNSpec.relu_deriv, output_activation=DNNSpec.identity, output_activation_deriv=DNNSpec.identity_deriv ) nn = DNN( feature_funcs=FuncApproxBase.get_identity_feature_funcs(3), dnn_obj=this_dnn_obj, reglr_coeff=0., learning_rate=1., adam=True, adam_decay1=0.9, adam_decay2=0.999 ) init_eval = nn.get_func_eval((2.0, 3.0, -4.0)) print(init_eval) x_pts = np.arange(-10.0, 10.0, 0.5) y_pts = np.arange(-10.0, 10.0, 0.5) z_pts = np.arange(-10.0, 10.0, 0.5) pts = [(x, y, z) for x in x_pts for y in y_pts for z in z_pts]
valid = wg.validate_spec() mdp_ref_obj = wg.get_mdp_refined() this_tolerance = 1e-3 this_first_visit_mc = True this_num_samples = 30 this_softmax = False this_epsilon = 0.05 this_epsilon_half_life = 100 this_learning_rate = 0.1 this_learning_rate_decay = 1e6 this_lambd = 0.8 this_num_episodes = 1000 this_max_steps = 1000 this_td_offline = True this_fa_spec = FuncApproxSpec( state_feature_funcs=FuncApproxBase.get_indicator_feature_funcs( mdp_ref_obj.all_states), action_feature_funcs=FuncApproxBase.get_indicator_feature_funcs( {m.name for m in Move}), dnn_spec=None # dnn_spec=DNNSpec( # neurons=[2, 4], # hidden_activation=DNNSpec.relu, # hidden_activation_deriv=DNNSpec.relu_deriv, # output_activation=DNNSpec.identity, # output_activation_deriv=DNNSpec.identity_deriv # ) ) raa = RunAllAlgorithms(mdp_refined=mdp_ref_obj, tolerance=this_tolerance,
mdp_ref_obj = ic.get_mdp_refined() this_tolerance = 1e-3 this_first_visit_mc = True num_samples = 30 this_softmax = True this_epsilon = 0.05 this_epsilon_half_life = 30 this_learning_rate = 0.1 this_learning_rate_decay = 1e6 this_lambd = 0.8 this_num_episodes = 3000 this_max_steps = 1000 this_tdl_fa_offline = True this_fa_spec = FuncApproxSpec( state_feature_funcs=FuncApproxBase.get_identity_feature_funcs( ic.lead_time + 1 ), action_feature_funcs=[lambda x: x], dnn_spec=DNNSpec( neurons=[2, 4], hidden_activation=DNNSpec.relu, hidden_activation_deriv=DNNSpec.relu_deriv ) ) raa = RunAllAlgorithms( mdp_refined=mdp_ref_obj, tolerance=this_tolerance, first_visit_mc=this_first_visit_mc, num_samples=num_samples, softmax=this_softmax,
mdp_ref_obj = wg.get_mdp_refined() this_tolerance = 1e-3 exploring_start = False this_first_visit_mc = True this_num_samples = 30 this_softmax = False this_epsilon = 0.0 this_epsilon_half_life = 100 this_learning_rate = 0.1 this_learning_rate_decay = 1e6 this_lambd = 0.8 this_num_episodes = 1000 this_batch_size = 10 this_max_steps = 1000 this_td_offline = True state_ffs = FuncApproxBase.get_indicator_feature_funcs( mdp_ref_obj.all_states) sa_ffs = [(lambda x, f=f: f(x[0])) for f in state_ffs] +\ [(lambda x, f=f: f(x[1])) for f in FuncApproxBase.get_indicator_feature_funcs( {m.name for m in Move} )] this_fa_spec = FuncApproxSpec( state_feature_funcs=state_ffs, sa_feature_funcs=sa_ffs, dnn_spec=None # dnn_spec=DNNSpec( # neurons=[2, 4], # hidden_activation=DNNSpec.relu, # hidden_activation_deriv=DNNSpec.relu_deriv, # output_activation=DNNSpec.identity, # output_activation_deriv=DNNSpec.identity_deriv # )