def main():
    """Benchmark constant-action predictors against the stored Q-predictor.

    Creates one ``PredictorConst`` per index of the step-size grid, adds a
    ``PredictorQ`` wrapping the value model requested with
    ``filename='predictor'`` and the scaler read from 'scaler.bin', and hands
    all of them to ``benchmark`` together with an ``IntegratorLinReg``
    integrator. The result of ``benchmark`` divided by the episode count is
    printed.
    """
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    nodes = 3
    num_actions = len(step_sizes)
    num_episodes = 500

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.1,
        step_sizes=step_sizes,
        error_tol=0.0005,
        nodes_per_integ=nodes,
    )

    # One constant predictor per action index, followed by the Q-predictor.
    candidates = list(map(PredictorConst, range(num_actions)))
    value_model = build_value_model(
        dim_state=nodes, dim_action=num_actions, filename='predictor')
    candidates += [PredictorQ(value_model, load('scaler.bin'))]

    # The integrator loads its own copy of the scaler file.
    integrator = IntegratorLinReg(
        step_sizes, load('linreg_models.bin'), load('scaler.bin'))
    totals = benchmark(candidates, integrator, num_episodes, env)
    # Alternative run with the plain Simpson integrator:
    # totals = benchmark(candidates, Simpson(), num_episodes, env)

    print(totals / num_episodes)
def main():
    """Set up environment, predictor and estimator, then call ``train_model``.

    The environment integrates ``Sinus()`` with three nodes per step and a
    memory of one. The predictor is a ``PredictorQ`` around a freshly
    requested value model (``filename=None``); the estimator wraps the model
    requested with ``filename='estimator'`` and the scaler from 'scaler.bin'.
    """
    # Step-size grids tried earlier:
    #   [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    #   [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    nodes = 3
    look_back = 1
    error_tol = 7.5e-6

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.075,
        error_tol=error_tol,
        nodes_per_integ=nodes,
        memory=look_back,
        x0=0,
        max_dist=20,
        step_size_range=(step_sizes[0], step_sizes[-1]),
    )

    scaler = load('scaler.bin')

    # The predictor uses its own scaler file, not the one loaded above.
    value_model = build_value_model(
        dim_state=nodes,
        dim_action=len(step_sizes),
        filename=None,
        lr=0.00001,
        memory=look_back,
    )
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=value_model,
        scaler=load('model_quad/model_sinus/Simpson/scaler.bin'),
    )

    integrator = Simpson()

    # The estimator threshold is one hundred times the environment tolerance.
    estimator_model = build_estimator_model(
        nodes, lr=0.0001, filename='estimator')
    estimator = Estimator(estimator_model, scaler, threshold=100 * error_tol)

    train_model(estimator, env, predictor, integrator, 5000, scaler)
def one_fun_boole():
    """Run the Q-predictor with a Boole integrator and compare with fixed rules.

    After the predictor run, Boole's rule and Simpson's rule are evaluated
    with the same number of function evaluations, followed by a Romberg run
    (tol=0.0005, order=3). For every method the summed and mean step-wise
    errors and the evaluation count are printed, and each rule is plotted.
    """
    lower = 0
    requested_upper = 10
    step_sizes = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    # Grid tried earlier:
    # [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    nodes = 5
    look_back = 1

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.1,
        step_sizes=step_sizes,
        error_tol=0.000001,
        memory=look_back,
        nodes_per_integ=nodes,
    )
    value_model = build_value_model(
        dim_state=nodes,
        dim_action=len(step_sizes),
        filename='predictor',
        memory=look_back,
    )
    predictor = PredictorQ(value_model, load('scaler_boole_mem1.bin'))

    # Alternative integrator:
    # IntegratorLinReg(step_sizes, load('linreg_models.bin'), load('scaler.bin'))
    # The third return value is reported as the "new x1" and is the upper
    # bound used for every reference rule below.
    _, evals, upper, step_errors = integrate_env(
        predictor, Boole(), env, lower, requested_upper, plot=True)
    print('new x1: {}'.format(upper))
    print('Predictor error total: {}'.format(np.sum(step_errors)))
    print('Predictor error per step: {}'.format(np.mean(step_errors)))
    print('Predictor evals: {}'.format(evals))
    print('')

    # Boole's rule with the predictor's evaluation budget.
    env.reset(reset_params=False)
    boole_rule = BoolesRule(env.fun, lower, upper)
    _, step_errors = boole_rule(num_evals=evals, stepwise_error=True)
    print('Boole error total: {}'.format(np.sum(step_errors)))
    print('Boole error per step: {}'.format(np.mean(step_errors)))
    print('Boole evals: {}'.format(boole_rule.evals))
    print('')
    boole_rule.plot()

    # Simpson's rule with the predictor's evaluation budget.
    env.reset(reset_params=False)
    simpson_rule = Simps(env.fun, lower, upper)
    _, step_errors = simpson_rule(num_evals=evals, stepwise_error=True)
    print('Simpson error total: {}'.format(np.sum(step_errors)))
    print('Simpson error per step: {}'.format(np.mean(step_errors)))
    print('Simpson evals: {}'.format(simpson_rule.evals))
    print('')
    simpson_rule.plot()

    # Romberg chooses its own number of evaluations.
    env.reset(reset_params=False)
    romberg = Romberg(env.fun, lower, upper, tol=0.0005, order=3)
    _, step_errors = romberg(0.15, stepwise_errors=True)
    print('Romberg error total: {}'.format(np.sum(step_errors)))
    print('Romberg error per step: {}'.format(np.mean(step_errors)))
    print('Romberg evals: {}'.format(romberg.evals))
    romberg.plot()
def test_sinus():
    """Tune the Romberg tolerance on ``Sinus()`` and report the result.

    Romberg (order 6) is run on the interval ``[x0, x1]`` up to ten times.
    After each run the mean step-wise error is printed and the tolerance is
    adapted: doubled when the mean error is below 0.0001, divided by three
    when it is above 0.0005. The loop stops as soon as the mean error lies
    between those two limits. Finally the number of evaluations of the last
    run, its global error and its plot are shown.
    """
    f = Sinus()
    # Bounds are named once so the reference integral below always covers
    # exactly the interval that Romberg integrated. Previously the reference
    # was taken over (0, 20) while Romberg ran over (0, 10).
    x0, x1 = 0, 10
    tol = 0.0005

    # range(10) guarantees at least one pass, so rom, integ_rom and
    # evals_rom_step are always bound after the loop.
    for _ in range(10):
        rom = Romberg(f, x0, x1, tol=tol, order=6)
        integ_rom, errors = rom(0.15, True)
        error_rom_step = np.mean(errors)
        print(error_rom_step)
        evals_rom_step = rom.evals

        if error_rom_step < 0.0001:
            # More accurate than required: loosen the tolerance.
            tol *= 2.0
        elif error_rom_step > 0.0005:
            # Not accurate enough: tighten the tolerance.
            tol /= 3.0
        else:
            break

    print('evaluations: {}'.format(evals_rom_step))
    print('global error: {}'.format(abs(integ_rom - f.integral(x0, x1))))
    rom.plot()
def save_scaler():
    """Fit a ``StandardScaler`` on sampled states and write it to disk.

    The states come from ``env.sample_states(50000)`` of a ``Sinus()``
    integration environment with three nodes per step and a memory of one.
    The fitted scaler is dumped, compressed, to 'scaler_mem1.bin'.
    """
    # Step-size grids tried earlier:
    #   [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    #   [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    smallest, largest = step_sizes[0], step_sizes[-1]

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.075,
        error_tol=7.5e-6,
        nodes_per_integ=3,
        memory=1,
        x0=-1,
        max_dist=2,
        step_size_range=(smallest, largest),
    )

    # Build the scaler from a sample of environment states.
    scaler = StandardScaler()
    sampled_states = env.sample_states(50000)
    scaler.fit(sampled_states)

    dump(scaler, 'scaler_mem1.bin', compress=True)
def one_fun():
    """Compare the Q-predictor with adaptive and plain Simpson on one function.

    The predictor integrates with a ``Simpson()`` integrator first. Its
    evaluation count is then given to ``AdaptSimpsConstEvals`` and ``Simps``.
    For each method the absolute deviation from ``env.fun.integral`` and the
    evaluation count are printed, and the two reference rules are plotted.
    """
    lower = 0.0
    requested_upper = 10.0
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    nodes = 3

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.15,
        step_sizes=step_sizes,
        error_tol=0.0005,
    )
    value_model = build_value_model(
        dim_state=nodes, dim_action=len(step_sizes), filename='predictor')
    predictor = PredictorQ(value_model, load('scaler.bin'))

    # The third return value is reported as the "new x1" and is the upper
    # bound used for the reference methods below.
    approx, evals, upper, _ = integrate_env(
        predictor, Simpson(), env, lower, requested_upper, plot=True)
    print('new x1: {}'.format(upper))
    exact = env.fun.integral(lower, upper)
    print('Predictor error: {}'.format(abs(exact - approx)))
    print('Predictor evals: {}'.format(evals))

    # Adaptive Simpson with the predictor's evaluation budget.
    env.reset(reset_params=False)
    adaptive = AdaptSimpsConstEvals(env.fun, lower, upper)
    approx = adaptive(evals)
    exact = env.fun.integral(lower, upper)
    print('ASR error: {}'.format(abs(exact - approx)))
    print('ASR evals: {}'.format(adaptive.evals))
    adaptive.plot()

    # Plain Simpson with the predictor's evaluation budget.
    env.reset(reset_params=False)
    simpson_rule = Simps(env.fun, lower, upper)
    approx_simpson = simpson_rule(num_evals=evals)
    exact = env.fun.integral(lower, upper)
    print('Simpson error: {}'.format(abs(exact - approx_simpson)))
    print('Simpson evals: {}'.format(simpson_rule.evals))
    simpson_rule.plot()
def main():
    """Train the Q-value predictor on the sinus integration environment.

    Each episode resets the environment and then, step by step, asks the
    predictor for action values, picks an action with ``choose_action3``,
    applies the matching step size with a ``Simpson()`` integrator, and
    stores a regression target for the predictor. Whenever the experience
    buffer is full or the episode ends, the predictor is trained on the
    buffered samples. Performance is evaluated and plotted every 100
    episodes and the model weights are saved to 'predictor' every 10.
    """
    # gamma is 0.0, so the next-state term in the target below is multiplied
    # by zero.
    gamma = 0.0
    num_episodes = 100000

    # Alternative step-size grids kept for reference:
    # step_sizes = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    # step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]

    dim_state = 3  # nodes per integration step
    dim_action = len(step_sizes)
    memory = 0  # how many integration steps the predictor can look back

    # 7.5e-6
    env = IntegrationEnv(fun=Sinus(), max_iterations=256,
                         initial_step_size=0.075, error_tol=7.5e-6,
                         nodes_per_integ=dim_state, memory=memory, x0=0,
                         max_dist=20,
                         step_size_range=(step_sizes[0], step_sizes[-1]))
    # env = IntegrationEnv(fun=Sinus(), max_iterations=128, initial_step_size=0.1, step_sizes=step_sizes,
    #                      error_tol=0.0005, nodes_per_integ=dim_state, memory=memory)

    experience = Experience(batch_size=32)

    # filename=None is passed here; the weights are later written to
    # 'predictor' by save_weights at the end of the episode loop.
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=build_value_model(dim_state=dim_state, dim_action=dim_action,
                                filename=None, lr=0.00001, memory=memory),
        scaler=load('model_quad/model_sinus/Simpson/scaler.bin'))

    # integrator = IntegratorLinReg(step_sizes, load('linreg_models.bin'), load('scaler.bin'))
    # integrator = Boole()
    integrator = Simpson()

    perf_tracker = PerformanceTracker(env, num_testfuns=1000, x0=-1, x1=1)

    # losses = []
    # moving_average = []

    for episode in range(num_episodes):
        state = env.reset()
        reward_total = 0
        loss_this_episode = 0
        steps = 0  # incremented per step but not read anywhere in this function
        done = False

        eps = 0.66
        # NOTE(review): episode comes from range(num_episodes) and is never
        # negative, so this branch is unreachable and eps stays at 0.66.
        # It looks like a disabled exploration warm-up - confirm before
        # removing.
        if episode < 0:
            # eps = 0.01 + (1.0 - 0.01) * math.exp(-0.023 * episode
            eps = 0.2 + 0.8 * 2.71828**(
                -0.0146068 * episode
            )  # decrease from 1.0 to approx 0.2 at episode 300

        print('episode: {}'.format(episode))

        while not done:
            # get action from actor
            actions = predictor.get_actions(state)
            # Same unreachable condition as above: choose_action3 is always
            # the function that is called.
            if episode < 0:
                action = choose_action(actions, eps, dim_action)
            else:
                action = choose_action3(actions, eps, dim_action)
            step_size = predictor.action_to_stepsize(action)

            # execute action
            next_state, reward, done, _ = env.iterate(step_size, integrator)
            steps += 1
            reward_total += reward

            # learning
            action_next_state = predictor.get_actions(next_state)
            target = reward + gamma * np.max(action_next_state)
            # Only the entry of the chosen action is replaced by the target;
            # the other entries keep the predictor's own outputs.
            # NOTE(review): if actions is a NumPy array, squeeze() may return
            # a view, in which case this write also modifies actions - confirm.
            target_actions = actions.squeeze()
            target_actions[action] = target
            # print(target)
            # print('')

            experience.append(state=state, target=target_actions)

            # Train when the buffer is full, and also at the end of the
            # episode so the remaining samples are used before the reset.
            if experience.is_full() or done:
                states, targets = experience.get_samples()
                loss_predictor = predictor.train_on_batch(states, targets)
                loss_this_episode += loss_predictor
                experience.reset()

            state = next_state

        print('reward: {}'.format(reward_total))
        print('loss_predictor: {}'.format(loss_this_episode))

        # losses.append(loss_this_episode)
        # if episode % 10 == 0 and len(losses) > 99:
        #     moving_average.append(np.mean(losses[-100:]))
        #     plt.plot(moving_average, 'r')
        #     plt.pause(0.05)

        # Periodic evaluation and plots (this includes episode 0).
        if episode % 100 == 0:
            perf_tracker.evaluate_performance(predictor, integrator)
            perf_tracker.plot()
            perf_tracker.plot_pareto(num_points=7)

        # if episode % 250 == 0:
        #     env.plot(episode=episode, x_min=-1.5, x_max=1.5)

        # Periodic checkpoint of the predictor weights.
        if episode % 10 == 0:
            predictor.model.save_weights('predictor')
def compare_romberg():
    """Compare the Q-predictor with Romberg integration over 100 samples.

    Each sample resets the environment, integrates with the predictor and a
    ``Simpson()`` integrator, and then runs Romberg (tol=0.0003, order=2) on
    the same function and interval. The mean and variance of the per-step
    error and of the evaluation count are printed for both methods.
    """
    lower = 0.0
    num_samples = 100
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    # Grid tried earlier:
    # [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    nodes = 3

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.15,
        step_sizes=step_sizes,
        error_tol=0.0005,
    )
    value_model = build_value_model(
        dim_state=nodes, dim_action=len(step_sizes), filename='predictor')
    predictor = PredictorQ(value_model, load('scaler.bin'))

    predictor_errors = []
    predictor_evals = []
    romberg_errors = []
    romberg_evals = []

    for sample in range(num_samples):
        if sample % 10 == 0:
            print(sample)

        # Predictor run; the returned upper bound is reused for Romberg.
        env.reset()
        _, evals, upper, step_errors = integrate_env(
            predictor, Simpson(), env, lower, 20.0)
        predictor_step_error = np.mean(step_errors)

        # Romberg run with a fixed tolerance. An earlier variant adapted the
        # tolerance in up to ten retries until the mean step error fell
        # between 0.0001 and 0.0005.
        env.reset(reset_params=False)
        romberg = Romberg(env.fun, lower, upper, tol=0.0003, order=2)
        _, step_errors = romberg(0.15, True)
        romberg_step_error = np.mean(step_errors)

        predictor_errors.append(predictor_step_error)
        romberg_errors.append(romberg_step_error)
        predictor_evals.append(evals)
        romberg_evals.append(romberg.evals)

    # Averages.
    print(
        'Avg. predictor number of function evaluations per episode: {}'.format(
            np.mean(predictor_evals)))
    print('Avg. predictor error per step: {}'.format(
        np.mean(predictor_errors)))
    print('Avg. romberg number of function evaluations per episode: {}'.format(
        np.mean(romberg_evals)))
    print('Avg. romberg error per step: {}'.format(np.mean(romberg_errors)))
    print('')

    # Variances.
    print(
        'Variance of predictor number of function evaluations per episode: {}'.
        format(np.var(predictor_evals)))
    print(
        'Variance of predictor error per step: {}'.format(
            np.var(predictor_errors)))
    print('Variance of rom number of function evaluations per episode: {}'.
          format(np.var(romberg_evals)))
    print('Variance of rom error per step: {}'.format(np.var(romberg_errors)))