def create_control(params: SimpleNamespace, config_name) -> LoopControlV10:
    """Wire up a car-environment training loop from a parameter namespace.

    The components are created in dependency order: a ``CarEnvV10``, a
    ``SimpleAgentV10`` acting on that environment, a ``SimpleBridgeV10``
    wrapping the agent, and finally the ``LoopControlV10`` that is returned.

    Args:
        params: Namespace supplying the ``env_*``, ``agent_*``, ``bridge_*``
            and ``loop_*`` attributes read below.
        config_name: Forwarded to the loop control as its ``run_name``.

    Returns:
        The ``LoopControlV10`` built around the freshly created bridge.
    """
    env_settings = {
        "mode_energy_penalty": params.env_mode_energy_penalty,
        "mode_random": params.env_mode_random,
        "mode_limit_steps": params.env_mode_limit_steps,
        "mode_time_penalty": params.env_mode_time_penalty,
    }
    car_env = CarEnvV10(**env_settings)

    agent_settings = {
        "devicestr": params.agent_device,
        "gamma": params.agent_gamma_exp,
        "buffer_size": params.agent_buffer_size,
        "target_net_sync": params.agent_target_net_sync,
        "eps_start": params.agent_simple_eps_start,
        "eps_final": params.agent_simple_eps_final,
        "eps_frames": params.agent_simple_eps_frames,
    }
    car_agent = SimpleAgentV10(car_env, **agent_settings)

    bridge_settings = {
        "agent": car_agent,
        "optimizer": params.bridge_optimizer,
        "learning_rate": params.bridge_learning_rate,
        "gamma": params.bridge_gamma,
        "initial_population": params.bridge_initial_population,
        "batch_size": params.bridge_batch_size,
    }
    agent_bridge = SimpleBridgeV10(**bridge_settings)

    # The log folder is a fixed relative path, not taken from ``params``.
    return LoopControlV10(
        bridge=agent_bridge,
        run_name=config_name,
        bound_avg_reward=params.loop_bound_avg_reward,
        logtb=params.loop_logtb,
        logfolder="./../runs/runv00",
    )
def create_control(params: SimpleNamespace, config_name) -> LoopControlV10:
    """Wire up a robo-advisor training loop from a parameter namespace.

    The components are created in dependency order: an ``InvestUniverse``, a
    ``RoboAdvisorEnvV10`` over that universe, a ``RoboAdvisorAgentV10`` acting
    on the environment, a ``SimpleBridgeV10`` wrapping the agent, and finally
    the ``LoopControlV10`` that is returned.

    Args:
        params: Namespace supplying the ``env_*``, ``agent_*``, ``bridge_*``
            and ``loop_*`` attributes read below.
        config_name: Forwarded to the loop control as its ``run_name``.

    Returns:
        The ``LoopControlV10`` built around the freshly created bridge.
    """
    invest_universe = InvestUniverse()

    env_settings = {
        "reward_average_count": params.env_reward_average_count,
        "start_cash": params.env_start_cash,
        "trading_cost": params.env_trading_cost,
        # NOTE(review): "env_buy_volumne" looks like a misspelling of
        # "env_buy_volume"; kept as-is because the config schema is not
        # visible here -- confirm against the parameter definitions.
        "buy_volume": params.env_buy_volumne,
        "proportional_buy_volume": params.env_proportional_buy_volume,
    }
    advisor_env = RoboAdvisorEnvV10(invest_universe, **env_settings)

    agent_settings = {
        "devicestr": params.agent_device,
        "gamma": params.agent_gamma_exp,
        "buffer_size": params.agent_buffer_size,
        "target_net_sync": params.agent_target_net_sync,
        "eps_start": params.agent_simple_eps_start,
        "eps_final": params.agent_simple_eps_final,
        "eps_frames": params.agent_simple_eps_frames,
        "hidden_size": params.agent_hidden_size,
        "hidden_layers": params.agent_hidden_layers,
        "dueling_network": params.agent_dueling_network,
        "steps_count": params.agent_steps_count,
        "use_combined_replay_buffer": params.agent_use_combined_replay_buffer,
    }
    advisor_agent = RoboAdvisorAgentV10(advisor_env, **agent_settings)

    bridge_settings = {
        "agent": advisor_agent,
        # One output action per company in the universe, each with a fixed
        # count of 3 action states.
        "output_actions": len(invest_universe.get_companies()),
        "output_action_states": 3,
        "optimizer": params.bridge_optimizer,
        "learning_rate": params.bridge_learning_rate,
        "gamma": params.bridge_gamma,
        "initial_population": params.bridge_initial_population,
        "batch_size": params.bridge_batch_size,
    }
    agent_bridge = SimpleBridgeV10(**bridge_settings)

    # The log folder is a fixed relative path, not taken from ``params``.
    return LoopControlV10(
        bridge=agent_bridge,
        run_name=config_name,
        bound_avg_reward=params.loop_bound_avg_reward,
        logtb=params.loop_logtb,
        logfolder="./../runs/runv00",
    )
def test_basic_init():
    """Smoke test: constructing a LoopControlV10 from the basic bridge must not raise."""
    # No assertions: the test passes as long as construction completes.
    LoopControlV10(basic_init_bridge(), "dummy")