def exp_run_industrial_datasets(iteration, datasets=['iofrol', 'paintcontrol']):  # , 'gsdtsr'
    """Run the prioritisation experiment for one iteration over the industrial datasets.

    For every (agent, dataset, reward function) combination a PrioLearning run is
    trained for CI_CYCLES scheduling cycles and its NAPFD result is collected.
    Assumes the surrounding experiment script provides the agents, retecs and reward
    modules as well as get_scenario, DATA_DIR, DATA_DIR_CSV and CI_CYCLES.
    """
    ags = [
        # lambda: (
        #     agents.TableauAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH, learning_rate=retecs.DEFAULT_LEARNING_RATE,
        #                         state_size=retecs.DEFAULT_STATE_SIZE,
        #                         action_size=retecs.DEFAULT_NO_ACTIONS, epsilon=retecs.DEFAULT_EPSILON),
        #     retecs.preprocess_discrete, reward.timerank),
        # lambda: (agents.NetworkAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH, state_size=retecs.DEFAULT_STATE_SIZE,
        #                              action_size=1,
        #                              hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
        #          retecs.preprocess_continuous, reward.tcfail),
        lambda: (agents.LSTMAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH, state_size=retecs.DEFAULT_STATE_SIZE,
                                  action_size=1,
                                  hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
                 retecs.preprocess_continuous, reward.tcfail)
    ]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail
    }

    avg_napfd = []

    for i, get_agent in enumerate(ags):
        for sc in datasets:
            for (reward_name, reward_fun) in reward_funs.items():
                agent, preprocessor, _ = get_agent()
                file_appendix = 'rq_%s_%s_%s_%d' % (agent.name, sc, reward_name, iteration)

                scenario = get_scenario(sc)

                rl_learning = retecs.PrioLearning(agent=agent,
                                                  scenario_provider=scenario,
                                                  reward_function=reward_fun,
                                                  preprocess_function=preprocessor,
                                                  file_prefix=file_appendix,
                                                  dump_interval=100,
                                                  validation_interval=0,
                                                  output_dir=DATA_DIR,
                                                  output_csv_dir=DATA_DIR_CSV)
                res = rl_learning.train(no_scenarios=CI_CYCLES,
                                        print_log=False,
                                        plot_graphs=False,
                                        save_graphs=False,
                                        collect_comparison=False)  # (i == 0)
                avg_napfd.append(res)

    return avg_napfd
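
# A second variant of the same experiment driver follows (presumably from a separate
# experiment script, since it reuses the function name): it swaps the LSTM agent for
# the XCS and XCS_ER classifier-system agents, covers all three industrial datasets,
# and enables the baseline comparison for the first agent.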
def exp_run_industrial_datasets(iteration, datasets=['paintcontrol', 'iofrol', 'gsdtsr']):
    ags = [
        lambda: (XCS_ER(2000, list(range(0, 45)), 6), retecs.preprocess_xcs, reward.tcfail),
        lambda: (XCS(2000, list(range(0, 45)), 6), retecs.preprocess_xcs, reward.tcfail),
        lambda: (agents.NetworkAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH, state_size=retecs.DEFAULT_STATE_SIZE,
                                     action_size=1,
                                     hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
                 retecs.preprocess_continuous, reward.tcfail)
    ]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail
    }

    avg_napfd = []

    for i, get_agent in enumerate(ags):
        for sc in datasets:
            for (reward_name, reward_fun) in reward_funs.items():
                agent, preprocessor, _ = get_agent()
                file_appendix = 'rq_%s_%s_%s_%d' % (agent.name, sc, reward_name, iteration)

                scenario = get_scenario(sc)

                rl_learning = retecs.PrioLearning(agent=agent,
                                                  scenario_provider=scenario,
                                                  reward_function=reward_fun,
                                                  preprocess_function=preprocessor,
                                                  file_prefix=file_appendix,
                                                  dump_interval=100,
                                                  validation_interval=0,
                                                  output_dir=DATA_DIR)
                res = rl_learning.train(no_scenarios=CI_CYCLES,
                                        print_log=False,
                                        plot_graphs=False,
                                        save_graphs=False,
                                        collect_comparison=(i == 0))
                avg_napfd.append(res)

    return avg_napfd
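
# A minimal driver sketch, not part of the original script: it repeats
# exp_run_industrial_datasets for several independent iterations and collects the
# per-run NAPFD results. The iteration count, the use of multiprocessing.Pool and
# the __main__ guard are assumptions for illustration; the project may supply its
# own runner for these experiment functions.
if __name__ == '__main__':
    from multiprocessing import Pool

    ITERATIONS = 30  # assumed number of independent repetitions

    with Pool() as pool:
        # Each worker runs one full iteration over all agent/dataset/reward combinations.
        all_results = pool.map(exp_run_industrial_datasets, range(ITERATIONS))

    # all_results[i] holds the list of NAPFD values produced by iteration i.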