# Assumed context: these examples rely on names defined in the surrounding
# experiment module (not shown here): `agents`, `retecs`, `reward`,
# `get_scenario`, `CI_CYCLES`, `DATA_DIR`, `DATA_DIR_CSV` and, in example 2,
# the `XCS` / `XCS_ER` classifier-system agents.
def exp_run_industrial_datasets(iteration,
                                datasets=['iofrol', 'paintcontrol']):  # 'gsdtsr' left out here
    """Train each configured agent on every dataset/reward combination and collect the results."""
    ags = [
        # The tableau and feed-forward network agents are kept below but
        # commented out; only the LSTM agent is evaluated in this variant.
        #lambda: (
        #    agents.TableauAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH, learning_rate=retecs.DEFAULT_LEARNING_RATE,
        #                       state_size=retecs.DEFAULT_STATE_SIZE,
        #                        action_size=retecs.DEFAULT_NO_ACTIONS, epsilon=retecs.DEFAULT_EPSILON),
        #    retecs.preprocess_discrete, reward.timerank),
        #lambda: (agents.NetworkAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH, state_size=retecs.DEFAULT_STATE_SIZE,
        #                             action_size=1,
        #                             hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES), retecs.preprocess_continuous,
        #         reward.tcfail),
        lambda: (agents.LSTMAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH,
                                  state_size=retecs.DEFAULT_STATE_SIZE,
                                  action_size=1,
                                  hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
                 retecs.preprocess_continuous, reward.tcfail)
    ]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail
    }

    avg_napfd = []

    for i, get_agent in enumerate(ags):
        for sc in datasets:
            for (reward_name, reward_fun) in reward_funs.items():
                # The reward function bundled in the agent tuple is ignored;
                # every entry of reward_funs is evaluated instead.
                agent, preprocessor, _ = get_agent()
                file_appendix = 'rq_%s_%s_%s_%d' % (agent.name, sc,
                                                    reward_name, iteration)

                scenario = get_scenario(sc)

                rl_learning = retecs.PrioLearning(
                    agent=agent,
                    scenario_provider=scenario,
                    reward_function=reward_fun,
                    preprocess_function=preprocessor,
                    file_prefix=file_appendix,
                    dump_interval=100,
                    validation_interval=0,
                    output_dir=DATA_DIR,
                    output_csv_dir=DATA_DIR_CSV)
                res = rl_learning.train(no_scenarios=CI_CYCLES,
                                        print_log=False,
                                        plot_graphs=False,
                                        save_graphs=False,
                                        collect_comparison=False)  # was: (i == 0)
                avg_napfd.append(res)

    return avg_napfd
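
In this first variant the agents are produced by zero-argument lambdas that are called inside the innermost loop, so every dataset/reward combination starts from a fresh, untrained agent, and each result returned by train() is appended to avg_napfd.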
Example #2
def exp_run_industrial_datasets(iteration,
                                datasets=['paintcontrol', 'iofrol', 'gsdtsr']):
    """Train the XCS variants and the network agent on every dataset/reward combination."""
    ags = [
        # Two XCS classifier-system agents and the feed-forward network agent;
        # comparison baselines are collected only for the first of them (i == 0).
        lambda: (XCS_ER(2000, list(range(0, 45)), 6),
                 retecs.preprocess_xcs, reward.tcfail),
        lambda: (XCS(2000, list(range(0, 45)), 6),
                 retecs.preprocess_xcs, reward.tcfail),
        lambda: (agents.NetworkAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH,
                                     state_size=retecs.DEFAULT_STATE_SIZE,
                                     action_size=1,
                                     hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
                 retecs.preprocess_continuous, reward.tcfail)
    ]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail
    }

    avg_napfd = []

    for i, get_agent in enumerate(ags):
        for sc in datasets:
            for (reward_name, reward_fun) in reward_funs.items():
                agent, preprocessor, _ = get_agent()
                file_appendix = 'rq_%s_%s_%s_%d' % (agent.name, sc,
                                                    reward_name, iteration)

                scenario = get_scenario(sc)

                rl_learning = retecs.PrioLearning(
                    agent=agent,
                    scenario_provider=scenario,
                    reward_function=reward_fun,
                    preprocess_function=preprocessor,
                    file_prefix=file_appendix,
                    dump_interval=100,
                    validation_interval=0,
                    output_dir=DATA_DIR)
                res = rl_learning.train(no_scenarios=CI_CYCLES,
                                        print_log=False,
                                        plot_graphs=False,
                                        save_graphs=False,
                                        collect_comparison=(i == 0))
                avg_napfd.append(res)

    return avg_napfd
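
Both examples return a list of per-run results that a caller still has to repeat and aggregate across iterations. A minimal driver along those lines is sketched below; the ITERATIONS constant, the run_all_iterations helper and the use of multiprocessing.Pool are illustrative assumptions, not part of the original experiment code.

# Hypothetical driver sketch: repeat the experiment for several independent
# iterations and flatten the collected results. ITERATIONS, run_all_iterations
# and the multiprocessing parallelism are assumptions for illustration only.
import multiprocessing

ITERATIONS = 30  # assumed number of repetitions

def run_all_iterations(exp_fun=exp_run_industrial_datasets, iterations=ITERATIONS):
    with multiprocessing.Pool() as pool:
        # Each worker executes one full agents x datasets x reward-functions grid.
        per_iteration = pool.map(exp_fun, range(iterations))
    # Flatten the per-iteration result lists into a single list.
    return [res for results in per_iteration for res in results]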