Exemplo n.º 1
0
def exp_run_industrial_datasets(iteration,
                                datasets=('paintcontrol', 'iofrol', 'gsdtsr')):
    """Run one experiment iteration over the industrial CI datasets.

    For each agent type (tableau and network), each dataset, and each reward
    function, a fresh agent is built and trained via ``retecs.PrioLearning``.

    Args:
        iteration: Integer run index, embedded in the output file prefix so
            repeated runs do not overwrite each other.
        datasets: Iterable of scenario names to evaluate
            (default: the three industrial datasets).

    Returns:
        List of training results (one entry per agent/dataset/reward combo),
        as returned by ``PrioLearning.train``.
    """
    # Agent factories: each lambda builds a fresh (agent, preprocessor, reward)
    # triple per run, so no learned state leaks between configurations.
    # NOTE: the default was a mutable list, which is shared across calls in
    # Python; an immutable tuple avoids that pitfall with identical behavior.
    ags = [
        lambda:
        (agents.TableauAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH,
                             learning_rate=retecs.DEFAULT_LEARNING_RATE,
                             state_size=retecs.DEFAULT_STATE_SIZE,
                             action_size=retecs.DEFAULT_NO_ACTIONS,
                             epsilon=retecs.DEFAULT_EPSILON),
         retecs.preprocess_discrete, reward.timerank),
        lambda:
        (agents.NetworkAgent(histlen=retecs.DEFAULT_HISTORY_LENGTH,
                             state_size=retecs.DEFAULT_STATE_SIZE,
                             action_size=1,
                             hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
         retecs.preprocess_continuous, reward.tcfail)
    ]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail
    }

    avg_napfd = []

    for i, get_agent in enumerate(ags):
        for sc in datasets:
            for reward_name, reward_fun in reward_funs.items():
                agent, preprocessor, _ = get_agent()
                file_appendix = f'rq_{agent.name}_{sc}_{reward_name}_{iteration}'

                scenario = get_scenario(sc)

                rl_learning = retecs.PrioLearning(
                    agent=agent,
                    scenario_provider=scenario,
                    reward_function=reward_fun,
                    preprocess_function=preprocessor,
                    file_prefix=file_appendix,
                    dump_interval=100,
                    validation_interval=0,
                    output_dir=DATA_DIR)
                # Comparison baselines are only collected for the first agent
                # type (i == 0) to avoid redundant work.
                res = rl_learning.train(no_scenarios=CI_CYCLES,
                                        print_log=True,
                                        plot_graphs=False,
                                        save_graphs=False,
                                        collect_comparison=(i == 0))
                avg_napfd.append(res)

    return avg_napfd
Exemplo n.º 2
0
    ## State is represented by [duration group, time slice, last histlen verdicts],
    ## hence the state size is 2 + histlen.
    ## Other features (e.g. test priority) may be added later to enrich the
    ## state; state_size must then be updated accordingly.
    state_size = 2 + args.histlen

    ## Preprocess functions build the state representation of a test case:
    ## preprocess_continuous assigns continuous values to the time group and
    ## duration slice; preprocess_discrete buckets them into [0, 1, 2].
    preprocess_function = preprocess_discrete

    ## Initialize the agent.
    if args.agent == 'tableau':
        agent = agents.TableauAgent(learning_rate=args.learning_rate,
                                    state_size=state_size,
                                    action_size=args.actions,
                                    epsilon=args.epsilon,
                                    histlen=args.histlen)

    ## Network agent: action size 1 uses an MLPClassifier,
    ## action size 2 uses an MLPRegressor.
    elif args.agent == 'network':
        ## BUG FIX: the original `args.reward in ('binary')` tested substring
        ## membership in the string 'binary' (parentheses without a comma do
        ## not form a tuple), so e.g. 'bin' or 'b' would also match. The
        ## intended check is equality.
        if args.reward == 'binary':
            action_size = 1
        else:
            action_size = 2

        agent = agents.NetworkAgent(state_size=state_size,
                                    action_size=action_size,
                                    hidden_size=args.hiddennet,
                                    histlen=args.histlen)