def exp_run_industrial_datasets(iteration,
                                datasets=('paintcontrol', 'iofrol', 'gsdtsr')):
    """Train every agent / dataset / reward-function combination once.

    For each agent factory, each dataset name and each reward function, a new
    agent and a new scenario are created and a ``retecs.PrioLearning`` run is
    trained for ``CI_CYCLES`` scenarios with ``DATA_DIR`` as output directory.

    Args:
        iteration: Integer index of this repetition; it is formatted with
            ``%d`` into the file prefix of every run.
        datasets: Iterable of dataset names, each passed to ``get_scenario``.

    Returns:
        A list holding the return value of ``PrioLearning.train`` for every
        combination, in loop order (agent, then dataset, then reward).
    """
    # Each factory returns (agent, preprocess function, reward function).
    # Factories are used so that every training run gets a new agent object.
    agent_factories = [
        lambda: (
            agents.TableauAgent(
                histlen=retecs.DEFAULT_HISTORY_LENGTH,
                learning_rate=retecs.DEFAULT_LEARNING_RATE,
                state_size=retecs.DEFAULT_STATE_SIZE,
                action_size=retecs.DEFAULT_NO_ACTIONS,
                epsilon=retecs.DEFAULT_EPSILON),
            retecs.preprocess_discrete,
            reward.timerank),
        lambda: (
            agents.NetworkAgent(
                histlen=retecs.DEFAULT_HISTORY_LENGTH,
                state_size=retecs.DEFAULT_STATE_SIZE,
                action_size=1,
                hidden_size=retecs.DEFAULT_NO_HIDDEN_NODES),
            retecs.preprocess_continuous,
            reward.tcfail),
    ]

    reward_funs = {
        'failcount': reward.failcount,
        'timerank': reward.timerank,
        'tcfail': reward.tcfail,
    }

    avg_napfd = []

    for i, get_agent in enumerate(agent_factories):
        for sc in datasets:
            for reward_name, reward_fun in reward_funs.items():
                # The reward function bundled with the agent is ignored here:
                # every entry of reward_funs is tried with every agent.
                agent, preprocessor, _ = get_agent()
                file_appendix = 'rq_%s_%s_%s_%d' % (
                    agent.name, sc, reward_name, iteration)

                # A scenario is requested anew for every run rather than
                # being shared between runs.
                scenario = get_scenario(sc)

                rl_learning = retecs.PrioLearning(
                    agent=agent,
                    scenario_provider=scenario,
                    reward_function=reward_fun,
                    preprocess_function=preprocessor,
                    file_prefix=file_appendix,
                    dump_interval=100,
                    validation_interval=0,
                    output_dir=DATA_DIR)

                # Comparison data is only collected for the first agent.
                res = rl_learning.train(
                    no_scenarios=CI_CYCLES,
                    print_log=True,
                    plot_graphs=False,
                    save_graphs=False,
                    collect_comparison=(i == 0))
                avg_napfd.append(res)

    return avg_napfd
## The state is represented by [duration group, time slice, last histlen verdicts],
## hence the state size is 2 + histlen.
## If other important features (e.g. the priority) are added to the state
## representation in the future, the state size has to be changed accordingly.
state_size = 2 + args.histlen

## Preprocess functions are used for the state representation: they represent
## a test case as a state.
## Preprocess continuous gives continuous values to the time group and the duration slice.
## Preprocess discrete gives discrete values [0, 1, 2] to the time group and the duration state.
preprocess_function = preprocess_discrete

## Initializing the agent
if args.agent == 'tableau':
    agent = agents.TableauAgent(learning_rate=args.learning_rate,
                                state_size=state_size,
                                action_size=args.actions,
                                epsilon=args.epsilon,
                                histlen=args.histlen)
elif args.agent == 'network':
    ## Per the original note: with action size 1 an MLPClassifier is used,
    ## with action size 2 an MLPRegressor (NetworkAgent is defined elsewhere).
    ## Compare with == here: ('binary') is a plain string, not a tuple, so
    ## `args.reward in ('binary')` was a substring test that also matched
    ## values such as '' or 'bin'.
    action_size = 1 if args.reward == 'binary' else 2

    agent = agents.NetworkAgent(state_size=state_size,
                                action_size=action_size,
                                hidden_size=args.hiddennet,
                                histlen=args.histlen)