def get_scenario(name):
    """Return the scenario provider registered under ``name``.

    ``'incremental'`` builds an ``IncrementalScenarioProvider`` whose
    ``episode_length`` is the module-level ``CI_CYCLES``.  ``'paintcontrol'``,
    ``'iofrol'`` and ``'gsdtsr'`` build an ``IndustrialDatasetScenarioProvider``
    reading ``DATA/<name>.csv``.

    Raises:
        ValueError: if ``name`` is none of the supported scenarios.  The
            previous implementation fell through every branch and failed
            with an ``UnboundLocalError`` on ``return sc`` instead.
    """
    if name == 'incremental':
        return scenarios.IncrementalScenarioProvider(episode_length=CI_CYCLES)

    # The dataset-backed scenarios differ only in the CSV they read.
    if name in ('paintcontrol', 'iofrol', 'gsdtsr'):
        return scenarios.IndustrialDatasetScenarioProvider(
            tcfile='DATA/{}.csv'.format(name))

    raise ValueError('Unknown scenario: {!r}'.format(name))
def run_optimal(dataset, repo_path, datfile, sched_time_ratio=DEFAULT_SCHED_TIME_RATIO):
    """Prioritize every commit of ``dataset`` and store the mean fitness.

    Iterates the scenarios read from
    ``{repo_path}/{dataset}/features-engineered.csv``.  For each one the
    prioritization is obtained from ``genetic_algorithm_tcp`` (skipped when
    there is at most one test case), evaluated with the metric, and its
    fitness accumulated.  The mean over all commits is logged and written as
    text to ``datfile``.

    Args:
        dataset: Dataset directory name under ``repo_path``.  For
            ``'iofrol'``, ``'paintcontrol'`` and ``'gsdtsr'`` the metric is
            built as ``NAPFDMetric('Verdict')``; otherwise ``NAPFDMetric()``.
        repo_path: Directory that contains the dataset directories.
        datfile: Path of the output file; it is overwritten.
        sched_time_ratio: Forwarded as the second argument of
            ``IndustrialDatasetScenarioProvider``.
    """
    scenario_provider = scenarios.IndustrialDatasetScenarioProvider(
        f"{repo_path}/{dataset}/features-engineered.csv", sched_time_ratio)

    logging.debug(f"Running for {dataset}")

    if dataset in ('iofrol', 'paintcontrol', 'gsdtsr'):
        metric = NAPFDMetric('Verdict')
    else:
        metric = NAPFDMetric()

    commits = 0
    total_fitness = 0

    start = time.time()
    for (t, vsc) in enumerate(scenario_provider, start=1):
        commits = t
        metric.update_available_time(vsc.get_available_time())
        actions = vsc.get_testcases()

        # Time only the search for the prioritization.
        start_exp = time.time()

        # With zero or one test case there is nothing to order.
        ind = [0]
        if len(actions) > 1:
            # Run GA to find the best NAPFD in current commit
            ind = genetic_algorithm_tcp(actions, metric)

        end_exp = time.time()

        # NOTE(review): the "+ 1" presumably turns 0-based GA indices into
        # 1-based priorities expected by sort_update_actions -- confirm.
        metric.evaluate(sort_update_actions(np.array(ind) + 1, actions))

        total_fitness += metric.fitness

        logging.debug(
            f"commit: {t} - fitness: {metric.fitness} - duration: {end_exp - start_exp}"
        )

    end = time.time()

    logging.debug(f"Time expend to run the experiments: {end - start}")

    # Divide by the number of commits actually evaluated.  The previous code
    # used a counter that started at 1 and was incremented per commit, so it
    # divided by N + 1.  An empty provider still yields 0.0.
    mean_fitness = total_fitness / commits if commits else 0.0
    logging.debug(mean_fitness)

    with open(datfile, 'w') as f:
        f.write(str(mean_fitness))
agent = agents.RandomAgent(histlen=args.histlen) elif args.agent == 'heur_sort': agent = agents.HeuristicSortAgent(histlen=args.histlen) elif args.agent == 'heur_weight': agent = agents.HeuristicWeightAgent(histlen=args.histlen) else: print('Unknown Agent') sys.exit() if args.scenario_provider == 'random': scenario_provider = scenarios.RandomScenarioProvider() elif args.scenario_provider == 'incremental': scenario_provider = scenarios.IncrementalScenarioProvider( episode_length=args.no_scenarios) elif args.scenario_provider == 'paintcontrol': scenario_provider = scenarios.IndustrialDatasetScenarioProvider( tcfile='DATA/paintcontrol.csv') # scenario_provider = scenarios.FileBasedSubsetScenarioProvider(scheduleperiod=datetime.timedelta(days=1), # tcfile='paintcontrol.csv', # solfile='paintcontrol.csv') args.validation_interval = 0 elif args.scenario_provider == 'iofrol': scenario_provider = scenarios.IndustrialDatasetScenarioProvider( tcfile='DATA/iofrol.csv') args.validation_interval = 0 elif args.scenario_provider == 'gsdtsr': scenario_provider = scenarios.IndustrialDatasetScenarioProvider( tcfile='DATA/gsdtsr.csv') args.validation_interval = 0 elif args.scenario_provider == 'siemens_data': scenario_provider = scenarios.IndustrialDatasetScenarioProvider(
else: print('Unknown Agent') sys.exit() if args.scenario_provider == 'random': scenario_provider = scenarios.RandomScenarioProvider() elif args.scenario_provider == 'incremental': scenario_provider = scenarios.IncrementalScenarioProvider(episode_length=args.no_scenarios) elif args.scenario_provider == 'paintcontrol': #scenario_provider = scenarios.IndustrialDatasetScenarioProvider(tcfile='DATA/paintcontrol.csv') scenario_provider = scenarios.FileBasedSubsetScenarioProvider(scheduleperiod=datetime.timedelta(days=1), tcfile='tc_data_paintcontrol.csv', solfile='tc_sol_paintcontrol.csv') args.validation_interval = 0 elif args.scenario_provider == 'iofrol': scenario_provider = scenarios.IndustrialDatasetScenarioProvider(tcfile='DATA/iofrol.csv') args.validation_interval = 0 elif args.scenario_provider == 'gsdtsr': scenario_provider = scenarios.IndustrialDatasetScenarioProvider(tcfile='DATA/gsdtsr.csv') args.validation_interval = 0 if args.reward == 'binary': reward_function = reward.binary_positive_detection_reward elif args.reward == 'failcount': reward_function = reward.failcount elif args.reward == 'timerank': reward_function = reward.timerank elif args.reward == 'tcfail': reward_function = reward.tcfail prefix = '{}_{}_{}_lr{}_as{}_n{}_eps{}_hist{}_{}'.format(args.agent, args.scenario_provider, args.reward,
def run_optimal(dataset, repo_path, output_dir, sched_time_ratio):
    """Run the GA prioritization 30 times over ``dataset`` and save a CSV report.

    Every execution re-creates the scenario provider from
    ``{repo_path}/{dataset}/features-engineered.csv`` and walks all of its
    scenarios.  For each scenario the prioritization comes from
    ``genetic_algorithm_tcp`` (skipped when there is at most one test case),
    is evaluated with the metric, and one ``;``-separated row per reward
    function is recorded.  The report is written to
    ``{output_dir}/{dataset}.csv``.

    Args:
        dataset: Dataset directory name under ``repo_path``.  For
            ``'iofrol'``, ``'paintcontrol'``, ``'gsdtsr'`` and
            ``'lexisnexis'`` a ``NAPFDVerdictMetric`` is used; otherwise a
            ``NAPFDMetric``.
        repo_path: Directory that contains the dataset directories.
        output_dir: Directory receiving ``{dataset}.csv``; the file is
            overwritten.
        sched_time_ratio: Forwarded as the second argument of
            ``IndustrialDatasetScenarioProvider``.
    """
    logging.debug(f"Running for {dataset}")

    if dataset in ('iofrol', 'paintcontrol', 'gsdtsr', 'lexisnexis'):
        metric = NAPFDVerdictMetric()
    else:
        metric = NAPFDMetric()

    reward_functions = [RNFailReward(), RRankReward(), TimeRankReward()]

    header = "experiment;step;policy;reward_function;sched_time;sched_time_duration;prioritization_time;detected;missed;tests_ran;tests_not_ran;" \
             + "ttf;time_reduction;fitness;cost;rewards;avg_precision\n"

    # Rows are collected in a list and joined once at the end; growing one
    # big string with "+=" inside the nested loops re-copies the whole report
    # on every row.
    rows = []

    start = time.time()

    # 30 independent executions
    for i in range(1, 31):
        scenario_provider = scenarios.IndustrialDatasetScenarioProvider(
            f"{repo_path}/{dataset}/features-engineered.csv", sched_time_ratio)

        for (t, vsc) in enumerate(scenario_provider, start=1):
            metric.update_available_time(vsc.get_available_time())
            actions = vsc.get_testcases()

            # Time only the search for the prioritization.
            start_exp = time.time()

            # With zero or one test case there is nothing to order.
            ind = [0]
            if len(actions) > 1:
                # Run GA to find the best NAPFD in current commit
                ind = genetic_algorithm_tcp(actions, metric)

            end_exp = time.time()

            # NOTE(review): the "+ 1" presumably turns 0-based GA indices
            # into 1-based priorities expected by sort_update_actions --
            # confirm.
            last_prioritization = sort_update_actions(
                np.array(ind) + 1, actions)
            metric.evaluate(last_prioritization)

            # Get the Test Case names
            last_prioritization = [tc['Name'] for tc in last_prioritization]

            time_reduction = scenario_provider.total_build_duration - metric.ttf_duration

            # The same prioritization is scored by every reward function;
            # each one contributes its own report row.
            for reward_function in reward_functions:
                last_reward = reward_function.evaluate(metric, last_prioritization)

                rows.append(
                    f"{i};{t};GA;{reward_function.get_name()};"
                    + f"{scenario_provider.avail_time_ratio};{vsc.get_available_time()};{end_exp - start_exp};"
                    + f"{metric.detected_failures};{metric.undetected_failures};{len(metric.scheduled_testcases)};"
                    + f"{len(metric.unscheduled_testcases)};{metric.ttf};{time_reduction};"
                    + f"{metric.fitness};{metric.cost};{np.mean(last_reward)};{metric.avg_precision}\n"
                )

            logging.debug(
                f"Exp {i} - Ep {t} - Policy GA - NAPFD/APFDc: {metric.fitness:.4f}/{metric.cost:.4f}"
            )

    end = time.time()

    logging.debug(f"Time expend to run the experiments: {end - start}")

    print(f"Saving in {output_dir}/{dataset}.csv")

    with open(f"{output_dir}/{dataset}.csv", "w") as f:
        f.write(header + "".join(rows))
from jmetal.operator.mutation import PermutationSwapMutation from jmetal.util.observer import PrintObjectivesObserver from jmetal.util.density_estimator import CrowdingDistance from jmetal.util.observer import PrintObjectivesObserver from jmetal.util.ranking import FastNonDominatedRanking from jmetal.util.solutions.comparator import MultiComparator from jmetal.util.termination_criterion import StoppingByEvaluations from problem import TCPCI if __name__ == "__main__": metric = NAPFDMetric() repo_path = "data" dataset = 'deeplearning4j@deeplearning4j' scenario_provider = scenarios.IndustrialDatasetScenarioProvider(f"{repo_path}/{dataset}/features-engineered.csv") for (t, vsc) in enumerate(scenario_provider, start=1): if(t > 1): break available_time = vsc.get_available_time() metric.update_available_time(available_time) test_cases = vsc.get_testcases() IND_SIZE = len(test_cases) if (IND_SIZE > 1): # Run GA to find the best NAPFD in current commit problem = TCPCI(metric=metric, test_cases=test_cases, number_of_variables=IND_SIZE)
# Turn a 2-D array of values into (window, next value) training pairs.
def create_dataset(dataset, look_back=1):
    """Build sliding windows over column 0 of ``dataset``.

    For every start index ``s`` in ``range(len(dataset) - look_back - 1)``
    the input sample is ``dataset[s:s + look_back, 0]`` and its target is
    ``dataset[s + look_back, 0]``.

    Returns:
        A ``(inputs, targets)`` tuple of ``numpy`` arrays.
    """
    window_count = len(dataset) - look_back - 1
    inputs = [dataset[s:s + look_back, 0] for s in range(window_count)]
    targets = [dataset[s + look_back, 0] for s in range(window_count)]
    return numpy.array(inputs), numpy.array(targets)


# Seed NumPy's global RNG so repeated runs are reproducible.
numpy.random.seed(7)

# Load the dataset: the 'LastRun' and 'Verdict' columns of the paintcontrol
# test-case table.
# (previous source: read_csv('airline-passengers.csv', usecols=[1], engine='python'))
sc = scenarios.IndustrialDatasetScenarioProvider(
    tcfile='DATA/paintcontrol.csv')
dataframe = sc.tcdf.loc[:, ('LastRun', 'Verdict')]
dataset = dataframe.values
# (the cast dataset.astype('float32') is intentionally left disabled)

# Rescale the values into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# First 67% of the rows are used for training, the remainder for testing.
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

# reshape into X=t and Y=t+1
# Scenario name -> test-case CSV passed to IndustrialDatasetScenarioProvider.
_SCENARIO_TCFILES = {
    'paintcontrol': 'DATA/paintcontrol.csv',
    'iofrol': 'DATA/iofrol.csv',
    'gsdtsr': 'DATA/gsdtsr.csv',
    'spectrum': 'DATA/spectrum.csv',
    'group_01': 'DATA/group_01.csv',
    'group_03': 'DATA/group_03.csv',
    'group_06': 'DATA/group_06.csv',
    'group_11': 'DATA/group_11.csv',
    'mattermost': 'DATA/mattermost.csv',
    'mm1': 'DATA/mattermost_retecs_in_1.csv',
    'mm2': 'DATA/mattermost_retecs_in_2.csv',
    'mm3': 'DATA/mattermost_retecs_in_3.csv',
}


def get_scenario(name):
    """Return the scenario provider registered under ``name``.

    ``'incremental'`` builds an ``IncrementalScenarioProvider`` whose
    ``episode_length`` is the module-level ``CI_CYCLES``.  Every other
    supported name builds an ``IndustrialDatasetScenarioProvider`` for the
    CSV listed in ``_SCENARIO_TCFILES``.

    Raises:
        ValueError: if ``name`` is not a supported scenario.  The previous
            implementation fell through every branch and failed with an
            ``UnboundLocalError`` on ``return sc`` instead.
    """
    if name == 'incremental':
        return scenarios.IncrementalScenarioProvider(episode_length=CI_CYCLES)

    tcfile = _SCENARIO_TCFILES.get(name)
    if tcfile is None:
        raise ValueError(
            'Unknown scenario {!r}; expected one of: incremental, {}'.format(
                name, ', '.join(_SCENARIO_TCFILES)))

    return scenarios.IndustrialDatasetScenarioProvider(tcfile=tcfile)