def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, label_alphabet, X_array, feature_alphabet = ctk_io.read_token_sequence_data(working_dir)
    Y_array = np.array(Y)
    # print("Shape of X is %s and Y is %s" % (str(X.shape), str(Y.shape)))

    num_examples, dimension = X_array.shape
    num_outputs = 1 if len(label_alphabet) == 2 else len(label_alphabet)
    num_y_examples = len(Y)
    assert num_examples == num_y_examples

    Y_adj, indices = ctk_io.flatten_outputs(Y_array)

    train_x, valid_x, train_y, valid_y = train_test_split(X_array, Y_array, test_size=0.2, random_state=18)

    optim = RandomSearch(
        lambda: get_random_config(),
        lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y,
                                  len(feature_alphabet), num_outputs))

    best_config = optim.optimize()
    print("Best config: %s" % best_config)
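Several of the snippets in this collection drive the same two-callable interface: a sampler that returns a random configuration and an evaluation function called with an epoch count and a config. The sketch below is only for orientation, not the actual implementation used by these projects; the attribute names, the default max_iter, and the assumption that the evaluation returns a loss to be minimized are all illustrative.

# Minimal sketch of the assumed RandomSearch interface (hypothetical, not the projects' code).
class RandomSearch(object):
    def __init__(self, get_config_fn, eval_fn):
        self.get_config_fn = get_config_fn   # callable returning a random config dict
        self.eval_fn = eval_fn               # callable scoring (epochs, config); assumed lower is better

    def optimize(self, max_iter=10):
        best_config, best_score = None, float('inf')
        for _ in range(max_iter):
            config = self.get_config_fn()
            # 'num_epochs' is an assumed config key; real configs may differ
            score = self.eval_fn(config.get('num_epochs', 10), config)
            if score < best_score:
                best_config, best_score = config, score
        return best_config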
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, label_alphabet, X_array, feature_alphabet = ctk_io.read_token_sequence_data(working_dir)
    X_segments, dimensions = split_entity_data(X_array, feature_alphabet)
    Y_array = np.array(Y)
    Y_adj, indices = ctk_io.flatten_outputs(Y_array)

    num_outputs = 1 if len(label_alphabet) == 2 else len(label_alphabet)
    num_y_examples = len(Y)

    train_x0, valid_x0, train_x1, valid_x1, train_x2, valid_x2, train_y, valid_y = train_test_split(
        X_segments[0], X_segments[1], X_segments[2], Y_array, test_size=0.2, random_state=18)
    train_x = [train_x0, train_x1, train_x2]
    valid_x = [valid_x0, valid_x1, valid_x2]

    optim = RandomSearch(
        lambda: get_random_config(),
        lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y,
                                  len(feature_alphabet), num_outputs))

    best_config = optim.optimize()
    print("Best config: %s" % best_config)
def __init__(self, problem_instance, random_state, population_size, selection,
             crossover, p_c, mutation, p_m, pressure):
    RandomSearch.__init__(self, problem_instance, random_state)
    self.population_size = population_size
    self.selection1 = selection
    self.selection2 = selection
    self.crossover1 = crossover
    self.crossover2 = crossover
    self.p_c2 = p_c
    self.p_c1 = p_c
    self.mutation1 = mutation
    self.mutation2 = mutation
    self.p_m = p_m
    self.repetition1 = 0
    self.repetition2 = 0
    # integer division keeps the two subpopulation sizes as ints
    self.population_size1 = population_size // 2
    self.population_size2 = population_size // 2
    self.flag1 = False
    self.flag2 = False
    self.control = 2
    self.presure1 = pressure
    self.presure2 = pressure
    self.count = 0
    self.variation1 = 1
    self.variation2 = 1
def __init__(self, problem_instance, random_state, neighborhood_size,
             neighborhood_function=bit_flip):
    RandomSearch.__init__(self, problem_instance, random_state)
    self.neighborhood_size = neighborhood_size
    self.neighborhood_function = neighborhood_function
def __init__(self, problem_instance, random_state, population_size, selection,
             crossover, p_c, mutation, p_m):
    RandomSearch.__init__(self, problem_instance, random_state)
    self.population_size = population_size
    self.selection = selection
    self.crossover = crossover
    self.p_c = p_c
    self.mutation = mutation
    self.p_m = p_m
def main(args):
    # np.random.seed(1337)
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]
    data_file = os.path.join(working_dir, 'training-data.liblinear')

    # learn alphabet from training data
    provider = dataset.DatasetProvider(data_file)
    # now load training examples and labels
    train_x, train_y = provider.load(data_file)

    # turn x and y into numpy arrays among other things
    maxlen = max([len(seq) for seq in train_x])
    classes = len(set(train_y))
    train_x = pad_sequences(train_x, maxlen=maxlen)
    train_y = to_categorical(np.array(train_y), classes)

    # load pre-trained embedding file
    embeddings_index = {}
    f = open(os.path.join(working_dir, 'mimic.txt'))
    values = f.readline().split()
    EMBEDDING_WORDNUM = int(values[0])
    EMBEDDING_DIM = int(values[1])
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    print('loaded embeddings for %s of %s words.' % (len(embeddings_index), EMBEDDING_WORDNUM))

    # prepare embedding matrix
    nb_words = len(provider.word2int)
    embedding_matrix = np.zeros((nb_words, EMBEDDING_DIM))
    for word, i in provider.word2int.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in the embedding index will be all-zeros
            embedding_matrix[i] = embedding_vector

    print('train_x shape:', train_x.shape)
    print('train_y shape:', train_y.shape)

    # train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=0.1, random_state=18)

    optim = RandomSearch(
        lambda: get_random_config(),
        lambda x, y: run_one_eval(x, y, train_x, train_y, maxlen,
                                  len(provider.word2int), classes,
                                  embedding_matrix, EMBEDDING_DIM))

    best_config = optim.optimize()
    print("Best config: %s" % best_config)
    sys.exit(0)
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, outcome_map, outcome_list, X, feature_alphabet = ctk_io.read_multitask_token_sequence_data(working_dir)
    start_ind = feature_alphabet[start_symbol]
    end_ind = feature_alphabet[end_symbol]

    train_x, valid_x, train_y, valid_y = train_test_split(X, Y, test_size=0.2, random_state=7)
    # X_distance = get_distance_features(X, start_ind, end_ind)

    print("Shape of X is %s and Y is %s" % (str(X.shape), str(Y.shape)))

    num_examples, dimension = X.shape
    num_y_examples, num_labels = Y.shape
    assert num_examples == num_y_examples

    weights = None
    if len(args) > 1:
        weights = ctk_io.read_embeddings(args[1], feature_alphabet)

    train_y_adj, train_indices = ctk_io.flatten_outputs(train_y)
    valid_y_adj, valid_indices = ctk_io.flatten_outputs(valid_y)
    if not train_indices == valid_indices:
        print("Error: training and valid sets have different index sets -- may be missing some labels in one set or the other")
        sys.exit(-1)

    output_dims_list = []
    train_y_list = []
    valid_y_list = []
    indices = train_indices
    for i in range(len(indices) - 1):
        label_dims = indices[i + 1] - indices[i]
        output_dims_list.append(label_dims)
        if label_dims == 1:
            train_y_list.append(train_y_adj[:, indices[i]])
            valid_y_list.append(valid_y_adj[:, indices[i]])
        else:
            train_y_list.append(train_y_adj[:, indices[i]:indices[i + 1]])
            valid_y_list.append(valid_y_adj[:, indices[i]:indices[i + 1]])

        print("Dimensions of label %d are %s" % (i, str(train_y_list[-1].shape)))

    ## pass a function to the search that it uses to get a random config
    ## and a function that it will use to get an eval given (e)pochs and (c)onfig:
    optim = RandomSearch(
        lambda: get_random_config(weights),
        lambda e, c: run_one_eval(e, c, train_x, train_y_list, valid_x, valid_y_list,
                                  len(feature_alphabet), output_dims_list, weights))

    best_config = optim.optimize(max_iter=27)

    open(os.path.join(working_dir, 'model_0.config'), 'w').write(str(best_config))
    print("Best config returned by optimizer is %s" % str(best_config))
def __init__(self, problem_instance, random_state, population_size, selection,
             crossover, p_c, mutation, p_m, presure):
    RandomSearch.__init__(self, problem_instance, random_state)
    self.population_size = population_size
    self.selection = selection
    self.crossover = crossover
    self.p_c = p_c
    self.mutation = mutation
    self.p_m = p_m
    self.presure = presure
    self.reproduttive_guys = []
    self.repetition = 0
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory> [(optional) weights file]\n")
        sys.exit(-1)

    working_dir = args[0]

    (labels, label_alphabet, feats, feats_alphabet) = ctk_io.read_bio_sequence_data(working_dir)

    weights = None
    if len(args) > 1:
        weights = ctk_io.read_embeddings(args[1], feats_alphabet)

    maxlen = max([len(seq) for seq in feats])
    all_x = pad_sequences(feats, maxlen=maxlen)
    all_y = ctk_io.expand_labels(pad_sequences(labels, maxlen=maxlen), label_alphabet)

    train_x, valid_x, train_y, valid_y = train_test_split(all_x, all_y, test_size=0.2, random_state=7)

    optim = RandomSearch(
        lambda: get_random_config(weights),
        lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y,
                                  len(feats_alphabet), len(label_alphabet), weights))

    best_config = optim.optimize()

    open(os.path.join(working_dir, 'model_0.config'), 'w').write(str(best_config))
    print("Best config returned by optimizer is %s" % str(best_config))

    if not best_config['pretrain']:
        weights = None

    model = get_model_for_config(train_x.shape, len(feats_alphabet), len(label_alphabet),
                                 best_config, weights=weights)
    model.fit(all_x, all_y,
              nb_epoch=40,
              batch_size=best_config['batch_size'],
              verbose=1,
              validation_split=0.1)
    model.summary()

    json_string = model.to_json()
    open(os.path.join(working_dir, 'model_0.json'), 'w').write(json_string)
    model.save_weights(os.path.join(working_dir, 'model_0.h5'), overwrite=True)

    # binary mode so pickle works under both Python 2 and 3
    fn = open(os.path.join(working_dir, 'alphabets.pkl'), 'wb')
    pickle.dump((feats_alphabet, label_alphabet), fn)
    fn.close()

    with ZipFile(os.path.join(working_dir, 'script.model'), 'w') as myzip:
        myzip.write(os.path.join(working_dir, 'model_0.json'), 'model_0.json')
        myzip.write(os.path.join(working_dir, 'model_0.h5'), 'model_0.h5')
        myzip.write(os.path.join(working_dir, 'alphabets.pkl'), 'alphabets.pkl')
def run_random_search(seconds):
    print('Running Random Search algorithm for ' + str(seconds) + ' seconds...')
    print()
    rs = RandomSearch(states, seconds, inc_support, dec_support)
    rs.run()
    print('Found optimal route with value of ' + str(rs.best_solution.value) + '.')
    print(str(rs.best_solution.calculate_real_value()) + ' electoral votes were collected.')
    rs.best_solution.print()
    print()
def __init__(self, problem_instance, random_state, population_size, selection,
             crossover, p_c, mutation, p_m):
    RandomSearch.__init__(self, problem_instance, random_state)
    self.population_size = population_size
    self.selection = selection
    self.crossover = crossover
    self.p_c = p_c
    self.mutation = mutation
    self.p_m = p_m
    self.repetition = 0
    self.presure = 0.2
    self.list = []
    self.list_iteration = []
def main(): target_string = "Hello World!" population_size = 1400 runs = 10 """ The main method for the application. The Genetic Algorithm uses tournament selection as selection method and k-point or one-point crossover as crossover methods. The Genetic Algorithm constructor takes in the following parameters: @param: target_string The target string @param: population_size Amount of randomly generated strings @param: crossover_rate Crossover Rate in percentage (i.e. 1 = 100%) @param: mutation_rate Mutation Rate in percentage @param: is_k_point_crossover Choose whether to choose k-point crossover as crossover method. If false, one-point crossover is performed @param: tournament_size_percent Percentage of population to participate in tournaments for selection @param: strongest_winner_probability Probability of strongest participant in tournament to win, as well as the second strongest's probability """ ga = GeneticAlgorithm(target_string, population_size, 0.8, 0.05, True, 0.05, 0.65) ga.set_show_each_chromosome(False) ga.set_show_crossover_internals(False) ga.set_show_mutation_internals(False) ga.set_silent( False) # If False it shows the fittest chromosome of each generation ga.run(runs) ga.get_stats() """ The Hill Climbing constructor takes in the following parameters: @param: target_string The target string @param: solutions_size Amount of solutions (strings) to search for a better solution in """ hc = HillClimbing(target_string, population_size) hc.set_show_each_solution(False) hc.set_silent(True) hc.run(runs) hc.get_stats() """ The Random Search constructor takes in the following parameters: @param: target_string The target string @param: solutions_size Amount of solutions (strings) generated randomly each round in which the solution is searched for """ rs = RandomSearch(target_string, population_size) rs.set_show_each_solution(False) rs.set_silent(True) rs.run(runs) rs.get_stats()
def main(parameters):
    # if (len(parameters) - 1) != 7:
    #     print("Missing required parameters: (regressor, input_file, "
    #           "output_folder, max_iter, seed)")

    regressor = parameters[1]
    input_file = parameters[2]
    output_folder = parameters[3]
    max_iter = int(parameters[4])
    inner_seed = int(parameters[5])
    n_jobs = int(parameters[6])
    data_tag = parameters[7]
    outer_seed = int(parameters[8])
    tuning_seed = int(parameters[9])
    outer_fold = int(parameters[10])
    must_normalize = parameters[11] == 'True'

    output_folder = os.path.join(output_folder, regressor, data_tag,
                                 "outer_fold" + str(outer_fold))
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    data = pd.read_csv(input_file)
    # Removing ID column
    data = data.iloc[:, 1:]
    X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

    kf = KFold(n_splits=10, random_state=outer_seed, shuffle=True)
    for k, (train_index, test_index) in enumerate(kf.split(X)):
        if k + 1 == outer_fold:
            X_train, y_train = X[train_index], y[train_index]
            # X_test, y_test = X[test_index], y[test_index]
            break

    rs = RandomSearch(get_search_space(algorithm=regressor), max_iter=max_iter,
                      n_jobs=n_jobs, random_state=tuning_seed)

    best_conf = rs.fmin(objective=objective,
                        predictor=get_regressor(algorithm=regressor),
                        loss_func_tuning=RRMSE,
                        X=X_train, y=y_train,
                        seed=inner_seed,
                        model_name=regressor,
                        output_folder=output_folder,
                        data_tag=data_tag,
                        must_normalize=must_normalize)
def main():
    exp_dir = 'search_{}_{}'.format(args.algorithm, time.strftime("%Y%m%d-%H%M%S"))
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(exp_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logging.info('args = %s', args)

    if args.algorithm == 'PPO' or args.algorithm == 'PG':
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
        if torch.cuda.is_available():
            device = torch.device('cuda:{}'.format(str(args.gpu)))
            cudnn.benchmark = True
            cudnn.enabled = True
            logging.info('using gpu : {}'.format(args.gpu))
            torch.cuda.manual_seed(args.seed)
        else:
            device = torch.device('cpu')
            logging.info('using cpu')

    if args.algorithm == 'PPO':
        ppo = PPO(args, device)
        ppo.multi_solve_environment()
    elif args.algorithm == 'PG':
        pg = PolicyGradient(args, device)
        pg.multi_solve_environment()
    else:
        rs = RandomSearch(args)
        rs.multi_solve_environment()
def __init__(self, env):
    super().__init__(env)
    # Initial generation size
    self.initial_gen_size = INITIAL_GEN_SIZE
    # Number of generations to run for
    self.num_generations = NUM_GENERATIONS
    # Number of parents to keep each generation
    self.num_elite = NUM_ELITE
    # Number of children each generation
    self.num_children_per_parent_comb = NUM_CHILDREN_PER_PARENT_COMB
    # Probability of mutation
    self.mutation_prob = MUTATION_PROB
    # Use the random policy generator from the RandomSearch learner
    self.get_policy = RandomSearch(env).get_policy
base = os.environ['DATA_ROOT']
train_dir = os.path.join(base, cfg.get('data', 'train'))
code_file = os.path.join(base, cfg.get('data', 'codes'))

provider = dataset.DatasetProvider(
    train_dir,
    code_file,
    cfg.getint('args', 'min_token_freq'),
    cfg.getint('args', 'max_tokens_in_file'),
    cfg.getint('args', 'min_examples_per_code'),
    use_cuis=False)
x, y = provider.load(tokens_as_set=False)

maxlen = max([len(seq) for seq in x])
x = pad_sequences(x, maxlen=maxlen)
y = np.array(y)

print('x shape:', x.shape)
print('y shape:', y.shape)
print('max seq len:', maxlen)
print('vocab size:', x.max() + 1)
print('number of features:', len(provider.token2int))
print('number of labels:', len(provider.code2int))

model = CnnCodePredictionModel()
search = RandomSearch(model, x, y)
best_config = search.optimize(max_iter=64)
print('best config:', best_config)
from annealing import Annealer
from sat3cnf import SAT3CNF
from random_search import RandomSearch
import numpy as np


def imprime_solucao(solucao):
    # print one line per clause of the solution
    for (c, v) in solucao.items():
        print('Clause %s: %d' % (c, v))


if __name__ == '__main__':
    files = ['uf20-01.cnf', 'uf100-01.cnf', 'uf250-01.cnf']
    for fname in files:
        print("Solving ", fname)
        sat = SAT3CNF(fname)
        print(str(sat.n_clausulas) + ' clauses')

        sim_annealing = Annealer(sat)
        print("\nSimulated Annealing")
        s_ann = sim_annealing.resolver()[0]

        random_search = RandomSearch(sat)
        print("\nRandom Search")
        s_random = random_search.resolver()

        imprime_solucao(s_ann)
        print("\n\n")
help="method to test", type="string", default="rl") (kwargs, args) = parser.parse_args() prob_env_name, prob_env_class = get_prob_env_name_class( kwargs.prob_env_dir) prob_env = prob_env_class.load(kwargs.prob_env_dir) if kwargs.method == 'random': # problem environment has not fixed x_o. # however, we want to fix x_o for monto carlo random search assert not prob_env.if_set_fixed_xo() prob_env.set_fixed_xo(prob_env.x_o) assert prob_env.if_set_fixed_xo() opt = RandomSearch(prob_env) cpu_time = get_cpu_time() print("before test {}, cpu time: {}".format(kwargs.method, cpu_time)) _, opt_state, _, _, duration, call_counts = \ opt.random_search( iteration_limit=int(9e30), # never stop until wall_time_limt wall_time_limit=kwargs.wall_time_limit, ) print("after test {}, cpu time: {}, diff: {}".format( kwargs.method, get_cpu_time(), get_cpu_time() - cpu_time)) elif kwargs.method == 'rl_prtr': model_env_name = get_model_env_name(kwargs.prtr_model_dir) assert model_env_name == prob_env_name opt = QLearning( k=prob_env.k,
def time_process(data_file):
    curr_time = dt.datetime.now()

    # run loop
    fobj = XMLParser(data_file, curr_time)
    lim = fobj.find_oldest_time()
    while curr_time > lim:
        curr_time -= TIME_INCR
        print('running time analysis for ' + str(curr_time))
        fobj.update_time(curr_time)
        d = fobj.parse_to_dict()
        if d:
            net = NetworkParser(d)
            output("Analyzing File " + data_file + ' at time ' + str(curr_time))
            na = NetworkAnalysis(net.G, os.path.basename(data_file), output_path, curr_time)
            basic = na.d3dump(public_out_path, str(curr_time))

            # Run Decentralized Search
            try:
                if decentralized_search_settings["run_decentralized_search"]:
                    hiearchyG = net.G.copy()
                    category_hierarchy = CategoryBasedHierarchicalModel(
                        hiearchyG,
                        similarity_matrix_type=category_hierarchical_model_settings["similarity_matrix_type"],
                        max_branching_factor_root=category_hierarchical_model_settings["max_branching_factor_root"])
                    category_hierarchy.build_hierarchical_model()

                    decentralized_search_model = HierarchicalDecentralizedSearch(
                        hiearchyG, category_hierarchy.hierarchy, na,
                        detailed_print=decentralized_search_settings["detailed_print"],
                        hierarchy_nodes_only=decentralized_search_settings["hierarchy_nodes_only"],
                        apply_weighted_score=decentralized_search_settings["apply_weighted_score"])
                    n_found, n_missing, av_path_len, av_unique_nodes, path_lengths_deciles = \
                        decentralized_search_model.run_decentralized_search(
                            1000,
                            decentralized_search_settings["widen_search"],
                            decentralized_search_settings["plots"])
                    basic.update({
                        "decentralized_num_paths_found": n_found,
                        "decentralized_num_paths_missing": n_missing,
                        "decentralized_average_decentralized_path_length": av_path_len,
                        "decentralized_average_num_unique_nodes": av_unique_nodes,
                        "hierarchy_num_nodes": (len(category_hierarchy.hierarchy.nodes()) -
                                                len(category_hierarchy.ranked_categories)),
                        "hierarchy_num_levels": category_hierarchy.num_hierarchy_levels
                    })

                    path_lengths_deciles_dict = {}
                    for i in range(len(path_lengths_deciles)):
                        path_lengths_deciles_dict["path_length_" + str((i + 1) * 10) + "_percentile"] = \
                            path_lengths_deciles[i]
                    basic.update(path_lengths_deciles_dict)

                    random_search_model = RandomSearch(net.G, na)
                    n_found, n_missing, av_path_len, av_unique_nodes = random_search_model.run_search(
                        1000,
                        decentralized_search_settings["widen_search"],
                        decentralized_search_settings["plots"])
                    basic.update({
                        "random_num_paths_found": n_found,
                        "random_num_paths_missing": n_missing,
                        "random_average_decentralized_path_length": av_path_len,
                        "random_average_num_unique_nodes": av_unique_nodes
                    })
            except:
                pass

            if generate_data:
                # write out decentralized results
                na.write_permanent_data_json(public_data, basic, str(curr_time.date()))

    output("Completed Analyzing: " + data_file)
def create_random_search():
    search = RandomSearch()
    search.add_static_var("batch_size", 128)
    # Fashion MNIST converges around 25 epochs and CIFAR converges after 100 epochs
    search.add_static_var("epochs", 10)
    search.add_list("optimizer", ["sgd", "adam"])
    search.add_power_range("num_filters_1", 5, 8, 2)  # 32 64 128 256
    search.add_power_range("num_filters_2", 4, 8, 2)  # 16 32 64 128 256
    search.add_power_range("num_filters_3", 4, 8, 2)  # 16 32 64 128 256
    search.add_step_range("filter_size_1", 2, 3, 1)
    search.add_step_range("filter_size_2", 2, 3, 1)
    search.add_step_range("filter_size_3", 2, 3, 1)
    search.add_step_range("pool_size_1", 2, 2, 1)
    search.add_step_range("pool_size_2", 2, 2, 1)
    search.add_step_range("pool_size_3", 2, 2, 1)
    search.add_step_range("dropout_1", 0.1, 0.9, 0.1)
    search.add_step_range("dropout_2", 0.1, 0.9, 0.1)
    search.add_step_range("dropout_3", 0.1, 0.9, 0.1)
    search.add_step_range("dropout_4", 0.1, 0.9, 0.1)
    search.add_power_range("dense_neurons_1", 6, 11, 2)  # 64 128 256 512 1024 2048
    search_count = 5
    return search.create_random_search(search_count)
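For orientation, a sketch of how the sampled configurations above might be consumed. It assumes create_random_search(n) returns a list of config dicts keyed by the registered variable names, and build_and_train is a hypothetical caller-supplied function that trains a model from one config and returns a validation score; neither assumption is confirmed by the snippet itself.

# Hypothetical consumer of the configs sampled by create_random_search() above.
def pick_best_config(build_and_train):
    best_config, best_acc = None, float('-inf')
    for config in create_random_search():
        acc = build_and_train(config)  # assumed to return validation accuracy
        if acc > best_acc:
            best_config, best_acc = config, acc
    return best_config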
def main():
    runs = 10
    rounds = 5
    chromosome_size = 23
    population_size = 1000

    data_set_name = 'bigfaultmatrix.txt'
    pwd = os.path.abspath(os.path.dirname(__file__))
    data_set_path = os.path.join(pwd, data_set_name)

    parser = CSVParser(data_set_path)
    test_case_fault_matrix = parser.parse_data(True)

    ga = GeneticAlgorithm(test_case_fault_matrix, chromosome_size, population_size,
                          rounds, 0.8, 0.08, 0.05, 0.75)
    ga.set_show_each_chromosome(False)
    ga.set_show_fitness_internals(False)
    ga.set_show_crossover_internals(False)
    ga.set_show_mutation_internals(False)
    ga.set_show_duplicate_internals(False)
    ga.set_silent(True)
    ga.run(runs)
    ga_fitness = ga.get_stats()

    for i in range(0, 2):
        if i == 0:
            hc = HillClimbing(test_case_fault_matrix, chromosome_size, population_size, rounds, False)
        else:
            hc = HillClimbing(test_case_fault_matrix, chromosome_size, population_size, rounds, True)
        hc.set_show_each_solution(False)
        hc.set_show_fitness_internals(False)
        hc.set_show_swapping_internals(False)
        hc.set_silent(True)
        hc.run(runs)
        if i == 0:
            hc_internal_fitness = hc.get_stats()
        else:
            hc_external_fitness = hc.get_stats()

    rs = RandomSearch(test_case_fault_matrix, chromosome_size, population_size, rounds)
    rs.set_show_each_solution(False)
    rs.set_silent(True)
    rs.run(runs)
    rs_fitness = rs.get_stats()

    rs_data = np.array(rs_fitness)
    hs_internal = np.array(hc_internal_fitness)
    hs_external = np.array(hc_external_fitness)
    ga_data = np.array(ga_fitness)

    # test_cases_per_test_suite = np.array([5, 10, 20, 23, 30, 50, 100])
    # unique_large_apfd = np.array([0.4594736842105263, 0.6063157894736844, 0.6867105263157895, 0.6978260869565216, 0.7128947368421051, 0.7326842105263159, 0.7480263157894737])
    # full_large_apfd = np.array([0.44631578947368417, 0.6023684210526316, 0.6846052631578947, 0.6958810068649884, 0.7122807017543858, 0.7320526315789474, 0.7476578947368421])
    # plt.plot(test_cases_per_test_suite, unique_large_apfd, '-gD')
    # plt.xlabel("Test Cases per Test Suite")
    # plt.ylabel("Mean Fitness (APFD)")
    # plt.xticks(np.arange(min(test_cases_per_test_suite), max(test_cases_per_test_suite) + 1, 5.0))

    # combine these different collections into a list
    data_to_plot = [rs_data, hs_internal, hs_external, ga_data]

    # Create a figure instance
    fig = plt.figure(1, figsize=(9, 6))

    # Create an axes instance
    ax = fig.add_subplot(111)

    # add patch_artist=True option to ax.boxplot()
    bp = ax.boxplot(data_to_plot, patch_artist=True)

    # change outline color, fill color and linewidth of the boxes
    for box in bp['boxes']:
        # change outline color
        box.set(color='#7570b3', linewidth=2)
        # change fill color
        box.set(facecolor='#1b9e77')

    # change color and linewidth of the whiskers
    for whisker in bp['whiskers']:
        whisker.set(color='#7570b3', linewidth=2)

    # change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='#7570b3', linewidth=2)

    # change color and linewidth of the medians
    for median in bp['medians']:
        median.set(color='#b2df8a', linewidth=2)

    # change the style of fliers and their fill
    for flier in bp['fliers']:
        flier.set(marker='o', color='#e7298a', alpha=0.5)

    # Custom x-axis labels
    ax.set_xticklabels(['Random Search', 'HC Internal Swap', 'HC External Swap', 'Genetic Algorithm'])

    # Remove top axes and right axes ticks
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Save the figure
    graph_path = os.path.join(pwd, 'graph.pdf')
    pdf = PdfPages(graph_path)
    plt.savefig(pdf, format='pdf', bbox_inches='tight')
    plt.show()
    pdf.close()
                  dest='method',
                  help="method to test",
                  type="string",
                  default="rl")
(kwargs, args) = parser.parse_args()

prob_env_name, prob_env_class = get_prob_env_name_class(kwargs.prob_env_dir)
prob_env = prob_env_class.load(kwargs.prob_env_dir)

if kwargs.method == 'random':
    # the problem environment may not have a fixed x_o;
    # however, we want to fix x_o for Monte Carlo random search
    prob_env.set_fixed_xo(prob_env.x_o)
    assert prob_env.if_set_fixed_xo()
    opt = RandomSearch(prob_env)
    _, opt_state, _, _, duration, call_counts = \
        opt.random_search(
            iteration_limit=int(9e30),  # never stop until wall_time_limit
            wall_time_limit=kwargs.wall_time_limit,
        )
    start_x_o = prob_env.fixed_xo
    start_x_p = None  # meaningless in the random method
    opt_x_p = opt_state[prob_env.k:-1]  # exclude the step
    # use noiseless output
    opt_val = prob_env.still(prob_env.output_noiseless(opt_state))
    wall_time_limit = kwargs.wall_time_limit
    generation = call_counts  # for random search, generation means call counts
elif kwargs.method == 'rl_prtr':
    model_env_name = get_model_env_name(kwargs.prtr_model_dir)
    assert model_env_name == prob_env_name
def process_file(data_file):
    curr_time = get_time()

    # Parse Into Network
    d = XMLParser(data_file, get_time()).parse_to_dict()
    net = NetworkParser(d)

    # Graph Analysis
    output("Analyzing File " + data_file)
    na = NetworkAnalysis(net.G, os.path.basename(data_file), output_path)
    na.outputBasicStats()
    na.outputNodesAndEdges()
    # na.nodeRemoval()
    basic = na.d3dump(public_out_path, str(curr_time))

    # Run Decentralized Search
    if decentralized_search_settings["run_decentralized_search"]:
        hiearchyG = net.G.copy()
        category_hierarchy = CategoryBasedHierarchicalModel(
            hiearchyG,
            similarity_matrix_type=category_hierarchical_model_settings["similarity_matrix_type"],
            max_branching_factor_root=category_hierarchical_model_settings["max_branching_factor_root"])
        category_hierarchy.build_hierarchical_model()

        decentralized_search_model = HierarchicalDecentralizedSearch(
            hiearchyG, category_hierarchy.hierarchy, na,
            detailed_print=decentralized_search_settings["detailed_print"],
            hierarchy_nodes_only=decentralized_search_settings["hierarchy_nodes_only"],
            apply_weighted_score=decentralized_search_settings["apply_weighted_score"])
        n_found, n_missing, av_path_len, av_unique_nodes, path_lengths_deciles = \
            decentralized_search_model.run_decentralized_search(
                1000,
                decentralized_search_settings["widen_search"],
                decentralized_search_settings["plots"])
        basic.update({
            "decentralized_num_paths_found": n_found,
            "decentralized_num_paths_missing": n_missing,
            "decentralized_average_decentralized_path_length": av_path_len,
            "decentralized_average_num_unique_nodes": av_unique_nodes,
            "hierarchy_num_nodes": (len(category_hierarchy.hierarchy.nodes()) -
                                    len(category_hierarchy.ranked_categories)),
            "hierarchy_num_cat_nodes": len(category_hierarchy.ranked_categories),
            "hierarchy_num_levels": category_hierarchy.num_hierarchy_levels
        })
        basic["hierarchy_ratio_cat_nodes"] = basic["hierarchy_num_cat_nodes"] / basic["hierarchy_num_nodes"]

        path_lengths_deciles_dict = {}
        for i in range(len(path_lengths_deciles)):
            path_lengths_deciles_dict["path_length_" + str((i + 1) * 10) + "_percentile"] = path_lengths_deciles[i]
        basic.update(path_lengths_deciles_dict)

        random_search_model = RandomSearch(net.G, na)
        n_found, n_missing, av_path_len, av_unique_nodes = random_search_model.run_search(
            1000,
            decentralized_search_settings["widen_search"],
            decentralized_search_settings["plots"])
        basic.update({
            "random_num_paths_found": n_found,
            "random_num_paths_missing": n_missing,
            "random_average_decentralized_path_length": av_path_len,
            "random_average_num_unique_nodes": av_unique_nodes
        })

    if generate_data:
        na.write_permanent_data_json(public_data, basic)  # write out decentralized results
    # na.generateDrawing()

    output("Completed Analyzing: " + data_file)
def benchmark():
    REPEATS = 10
    SECONDS = [5, 10, 30, 60, 300, 1200]

    for seconds in SECONDS:
        v = 0
        time_s = datetime.now()
        for k in range(REPEATS):
            rs = RandomSearch(states, seconds, inc_support, dec_support)
            rs.run()
            v += rs.best_solution.value
        time_e = datetime.now()
        tt = (time_e - time_s).total_seconds()
        print_csv('Random Search', str(seconds), str(v / REPEATS), str(tt / REPEATS))

    for seconds in SECONDS:
        v = 0
        time_s = datetime.now()
        for k in range(REPEATS):
            ls = LocalSearch(states, seconds, inc_support, dec_support)
            ls.run()
            v += ls.best_solution.value
        time_e = datetime.now()
        tt = (time_e - time_s).total_seconds()
        print_csv('Local Search', str(seconds), str(v / REPEATS), str(tt / REPEATS))

    for seconds in SECONDS:
        for initial_cadence in [10, 25, 50]:
            for critical_event in [10, 25, 50]:
                v = 0
                time_s = datetime.now()
                for k in range(REPEATS):
                    ts = TabuSearch(states, seconds, initial_cadence, critical_event,
                                    inc_support, dec_support)
                    ts.run()
                    v += ts.best_solution.value
                time_e = datetime.now()
                tt = (time_e - time_s).total_seconds()
                print_csv('Tabu Search', str(seconds), str(initial_cadence),
                          str(critical_event), str(v / REPEATS), str(tt / REPEATS))

    for crossover in ['pmx', 'ox']:
        for mutate in ['transposition', 'insertion', 'inversion']:
            for seconds in SECONDS:
                for population_size in [10, 25, 50]:
                    v = 0
                    time_s = datetime.now()
                    for k in range(REPEATS):
                        ga = GeneticAlgorithm(states, seconds, population_size,
                                              crossover, mutate, inc_support, dec_support)
                        ga.run()
                        v += ga.best_solution.value
                    time_e = datetime.now()
                    tt = (time_e - time_s).total_seconds()
                    print_csv('Genetic Algorithm ' + crossover + ' ' + mutate,
                              str(seconds), str(population_size),
                              str(v / REPEATS), str(tt / REPEATS))

    for initial_temperature in [100, 500, 1000]:
        for cooling_coefficient in [0.9, 0.99, 0.999, 0.9999]:
            for minimal_temperature in [initial_temperature * 0.25,
                                        initial_temperature * 0.5,
                                        initial_temperature * 0.75]:
                v = 0
                time_s = datetime.now()
                for k in range(REPEATS):
                    sa = SimulatedAnnealing(states, initial_temperature, cooling_coefficient,
                                            minimal_temperature, inc_support, dec_support)
                    sa.run()
                    v += sa.best_solution.value
                time_e = datetime.now()
                tt = (time_e - time_s).total_seconds()
                print_csv('Simulated Annealing', str(initial_temperature),
                          str(cooling_coefficient), str(minimal_temperature),
                          str(v / REPEATS), str(tt / REPEATS))