def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required arguments: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, label_alphabet, X_array, feature_alphabet = ctk_io.read_token_sequence_data(working_dir)

    Y_array = np.array(Y)
    # print("Shape of X is %s and Y is %s" % (str(X.shape), str(Y.shape)))

    num_examples, dimension = X_array.shape
    num_outputs = 1 if len(label_alphabet) == 2 else len(label_alphabet)
    num_y_examples = len(Y)

    assert num_examples == num_y_examples

    Y_adj, indices = ctk_io.flatten_outputs(Y_array)

    train_x, valid_x, train_y, valid_y = train_test_split(X_array, Y_array, test_size=0.2, random_state=18)
    optim = RandomSearch(
        lambda: get_random_config(),
        lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y, len(feature_alphabet), num_outputs),
    )
    best_config = optim.optimize()

    print("Best config: %s" % best_config)
Example #2
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required arguments: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, label_alphabet, X_array, feature_alphabet = ctk_io.read_token_sequence_data(
        working_dir)

    Y_array = np.array(Y)
    #print("Shape of X is %s and Y is %s" % (str(X.shape), str(Y.shape)))

    num_examples, dimension = X_array.shape
    num_outputs = 1 if len(label_alphabet) == 2 else len(label_alphabet)
    num_y_examples = len(Y)

    assert num_examples == num_y_examples

    Y_adj, indices = ctk_io.flatten_outputs(Y_array)

    train_x, valid_x, train_y, valid_y = train_test_split(X_array,
                                                          Y_array,
                                                          test_size=0.2,
                                                          random_state=18)
    optim = RandomSearch(
        lambda: get_random_config(),
        lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y,
                                  len(feature_alphabet), num_outputs))
    best_config = optim.optimize()

    print("Best config: %s" % best_config)
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required arguments: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, label_alphabet, X_array, feature_alphabet = ctk_io.read_token_sequence_data(working_dir)
    
    X_segments, dimensions = split_entity_data(X_array, feature_alphabet)
    Y_array = np.array(Y)
    Y_adj, indices = ctk_io.flatten_outputs(Y_array)
    
    num_outputs = 1 if len(label_alphabet) == 2 else len(label_alphabet)
    num_y_examples = len(Y)
    
    train_x0, valid_x0, train_x1, valid_x1, train_x2, valid_x2, train_y, valid_y = train_test_split(X_segments[0], X_segments[1], X_segments[2], Y_array, test_size=0.2, random_state=18)
    train_x = [train_x0, train_x1, train_x2]
    valid_x = [valid_x0, valid_x1, valid_x2]
    
    optim = RandomSearch(lambda: get_random_config(), lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y, len(feature_alphabet), num_outputs ) )
    best_config = optim.optimize()

    print("Best config: %s" % best_config)
 def __init__(self, problem_instance, random_state, population_size,
              selection, crossover, p_c, mutation, p_m, pressure):
     RandomSearch.__init__(self, problem_instance, random_state)
     self.population_size = population_size
     self.selection1 = selection
     self.selection2 = selection
     self.crossover1 = crossover
     self.crossover2 = crossover
     self.p_c2 = p_c
     self.p_c1 = p_c
     self.mutation1 = mutation
     self.mutation2 = mutation
     self.p_m1 = p_m
     self.p_m2 = p_m
     self.repetition1 = 0
     self.repetition2 = 0
     self.population_size1 = population_size // 2
     self.population_size2 = population_size // 2
     self.flag1 = False
     self.flag2 = False
     self.control = 2
     self.presure1 = pressure
     self.presure2 = pressure
     self.count = 0
     self.variation1 = 1
     self.variation2 = 1
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required arguments: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, label_alphabet, X_array, feature_alphabet = ctk_io.read_token_sequence_data(
        working_dir)

    X_segments, dimensions = split_entity_data(X_array, feature_alphabet)
    Y_array = np.array(Y)
    Y_adj, indices = ctk_io.flatten_outputs(Y_array)

    num_outputs = 1 if len(label_alphabet) == 2 else len(label_alphabet)
    num_y_examples = len(Y)

    train_x0, valid_x0, train_x1, valid_x1, train_x2, valid_x2, train_y, valid_y = train_test_split(
        X_segments[0],
        X_segments[1],
        X_segments[2],
        Y_array,
        test_size=0.2,
        random_state=18)
    train_x = [train_x0, train_x1, train_x2]
    valid_x = [valid_x0, valid_x1, valid_x2]

    optim = RandomSearch(
        lambda: get_random_config(),
        lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y,
                                  len(feature_alphabet), num_outputs))
    best_config = optim.optimize()

    print("Best config: %s" % best_config)
 def __init__(self,
              problem_instance,
              random_state,
              neighborhood_size,
              neighborhood_function=bit_flip):
     RandomSearch.__init__(self, problem_instance, random_state)
     self.neighborhood_size = neighborhood_size
     self.neighborhood_function = neighborhood_function
Example #7
 def __init__(self, problem_instance, random_state, population_size,
              selection, crossover, p_c, mutation, p_m):
     RandomSearch.__init__(self, problem_instance, random_state)
     self.population_size = population_size
     self.selection = selection
     self.crossover = crossover
     self.p_c = p_c
     self.mutation = mutation
     self.p_m = p_m
def main(args):
    #np.random.seed(1337)
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)
    working_dir = args[0]
    data_file = os.path.join(working_dir, 'training-data.liblinear')

    # learn alphabet from training data
    provider = dataset.DatasetProvider(data_file)
    # now load training examples and labels
    train_x, train_y = provider.load(data_file)
    # turn x and y into numpy array among other things
    maxlen = max([len(seq) for seq in train_x])
    classes = len(set(train_y))

    train_x = pad_sequences(train_x, maxlen=maxlen)
    train_y = to_categorical(np.array(train_y), classes)

    #loading pre-trained embedding file:
    embeddings_index = {}
    f = open(os.path.join(working_dir, 'mimic.txt'))
    values = f.readline().split()
    EMBEDDING_WORDNUM = int(values[0])
    EMBEDDING_DIM = int(values[1])
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    print('load embeddings for %s=%s words.' %
          (len(embeddings_index), EMBEDDING_WORDNUM))

    # prepare embedding matrix
    nb_words = len(provider.word2int)
    embedding_matrix = np.zeros((nb_words, EMBEDDING_DIM))
    for word, i in provider.word2int.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:  # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    print('train_x shape:', train_x.shape)
    print('train_y shape:', train_y.shape)

    #train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=0.1, random_state=18)

    optim = RandomSearch(
        lambda: get_random_config(), lambda x, y: run_one_eval(
            x, y, train_x, train_y, maxlen, len(provider.word2int), classes,
            embedding_matrix, EMBEDDING_DIM))
    best_config = optim.optimize()

    print("Best config: %s" % best_config)

    sys.exit(0)
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]
   
    print("Reading data...")
    Y, outcome_map, outcome_list, X, feature_alphabet = ctk_io.read_multitask_token_sequence_data(working_dir)
    start_ind = feature_alphabet[start_symbol]
    end_ind = feature_alphabet[end_symbol]
    
    train_x, valid_x, train_y, valid_y = train_test_split(X, Y, test_size=0.2, random_state=7)

#    X_distance = get_distance_features(X, start_ind, end_ind)
    
    print("Shape of X is %s and Y is %s" % (str(X.shape), str(Y.shape)))
    
    num_examples, dimension = X.shape
    num_y_examples, num_labels = Y.shape
    assert num_examples == num_y_examples
    
    weights = None
    if len(args) > 1:
        weights = ctk_io.read_embeddings(args[1], feature_alphabet)
    
    train_y_adj, train_indices = ctk_io.flatten_outputs(train_y)
    valid_y_adj, valid_indices = ctk_io.flatten_outputs(valid_y)
    if not train_indices == valid_indices:
        print("Error: training and valid sets have different index sets -- may be missing some labels in one set or the other")
        sys.exit(-1)
           
    output_dims_list = []
    train_y_list = []
    valid_y_list = []
    indices = train_indices
    for i in range(len(indices)-1):
        label_dims = indices[i+1] - indices[i]
        output_dims_list.append(label_dims)
        if label_dims == 1:
            train_y_list.append(train_y_adj[:, indices[i]])
            valid_y_list.append(valid_y_adj[:, indices[i]])
        else:
            train_y_list.append(train_y_adj[:, indices[i]:indices[i+1]])
            valid_y_list.append(valid_y_adj[:, indices[i]:indices[i+1]])
        
        print("Dimensions of label %d are %s" % (i, str(train_y_list[-1].shape) ) )
    
    ## pass a function to the search that it uses to get a random config
    ## and a function that it will get an eval given (e)pochs and (c)onfig file:
    optim = RandomSearch(lambda: get_random_config(weights), lambda e, c: run_one_eval(e, c, train_x, train_y_list, valid_x, valid_y_list, len(feature_alphabet), output_dims_list, weights ) )
    best_config = optim.optimize(max_iter=27)

    open(os.path.join(working_dir, 'model_0.config'), 'w').write( str(best_config) )
    print("Best config returned by optimizer is %s" % str(best_config) )
Example #10
 def __init__(self, problem_instance, random_state, population_size,
              selection, crossover, p_c, mutation, p_m, presure):
     RandomSearch.__init__(self, problem_instance, random_state)
     self.population_size = population_size
     self.selection = selection
     self.crossover = crossover
     self.p_c = p_c
     self.mutation = mutation
     self.p_m = p_m
     self.presure = presure
     self.reproduttive_guys = []
     self.repetition = 0
Example #11
def main(args):
    
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory> [(optional) weights file]\n")
        sys.exit(-1)

    working_dir = args[0]
    
    (labels, label_alphabet, feats, feats_alphabet) = ctk_io.read_bio_sequence_data(working_dir)
    
    weights = None
    if len(args) > 1:
        weights = ctk_io.read_embeddings(args[1], feats_alphabet)
        
    maxlen = max([len(seq) for seq in feats])
    all_x = pad_sequences(feats, maxlen=maxlen)
    all_y = ctk_io.expand_labels(pad_sequences(labels, maxlen=maxlen), label_alphabet)

    train_x, valid_x, train_y, valid_y = train_test_split(all_x, all_y, test_size=0.2, random_state=7)
    
    optim = RandomSearch(lambda: get_random_config(weights), lambda x, y: run_one_eval(x, y, train_x, train_y, valid_x, valid_y, len(feats_alphabet), len(label_alphabet), weights ) )
    best_config = optim.optimize()
    
    open(os.path.join(working_dir, 'model_0.config'), 'w').write( str(best_config) )
    print("Best config returned by optimizer is %s" % str(best_config) )
    
    if not best_config['pretrain']:
        weights = None
        
    model = get_model_for_config(train_x.shape, len(feats_alphabet), len(label_alphabet), best_config, weights=weights)

    model.fit(all_x,
            all_y,
            nb_epoch=40,
            batch_size=best_config['batch_size'],
            verbose=1,
            validation_split=0.1)

    model.summary()
    
    json_string = model.to_json()
    open(os.path.join(working_dir, 'model_0.json'), 'w').write(json_string)
    model.save_weights(os.path.join(working_dir, 'model_0.h5'), overwrite=True)
    
    fn = open(os.path.join(working_dir, 'alphabets.pkl'), 'w')
    pickle.dump( (feats_alphabet, label_alphabet), fn)
    fn.close()

    with ZipFile(os.path.join(working_dir, 'script.model'), 'w') as myzip:
        myzip.write(os.path.join(working_dir, 'model_0.json'), 'model_0.json')
        myzip.write(os.path.join(working_dir, 'model_0.h5'), 'model_0.h5')
        myzip.write(os.path.join(working_dir, 'alphabets.pkl'), 'alphabets.pkl')
def run_random_search(seconds):
    print('Running Random Search algorithm for ' + str(seconds) + ' seconds...')
    print()

    rs = RandomSearch(states, seconds, inc_support, dec_support)
    rs.run()
    print('Found optimal route with value of ' + str(rs.best_solution.value) +
          '.')
    print(
        str(rs.best_solution.calculate_real_value()) +
        ' electoral votes were collected.')
    rs.best_solution.print()
    print()
Example #13
 def __init__(self, problem_instance, random_state, population_size,
              selection, crossover, p_c, mutation, p_m):
     RandomSearch.__init__(self, problem_instance, random_state)
     self.population_size = population_size
     self.selection = selection
     self.crossover = crossover
     self.p_c = p_c
     self.mutation = mutation
     self.p_m = p_m
     self.repetition = 0
     self.presure = 0.2
     self.list = []
     self.list_iteration = []
Example #14
def main():
    target_string = "Hello World!"
    population_size = 1400
    runs = 10
    """
    The main method for the application. The Genetic Algorithm uses tournament
    selection as selection method and k-point or one-point crossover as
    crossover methods. The Genetic Algorithm constructor takes in the following parameters:
    @param: target_string The target string
    @param: population_size Amount of randomly generated strings
    @param: crossover_rate Crossover Rate in percentage (i.e. 1 = 100%)
    @param: mutation_rate Mutation Rate in percentage
    @param: is_k_point_crossover Choose whether to choose k-point crossover as
            crossover method. If false, one-point crossover is performed
    @param: tournament_size_percent Percentage of population to participate in
            tournaments for selection
    @param: strongest_winner_probability Probability of strongest participant
            in tournament to win, as well as the second strongest's probability
    """
    ga = GeneticAlgorithm(target_string, population_size, 0.8, 0.05, True,
                          0.05, 0.65)
    ga.set_show_each_chromosome(False)
    ga.set_show_crossover_internals(False)
    ga.set_show_mutation_internals(False)
    ga.set_silent(
        False)  # If False it shows the fittest chromosome of each generation
    ga.run(runs)
    ga.get_stats()
    """
    The Hill Climbing constructor takes in the following parameters:
    @param: target_string The target string
    @param: solutions_size Amount of solutions (strings) to search for a
            better solution in
    """
    hc = HillClimbing(target_string, population_size)
    hc.set_show_each_solution(False)
    hc.set_silent(True)
    hc.run(runs)
    hc.get_stats()
    """
    The Random Search constructor takes in the following parameters:
    @param: target_string The target string
    @param: solutions_size Amount of solutions (strings) generated randomly
            each round in which the solution is searched for
    """
    rs = RandomSearch(target_string, population_size)
    rs.set_show_each_solution(False)
    rs.set_silent(True)
    rs.run(runs)
    rs.get_stats()
Example #15
def main(parameters):
    # if (len(parameters) - 1) != 7:
    #     print("Missing required parameters: (regressor, input_file, \
    #           output_folder, max_iter, seed)")
    regressor = parameters[1]
    input_file = parameters[2]
    output_folder = parameters[3]
    max_iter = int(parameters[4])
    inner_seed = int(parameters[5])
    n_jobs = int(parameters[6])
    data_tag = parameters[7]
    outer_seed = int(parameters[8])
    tuning_seed = int(parameters[9])
    outer_fold = int(parameters[10])
    must_normalize = parameters[11] == 'True'

    output_folder = os.path.join(output_folder, regressor, data_tag,
                                 "outer_fold" + str(outer_fold))
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    data = pd.read_csv(input_file)
    # Removing ID column
    data = data.iloc[:, 1:]
    X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

    kf = KFold(n_splits=10, random_state=outer_seed, shuffle=True)

    for k, (train_index, test_index) in enumerate(kf.split(X)):
        if k + 1 == outer_fold:
            X_train, y_train = X[train_index], y[train_index]
            # X_test, y_test = X[test_index], y[test_index]
            break

    rs = RandomSearch(get_search_space(algorithm=regressor),
                      max_iter=max_iter,
                      n_jobs=n_jobs,
                      random_state=tuning_seed)

    # best_conf =
    rs.fmin(objective=objective,
            predictor=get_regressor(algorithm=regressor),
            loss_func_tuning=RRMSE,
            X=X_train,
            y=y_train,
            seed=inner_seed,
            model_name=regressor,
            output_folder=output_folder,
            data_tag=data_tag,
            must_normalize=must_normalize)
Example #16
def main():
    exp_dir = 'search_{}_{}'.format(args.algorithm,
                                    time.strftime("%Y%m%d-%H%M%S"))
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(exp_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    logging.info('args = %s', args)

    if args.algorithm == 'PPO' or args.algorithm == 'PG':
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
        if torch.cuda.is_available():
            device = torch.device('cuda:{}'.format(str(args.gpu)))
            cudnn.benchmark = True
            cudnn.enabled = True
            logging.info('using gpu : {}'.format(args.gpu))
            torch.cuda.manual_seed(args.seed)
        else:
            device = torch.device('cpu')
            logging.info('using cpu')

        if args.algorithm == 'PPO':
            ppo = PPO(args, device)
            ppo.multi_solve_environment()
        elif args.algorithm == 'PG':
            pg = PolicyGradient(args, device)
            pg.multi_solve_environment()

    else:
        rs = RandomSearch(args)
        rs.multi_solve_environment()
Example #17
    def __init__(self, env):
        super().__init__(env)

        # Initial generation size
        self.initial_gen_size = INITIAL_GEN_SIZE
        # Number of generations to run for
        self.num_generations = NUM_GENERATIONS
        # Number of parents to keep each generation
        self.num_elite = NUM_ELITE
        # Number of children each generation
        self.num_children_per_parent_comb = NUM_CHILDREN_PER_PARENT_COMB
        # Probability of mutation
        self.mutation_prob = MUTATION_PROB

        # Use random policy generator from RandomSearch learner
        self.get_policy = RandomSearch(env).get_policy
Example #18
    base = os.environ['DATA_ROOT']
    train_dir = os.path.join(base, cfg.get('data', 'train'))
    code_file = os.path.join(base, cfg.get('data', 'codes'))

    provider = dataset.DatasetProvider(train_dir,
                                       code_file,
                                       cfg.getint('args', 'min_token_freq'),
                                       cfg.getint('args',
                                                  'max_tokens_in_file'),
                                       cfg.getint('args',
                                                  'min_examples_per_code'),
                                       use_cuis=False)
    x, y = provider.load(tokens_as_set=False)

    maxlen = max([len(seq) for seq in x])
    x = pad_sequences(x, maxlen=maxlen)
    y = np.array(y)

    print('x shape:', x.shape)
    print('y shape:', y.shape)
    print('max seq len:', maxlen)
    print('vocab size:', x.max() + 1)
    print('number of features:', len(provider.token2int))
    print('number of labels:', len(provider.code2int))

    model = CnnCodePredictionModel()
    search = RandomSearch(model, x, y)
    best_config = search.optimize(max_iter=64)
    print('best config:', best_config)
Example #19
def main(args):

    if len(args) < 1:
        sys.stderr.write(
            "Error - one required argument: <data directory> [(optional) weights file]\n"
        )
        sys.exit(-1)

    working_dir = args[0]

    (labels, label_alphabet, feats,
     feats_alphabet) = ctk_io.read_bio_sequence_data(working_dir)

    weights = None
    if len(args) > 1:
        weights = ctk_io.read_embeddings(args[1], feats_alphabet)

    maxlen = max([len(seq) for seq in feats])
    all_x = pad_sequences(feats, maxlen=maxlen)
    all_y = ctk_io.expand_labels(pad_sequences(labels, maxlen=maxlen),
                                 label_alphabet)

    train_x, valid_x, train_y, valid_y = train_test_split(all_x,
                                                          all_y,
                                                          test_size=0.2,
                                                          random_state=7)

    optim = RandomSearch(
        lambda: get_random_config(weights), lambda x, y: run_one_eval(
            x, y, train_x, train_y, valid_x, valid_y, len(feats_alphabet),
            len(label_alphabet), weights))
    best_config = optim.optimize()

    open(os.path.join(working_dir, 'model_0.config'),
         'w').write(str(best_config))
    print("Best config returned by optimizer is %s" % str(best_config))

    if not best_config['pretrain']:
        weights = None

    model = get_model_for_config(train_x.shape,
                                 len(feats_alphabet),
                                 len(label_alphabet),
                                 best_config,
                                 weights=weights)

    model.fit(all_x,
              all_y,
              nb_epoch=40,
              batch_size=best_config['batch_size'],
              verbose=1,
              validation_split=0.1)

    model.summary()

    json_string = model.to_json()
    open(os.path.join(working_dir, 'model_0.json'), 'w').write(json_string)
    model.save_weights(os.path.join(working_dir, 'model_0.h5'), overwrite=True)

    fn = open(os.path.join(working_dir, 'alphabets.pkl'), 'w')
    pickle.dump((feats_alphabet, label_alphabet), fn)
    fn.close()

    with ZipFile(os.path.join(working_dir, 'script.model'), 'w') as myzip:
        myzip.write(os.path.join(working_dir, 'model_0.json'), 'model_0.json')
        myzip.write(os.path.join(working_dir, 'model_0.h5'), 'model_0.h5')
        myzip.write(os.path.join(working_dir, 'alphabets.pkl'),
                    'alphabets.pkl')
Example #20
File: main.py  Project: renatotnk/UDESC
from annealing import Annealer
from sat3cnf import SAT3CNF
from random_search import RandomSearch
import numpy as np


def imprime_solucao(solucao):
    for (c, v) in solucao.items():
        print('Clausula %s: %d' % (c, v))


if __name__ == '__main__':
    files = ['uf20-01.cnf', 'uf100-01.cnf', 'uf250-01.cnf']
    for fname in files:
        print("Resolvendo ", fname)
        sat = SAT3CNF(fname)
        print(str(sat.n_clausulas) + ' clausulas')

        sim_annealing = Annealer(sat)
        print("\nSimulated Annealing")
        s_ann = sim_annealing.resolver()[0]

        random_search = RandomSearch(sat)
        print("\nRandom Search")
        s_random = random_search.resolver()
        imprime_solucao(s_ann)

        print("\n\n")
Example #21
def main(args):
    if len(args) < 1:
        sys.stderr.write("Error - one required argument: <data directory>\n")
        sys.exit(-1)

    working_dir = args[0]

    print("Reading data...")
    Y, outcome_map, outcome_list, X, feature_alphabet = ctk_io.read_multitask_token_sequence_data(
        working_dir)
    start_ind = feature_alphabet[start_symbol]
    end_ind = feature_alphabet[end_symbol]

    train_x, valid_x, train_y, valid_y = train_test_split(X,
                                                          Y,
                                                          test_size=0.2,
                                                          random_state=7)

    #    X_distance = get_distance_features(X, start_ind, end_ind)

    print("Shape of X is %s and Y is %s" % (str(X.shape), str(Y.shape)))

    num_examples, dimension = X.shape
    num_y_examples, num_labels = Y.shape
    assert num_examples == num_y_examples

    weights = None
    if len(args) > 1:
        weights = ctk_io.read_embeddings(args[1], feature_alphabet)

    train_y_adj, train_indices = ctk_io.flatten_outputs(train_y)
    valid_y_adj, valid_indices = ctk_io.flatten_outputs(valid_y)
    if not train_indices == valid_indices:
        print(
            "Error: training and valid sets have different index sets -- may be missing some labels in one set or the other"
        )
        sys.exit(-1)

    output_dims_list = []
    train_y_list = []
    valid_y_list = []
    indices = train_indices
    for i in range(len(indices) - 1):
        label_dims = indices[i + 1] - indices[i]
        output_dims_list.append(label_dims)
        if label_dims == 1:
            train_y_list.append(train_y_adj[:, indices[i]])
            valid_y_list.append(valid_y_adj[:, indices[i]])
        else:
            train_y_list.append(train_y_adj[:, indices[i]:indices[i + 1]])
            valid_y_list.append(valid_y_adj[:, indices[i]:indices[i + 1]])

        print("Dimensions of label %d are %s" %
              (i, str(train_y_list[-1].shape)))

    ## pass a function to the search that it uses to get a random config
    ## and a function that it will get an eval given (e)pochs and (c)onfig file:
    optim = RandomSearch(
        lambda: get_random_config(weights), lambda e, c: run_one_eval(
            e, c, train_x, train_y_list, valid_x, valid_y_list,
            len(feature_alphabet), output_dims_list, weights))
    best_config = optim.optimize(max_iter=27)

    open(os.path.join(working_dir, 'model_0.config'),
         'w').write(str(best_config))
    print("Best config returned by optimizer is %s" % str(best_config))
Example #22
                      help="method to test",
                      type="string",
                      default="rl")
    (kwargs, args) = parser.parse_args()

    prob_env_name, prob_env_class = get_prob_env_name_class(
        kwargs.prob_env_dir)
    prob_env = prob_env_class.load(kwargs.prob_env_dir)

    if kwargs.method == 'random':
        # the problem environment does not have a fixed x_o;
        # however, we want to fix x_o for Monte Carlo random search
        assert not prob_env.if_set_fixed_xo()
        prob_env.set_fixed_xo(prob_env.x_o)
        assert prob_env.if_set_fixed_xo()
        opt = RandomSearch(prob_env)
        cpu_time = get_cpu_time()
        print("before test {}, cpu time: {}".format(kwargs.method, cpu_time))
        _, opt_state, _, _, duration, call_counts = \
            opt.random_search(
                iteration_limit=int(9e30),  # never stop until wall_time_limit
                wall_time_limit=kwargs.wall_time_limit,
            )
        print("after test {}, cpu time: {}, diff: {}".format(
            kwargs.method, get_cpu_time(),
            get_cpu_time() - cpu_time))
    elif kwargs.method == 'rl_prtr':
        model_env_name = get_model_env_name(kwargs.prtr_model_dir)
        assert model_env_name == prob_env_name
        opt = QLearning(
            k=prob_env.k,
Example #23
    def time_process(data_file):
        curr_time = dt.datetime.now()
        # run loop
        fobj = XMLParser(data_file, curr_time)
        lim = fobj.find_oldest_time()
        while curr_time > lim:
            curr_time -= TIME_INCR
            print('running time analysis for ' + str(curr_time))
            fobj.update_time(curr_time)
            d = fobj.parse_to_dict()
            if d:
                net = NetworkParser(d)
                output("Analyzing File " + data_file + ' at time ' +
                       str(curr_time))
                na = NetworkAnalysis(net.G, os.path.basename(data_file),
                                     output_path, curr_time)

                basic = na.d3dump(public_out_path, str(curr_time))

                # Run Decentralized Search
                try:
                    if decentralized_search_settings[
                            "run_decentralized_search"]:
                        hiearchyG = net.G.copy()
                        category_hierarchy = CategoryBasedHierarchicalModel(
                            hiearchyG,
                            similarity_matrix_type=
                            category_hierarchical_model_settings[
                                "similarity_matrix_type"],
                            max_branching_factor_root=
                            category_hierarchical_model_settings[
                                "max_branching_factor_root"])
                        category_hierarchy.build_hierarchical_model()
                        decentralized_search_model = HierarchicalDecentralizedSearch(
                            hiearchyG,
                            category_hierarchy.hierarchy,
                            na,
                            detailed_print=decentralized_search_settings[
                                "detailed_print"],
                            hierarchy_nodes_only=decentralized_search_settings[
                                "hierarchy_nodes_only"],
                            apply_weighted_score=decentralized_search_settings[
                                "apply_weighted_score"],
                        )
                        n_found, n_missing, av_path_len, av_unique_nodes, path_lengths_deciles = decentralized_search_model.run_decentralized_search(
                            1000,
                            decentralized_search_settings["widen_search"],
                            decentralized_search_settings["plots"])
                        basic.update({
                            "decentralized_num_paths_found":
                            n_found,
                            "decentralized_num_paths_missing":
                            n_missing,
                            "decentralized_average_decentralized_path_length":
                            av_path_len,
                            "decentralized_average_num_unique_nodes":
                            av_unique_nodes,
                            "hierarchy_num_nodes":
                            (len(category_hierarchy.hierarchy.nodes()) -
                             len(category_hierarchy.ranked_categories)),
                            "hierarchy_num_levels":
                            category_hierarchy.num_hierarchy_levels
                        })

                        path_lengths_deciles_dict = {}
                        for i in range(len(path_lengths_deciles)):
                            path_lengths_deciles_dict["path_length_" + str((i + 1) * 10) + "_percentile"] = \
                                path_lengths_deciles[i]
                        basic.update(path_lengths_deciles_dict)

                        random_search_model = RandomSearch(net.G, na)
                        n_found, n_missing, av_path_len, av_unique_nodes = random_search_model.run_search(
                            1000,
                            decentralized_search_settings["widen_search"],
                            decentralized_search_settings["plots"])
                        basic.update({
                            "random_num_paths_found":
                            n_found,
                            "random_num_paths_missing":
                            n_missing,
                            "random_average_decentralized_path_length":
                            av_path_len,
                            "random_average_num_unique_nodes":
                            av_unique_nodes
                        })
                except:
                    pass

                if generate_data:  # write out decentralized results
                    na.write_permanent_data_json(public_data, basic,
                                                 str(curr_time.date()))

        output("Completed Analyzing: " + data_file)
Example #24
def create_random_search():

    search = RandomSearch()
    search.add_static_var("batch_size", 128)

    # Fashion MNIST converges around 25 epochs and CIFAR converges after 100 epochs
    search.add_static_var("epochs", 10)

    search.add_list("optimizer", ["sgd", "adam"])

    search.add_power_range("num_filters_1", 5, 8, 2)  # 32 64 128 256
    search.add_power_range("num_filters_2", 4, 8, 2)  # 16 32 64 128 256
    search.add_power_range("num_filters_3", 4, 8, 2)  # 16 32 64 128 256
    search.add_step_range("filter_size_1", 2, 3, 1)
    search.add_step_range("filter_size_2", 2, 3, 1)
    search.add_step_range("filter_size_3", 2, 3, 1)
    search.add_step_range("pool_size_1", 2, 2, 1)
    search.add_step_range("pool_size_2", 2, 2, 1)
    search.add_step_range("pool_size_3", 2, 2, 1)
    search.add_step_range("dropout_1", 0.1, 0.9, 0.1)
    search.add_step_range("dropout_2", 0.1, 0.9, 0.1)
    search.add_step_range("dropout_3", 0.1, 0.9, 0.1)
    search.add_step_range("dropout_4", 0.1, 0.9, 0.1)
    search.add_power_range("dense_neurons_1", 6, 11,
                           2)  # 64 128 256 512 1024 2048

    search_count = 5
    return search.create_random_search(search_count)
Example #25
def main():
    runs = 10
    rounds = 5
    chromosome_size = 23
    population_size = 1000
    data_set_name = 'bigfaultmatrix.txt'

    pwd = os.path.abspath(os.path.dirname(__file__))
    data_set_path = os.path.join(pwd, data_set_name)
    parser = CSVParser(data_set_path)
    test_case_fault_matrix = parser.parse_data(True)

    ga = GeneticAlgorithm(test_case_fault_matrix, chromosome_size,
                          population_size, rounds, 0.8, 0.08, 0.05, 0.75)
    ga.set_show_each_chromosome(False)
    ga.set_show_fitness_internals(False)
    ga.set_show_crossover_internals(False)
    ga.set_show_mutation_internals(False)
    ga.set_show_duplicate_internals(False)
    ga.set_silent(True)
    ga.run(runs)
    ga_fitness = ga.get_stats()

    for i in range(0, 2):
        if i == 0:
            hc = HillClimbing(test_case_fault_matrix, chromosome_size,
                              population_size, rounds, False)
        else:
            hc = HillClimbing(test_case_fault_matrix, chromosome_size,
                              population_size, rounds, True)
        hc.set_show_each_solution(False)
        hc.set_show_fitness_internals(False)
        hc.set_show_swapping_internals(False)
        hc.set_silent(True)
        hc.run(runs)
        if i == 0:
            hc_internal_fitness = hc.get_stats()
        else:
            hc_external_fitness = hc.get_stats()

    rs = RandomSearch(test_case_fault_matrix, chromosome_size, population_size,
                      rounds)
    rs.set_show_each_solution(False)
    rs.set_silent(True)
    rs.run(runs)
    rs_fitness = rs.get_stats()

    rs_data = np.array(rs_fitness)
    hs_internal = np.array(hc_internal_fitness)
    hs_external = np.array(hc_external_fitness)
    ga_data = np.array(ga_fitness)

    # test_cases_per_test_suite = np.array([5, 10, 20, 23, 30, 50, 100])
    # unique_large_apfd = np.array([0.4594736842105263, 0.6063157894736844, 0.6867105263157895, 0.6978260869565216, 0.7128947368421051, 0.7326842105263159, 0.7480263157894737])
    # full_large_apfd = np.array([0.44631578947368417, 0.6023684210526316, 0.6846052631578947, 0.6958810068649884, 0.7122807017543858, 0.7320526315789474, 0.7476578947368421])

    # plt.plot(test_cases_per_test_suite, unique_large_apfd, '-gD')
    # plt.xlabel("Test Cases per Test Suite")
    # plt.ylabel("Mean Fitness (APFD)")
    # plt.xticks(np.arange(min(test_cases_per_test_suite), max(test_cases_per_test_suite) + 1, 5.0))

    # combine these different collections into a list
    data_to_plot = [rs_data, hs_internal, hs_external, ga_data]

    # Create a figure instance
    fig = plt.figure(1, figsize=(9, 6))

    # Create an axes instance
    ax = fig.add_subplot(111)

    # add patch_artist=True option to ax.boxplot()
    bp = ax.boxplot(data_to_plot, patch_artist=True)

    # change outline color, fill color and linewidth of the boxes
    for box in bp['boxes']:
        # change outline color
        box.set(color='#7570b3', linewidth=2)
        # change fill color
        box.set(facecolor='#1b9e77')

    # change color and linewidth of the whiskers
    for whisker in bp['whiskers']:
        whisker.set(color='#7570b3', linewidth=2)

    # change color and linewidth of the caps
    for cap in bp['caps']:
        cap.set(color='#7570b3', linewidth=2)

    # change color and linewidth of the medians
    for median in bp['medians']:
        median.set(color='#b2df8a', linewidth=2)

    # change the style of fliers and their fill
    for flier in bp['fliers']:
        flier.set(marker='o', color='#e7298a', alpha=0.5)

    # Custom x-axis labels
    ax.set_xticklabels([
        'Random Search', 'HC Internal Swap', 'HC External Swap',
        'Genetic Algorithm'
    ])

    # Remove top axes and right axes ticks
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Save the figure
    graph_path = os.path.join(pwd, 'graph.pdf')
    pdf = PdfPages(graph_path)
    plt.savefig(pdf, format='pdf', bbox_inches='tight')
    plt.show()
    pdf.close()
Example #26
                      dest='method',
                      help="method to test",
                      type="string",
                      default="rl")
    (kwargs, args) = parser.parse_args()

    prob_env_name, prob_env_class = get_prob_env_name_class(
        kwargs.prob_env_dir)
    prob_env = prob_env_class.load(kwargs.prob_env_dir)

    if kwargs.method == 'random':
        # the problem environment may not have a fixed x_o;
        # however, we want to fix x_o for Monte Carlo random search
        prob_env.set_fixed_xo(prob_env.x_o)
        assert prob_env.if_set_fixed_xo()
        opt = RandomSearch(prob_env)
        _, opt_state, _, _, duration, call_counts = \
            opt.random_search(
                iteration_limit=int(9e30),  # never stop until wall_time_limit
                wall_time_limit=kwargs.wall_time_limit,
            )
        start_x_o = prob_env.fixed_xo
        start_x_p = None  # meaningless in random method
        opt_x_p = opt_state[prob_env.k:-1]  # exclude the step
        # use noiseless output
        opt_val = prob_env.still(prob_env.output_noiseless(opt_state))
        wall_time_limit = kwargs.wall_time_limit
        generation = call_counts  # for random search, generation means call counts
    elif kwargs.method == 'rl_prtr':
        model_env_name = get_model_env_name(kwargs.prtr_model_dir)
        assert model_env_name == prob_env_name
Example #27
    def process_file(data_file):
        curr_time = get_time()
        # Parse Into Network
        d = XMLParser(data_file, get_time()).parse_to_dict()
        net = NetworkParser(d)
        # Graph Analysis
        output("Analyzing File " + data_file)
        na = NetworkAnalysis(net.G, os.path.basename(data_file), output_path)
        na.outputBasicStats()
        na.outputNodesAndEdges()
        # na.nodeRemoval()

        basic = na.d3dump(public_out_path, str(curr_time))

        # Run Decentralized Search
        if decentralized_search_settings["run_decentralized_search"]:
            hiearchyG = net.G.copy()
            category_hierarchy = CategoryBasedHierarchicalModel(
                hiearchyG,
                similarity_matrix_type=category_hierarchical_model_settings[
                    "similarity_matrix_type"],
                max_branching_factor_root=category_hierarchical_model_settings[
                    "max_branching_factor_root"])
            category_hierarchy.build_hierarchical_model()
            decentralized_search_model = HierarchicalDecentralizedSearch(
                hiearchyG,
                category_hierarchy.hierarchy,
                na,
                detailed_print=decentralized_search_settings["detailed_print"],
                hierarchy_nodes_only=decentralized_search_settings[
                    "hierarchy_nodes_only"],
                apply_weighted_score=decentralized_search_settings[
                    "apply_weighted_score"],
            )
            n_found, n_missing, av_path_len, av_unique_nodes, path_lengths_deciles = decentralized_search_model.run_decentralized_search(
                1000, decentralized_search_settings["widen_search"],
                decentralized_search_settings["plots"])
            basic.update({
                "decentralized_num_paths_found":
                n_found,
                "decentralized_num_paths_missing":
                n_missing,
                "decentralized_average_decentralized_path_length":
                av_path_len,
                "decentralized_average_num_unique_nodes":
                av_unique_nodes,
                "hierarchy_num_nodes":
                (len(category_hierarchy.hierarchy.nodes()) -
                 len(category_hierarchy.ranked_categories)),
                "hierarchy_num_cat_nodes":
                len(category_hierarchy.ranked_categories),
                "hierarchy_num_levels":
                category_hierarchy.num_hierarchy_levels
            })
            basic["hierarchy_ratio_cat_nodes"] = basic[
                "hierarchy_num_cat_nodes"] / basic["hierarchy_num_nodes"]

            path_lengths_deciles_dict = {}
            for i in range(len(path_lengths_deciles)):
                path_lengths_deciles_dict["path_length_" + str(
                    (i + 1) * 10) + "_percentile"] = path_lengths_deciles[i]
            basic.update(path_lengths_deciles_dict)

            random_search_model = RandomSearch(net.G, na)
            n_found, n_missing, av_path_len, av_unique_nodes = random_search_model.run_search(
                1000, decentralized_search_settings["widen_search"],
                decentralized_search_settings["plots"])
            basic.update({
                "random_num_paths_found": n_found,
                "random_num_paths_missing": n_missing,
                "random_average_decentralized_path_length": av_path_len,
                "random_average_num_unique_nodes": av_unique_nodes
            })

        if generate_data:
            na.write_permanent_data_json(
                public_data, basic)  # write out decentralized results

        # na.generateDrawing()

        output("Completed Analyzing: " + data_file)
def benchmark():
    REPEATS = 10
    SECONDS = [5, 10, 30, 60, 300, 1200]

    for seconds in SECONDS:
        v = 0
        time_s = datetime.now()
        for k in range(REPEATS):
            rs = RandomSearch(states, seconds, inc_support, dec_support)
            rs.run()
            v += rs.best_solution.value
        time_e = datetime.now()
        tt = (time_e - time_s).total_seconds()
        print_csv('Random Search', str(seconds), str(v / REPEATS),
                  str(tt / REPEATS))

    for seconds in SECONDS:
        v = 0
        time_s = datetime.now()
        for k in range(REPEATS):
            ls = LocalSearch(states, seconds, inc_support, dec_support)
            ls.run()
            v += ls.best_solution.value
        time_e = datetime.now()
        tt = (time_e - time_s).total_seconds()
        print_csv('Local Search', str(seconds), str(v / REPEATS),
                  str(tt / REPEATS))

    for seconds in SECONDS:
        for initial_cadence in [10, 25, 50]:
            for critical_event in [10, 25, 50]:
                v = 0
                time_s = datetime.now()
                for k in range(REPEATS):
                    ts = TabuSearch(states, seconds, initial_cadence,
                                    critical_event, inc_support, dec_support)
                    ts.run()
                    v += ts.best_solution.value
                time_e = datetime.now()
                tt = (time_e - time_s).total_seconds()
                print_csv('Tabu Search', str(seconds), str(initial_cadence),
                          str(critical_event), str(v / REPEATS),
                          str(tt / REPEATS))

    for crossover in ['pmx', 'ox']:
        for mutate in ['transposition', 'insertion', 'inversion']:
            for seconds in SECONDS:
                for population_size in [10, 25, 50]:
                    v = 0
                    time_s = datetime.now()
                    for k in range(REPEATS):
                        ga = GeneticAlgorithm(states, seconds, population_size,
                                              crossover, mutate, inc_support,
                                              dec_support)
                        ga.run()
                        v += ga.best_solution.value
                    time_e = datetime.now()
                    tt = (time_e - time_s).total_seconds()
                    print_csv('Genetic Algorithm ' + crossover + ' ' + mutate,
                              str(seconds), str(population_size),
                              str(v / REPEATS), str(tt / REPEATS))

    for initial_temperature in [100, 500, 1000]:
        for cooling_coefficient in [0.9, 0.99, 0.999, 0.9999]:
            for minimal_temperature in [
                    initial_temperature * 0.25, initial_temperature * 0.5,
                    initial_temperature * 0.75
            ]:
                v = 0
                time_s = datetime.now()
                for k in range(REPEATS):
                    sa = SimulatedAnnealing(states, initial_temperature,
                                            cooling_coefficient,
                                            minimal_temperature, inc_support,
                                            dec_support)
                    sa.run()
                    v += sa.best_solution.value
                time_e = datetime.now()
                tt = (time_e - time_s).total_seconds()
                print_csv('Simulated Annealing', str(initial_temperature),
                          str(cooling_coefficient), str(minimal_temperature),
                          str(v / REPEATS), str(tt / REPEATS))