Example No. 1
def main():
    """
    Entry point for the program.
    """
    vrb_main = GLOBAL_VERBOSITY_FLAG
    all_tests_successful = True
    is_solution_list = []
    final_chains_list = []
    atom_positions_list = []

    for INPUT_FILE_NAME in TEST_FILE_NAMES:
        # The loaded data are NOT numpy arrays (change later?)
        (atom_types, atom_positions) = read_xyz_file(str(INPUT_FILE_NAME))
        (region_list, interaction_distances) = \
            read_transport_file(str(INPUT_FILE_NAME))

        if LOAD_CACHE_DATA:
            data = load_data(INPUT_FILE_NAME)
        else:
            data = prep_data(atom_types, atom_positions, region_list,
                             interaction_distances)
            chache_data(INPUT_FILE_NAME, data)

        atom_positions_list.append(atom_positions)
        dist_mtrx = data["dist_mtrx"]
        interact_mtrx = data["interact_mtrx"]

        num_atoms = np.size(dist_mtrx, axis=0)

        # Didn't convert to numpy arrays in file_io because that complicates
        # caching (numpy arrays are not JSON-serializable).
        numpy_region_list = []
        for region in region_list:
            numpy_region_list.append(np.array(region))

        device = numpy_region_list[0]
        contacts = numpy_region_list[1:]

        print_var(device, vrb=vrb_main)
        print_var(contacts, vrb=vrb_main)

        contact_bins = get_contact_bins(device, contacts, interact_mtrx)
        num_unlisted_contact_atoms = \
            count_atoms([contacts]) - count_atoms([contact_bins])
        prev_bins = list.copy(contacts)

        # Each element in "chains" is a list of bins. Each of these lists
        # contains the bins of a specific generation. The bins are sorted in the
        # order of ascending contact indices.
        # All bins that are the same number of steps away from the contacts are
        # assigned to the same "generation", the atoms in "contact_bins" are
        # in generation zero.
        # "contact_bins": All contact atoms that are interacting with the device
        # are assigned to this bin.
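        # Schematic illustration only (hypothetical indices; the actual bin
        # objects are whatever get_contact_bins/get_next_bins return), with two
        # contacts and two generations:
        #   chains = [[bin_contact_A, bin_contact_B],   # generation 0 (contact_bins)
        #             [bin_gen1_A,    bin_gen1_B]]      # generation 1
        # i.e. chains[gen_idx][chain_idx] is the bin of that chain at that generation.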

        chains = []
        chains.append(contact_bins)
        # print("bin_generations" + str(bin_generations))
        num_chains = len(contacts)
        curr_gen_idx = 1

        final_collision_found = False
        final_chain_idxs = []
        gen_idx_of_last_collision = -1

        # This condition is a failsafe, to avoid infinite loops
        while curr_gen_idx < MAX_GENERATIONS:
            collisions_found = []
            if vrb_main: print(curr_gen_idx)
            curr_gen = get_next_bins(chains[-1], prev_bins, interact_mtrx)

            chains.append(curr_gen)
            prev_bins = prev_bins + curr_gen
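            # prev_bins accumulates every bin assigned so far (presumably so that
            # get_next_bins does not revisit atoms that are already binned).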

            if vrb_main:
                print("\n Chains before merge step ")
                print_generations(chains)

            if not final_collision_found:
                for chain1_idx, bn1 in enumerate(curr_gen):
                    for chain2_idx, bn2 in enumerate(curr_gen):
                        if chain2_idx > chain1_idx:
                            if bins_are_neighbours(bn1, bn2, interact_mtrx):
                                if num_chains > 2:
                                    collisions_found.append(
                                        (chain1_idx, chain2_idx))
                                    num_chains -= 1
                                    if vrb_main:
                                        print("collisions_found: " +
                                              str(collisions_found))
                                        print("num_chains = " +
                                              str(num_chains))

                                else:
                                    if num_chains < 2:
                                        sys.exit("FATAL ERROR: num_chains < 2")

                                    final_collision_found = True
                                    final_chain_idxs = [chain1_idx, chain2_idx]
                                    gen_idx_of_last_collision = curr_gen_idx
                                    remove_duplicates_from_all_tips(chains)

                                    if vrb_main:
                                        print(
                                            "\n ---- final_collision_found! ---- \n"
                                        )
                                        print("gen_idx_of_last_collision = " +
                                              str(gen_idx_of_last_collision))
                                        print("final_chain_idxs: " +
                                              str(final_chain_idxs))

                        if final_collision_found:
                            break
                    if final_collision_found:
                        break

            for col_tuple in collisions_found:
                # Merge from src_chain_idx into target_chain_idx
                src_chain_idx = col_tuple[0]
                target_chain_idx = col_tuple[1]

                if col_tuple[0] in final_chain_idxs:
                    if col_tuple[1] in final_chain_idxs:
                        sys.exit("FATAL ERROR: Should never merge the two \
                                 final chains into eachother.")

                # Make sure we are merging into the final chain.
                # If not, swap src_chain_idx with target_chain_idx.
                if col_tuple[0] in final_chain_idxs:
                    src_chain_idx = col_tuple[1]
                    target_chain_idx = col_tuple[0]
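                # For example (hypothetical indices): with final_chain_idxs == [0, 3]
                # and col_tuple == (0, 2), the swap gives target_chain_idx == 0 and
                # src_chain_idx == 2, so chain 2 is merged into final chain 0.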

                # Merge chains
                if vrb_main:
                    print("Merge chain_idxs:" + str(col_tuple))
                    print("src_chain bin: " +
                          str([x + 1 for x in curr_gen[src_chain_idx]]))
                    print("target_chain bin: " +
                          str([x + 1 for x in curr_gen[target_chain_idx]]))

                ########################################
                # CONSIDER: FOR MULTIPLE COLLISIONS, TRY TO MERGE SMALLER CHAINS TOGETHER FIRST
                ########################################
                # Duplicates have to be removed AFTER collision recognition,
                # since otherwise this could prevent finding collisions
                remove_duplicates_from_all_tips(chains)
                (chains, contacts) = merge(chains, contacts, curr_gen_idx,
                                           target_chain_idx, src_chain_idx)

                if vrb_main:
                    print("\n Chains after merge step: ")
                    print_generations(chains)

            remove_duplicates_from_all_tips(chains)
            num_sorted_atoms = count_atoms(chains) + num_unlisted_contact_atoms

            if num_sorted_atoms >= num_atoms:
                if num_sorted_atoms > num_atoms:
                    sys.exit("FATAL ERROR: num_sorted_atoms > num_atoms")
                if vrb_main: print("All atoms sorted.")
                break
            curr_gen_idx += 1

        if curr_gen_idx >= MAX_GENERATIONS:
            sys.exit(
                "FATAL ERROR: MAX_GENERATIONS exceeded. (Increase MAX_GENERATIONS?)"
            )

        if not final_collision_found:
            sys.exit(
                "FATAL ERROR: No final collision found, don't know which chains to keep"
            )

        if vrb_main:
            print("\n Chain before culling dead ends: ")
            print_generations(chains)

        # Find dead ends in the two final chains
        dead_ends = get_dead_ends(chains, final_chain_idxs,
                                  gen_idx_of_last_collision)

        # Before merging dead ends, we have to make sure a dead end isn't longer
        # than the final chain we are attempting to merge it into.
        chain_length_until_last_collision = gen_idx_of_last_collision + 1
        shortened_dead_ends = shorten_dead_ends(
            dead_ends, chain_length_until_last_collision)

        merge_dead_ends_into_final_chains(chains, shortened_dead_ends,
                                          final_chain_idxs,
                                          gen_idx_of_last_collision)
        remove_duplicates_from_all_tips(chains)

        if vrb_main:
            print(
                "\n Chains after removing duplicates from tips, and before gluing: "
            )
            print_generations(chains)

        final_chain = build_final_chain(chains, contacts, final_chain_idxs,
                                        interact_mtrx)

        final_chains_list.append(final_chain)

        if vrb_main:
            print("\nfinal_chain: ")
            print_final_chain(final_chain)

        is_solution = test_solution(final_chain, interact_mtrx)
        if not is_solution:
            all_tests_successful = False
        is_solution_list.append(is_solution)


#------------------------------------------------------------------------------

    print("- INPUT_FILE_NAME ---------------- solution found:")
    for idx, INPUT_FILE_NAME in enumerate(TEST_FILE_NAMES):
        # print(INPUT_FILE_NAME + ": " + str(is_solution_list[idx]))
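        # "%-*s" left-justifies INPUT_FILE_NAME in a field whose width is taken
        # from the first element of the tuple (35 characters here).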
        print("%-*s  success: %s" %
              (35, INPUT_FILE_NAME, str(is_solution_list[idx])))

    if all_tests_successful:
        print("\n --> All test cases completed SUCCESSFULY. <--\n")
    if not all_tests_successful:
        print("\n --> BAD SOLUTION in test cases! <--\n")

    OPEN_JMOL = []
    for INPUT_FILE_NAME in TEST_FILE_NAMES:

        if INPUT_FILE_NAME == DISPLAY_FILE_NAME:
            OPEN_JMOL.append(True)
        else:
            OPEN_JMOL.append(False)

    for idx, INPUT_FILE_NAME in enumerate(TEST_FILE_NAMES):
        write_bins(final_chains_list[idx], atom_positions_list[idx],
                   INPUT_FILE_NAME, OPEN_JMOL[idx])
    """
Example No. 2
nltk_preprocessor = NLTKPreprocessor()

def get_key(dataset_type):
	if dataset_type == 'binary':
		return 'Abstract'
	elif dataset_type == 'multi-class':
		return 'Text'

def preprocess(dataset, key):
	log.info("Preprocessing data")

	tokens = nltk_preprocessor.transform(dataset[key])
	joined = [';'.join(t) for t in tokens]
	series = pd.Series(joined, index=dataset.index)
	dataset['Tokens'] = series
	
	return dataset
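
# Illustration only (hypothetical data): preprocess() joins each row's tokens
# with ';' and stores them in a new 'Tokens' column, e.g. the token list
# ['protein', 'binding', 'site'] becomes the string 'protein;binding;site'.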

if __name__ == '__main__':
	# Display progress logs on stdout
	log.basicConfig(level=log.DEBUG, format='%(asctime)s %(levelname)s %(message)s')

	parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
	args = get_args(parser)
	dataset_type = io.get_data_set(args.data)
	x, y = io.load_data(args.data)
	new_data = preprocess(x, get_key(dataset_type))
	new_data = pd.concat([new_data, y], axis=1)
	io.save_data(new_data, args.filename)
	log.info("Saved to file: {}".format(args.filename))
Example No. 3
log.basicConfig(level=log.INFO, format='%(asctime)s %(levelname)s %(message)s')

if __name__ == '__main__':
    """Does hp search and stores the parameters for each dataset and classifier."""

    config = io.load_config(sys.argv, None)

    experiment_dir = "{}_hpsearch".format(config['experiment'])
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)

    for dataset_i, dataset_filename in enumerate(config['datasets']):
        log.debug("DATASET_{}: {}".format(dataset_i, dataset_filename))

        # load preprocessed dataset
        X, y, arff_data = io.load_data(dataset_filename, config)
        dataset_name = os.path.splitext(dataset_filename)[0]
        log.info("DATASET_{}_NAME: {}".format(dataset_i, dataset_name))

        for estimator_i, estimator in enumerate(config['estimators']):
            log.debug("ESTIMATOR_{}: {}".format(estimator_i,
                                                estimator['estimator']))

            # load algorithm
            estimator = ut.get_estimator(estimator)
            estimator_name = estimator.__class__.__name__
            log.info("ESTIMATOR_{}_NAME: {}".format(estimator_i,
                                                    estimator_name))

            search_space, n_iter = ut.get_search_space(estimator)
Example No. 4
        log.warning("Article not available.")

    return keywords, mesh_terms


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    args = get_args(parser)
    log.debug("Commandline arguments: {}".format(args))

    check_mode_of_operation(args)

    data, _ = io.load_data(args.data)

    if io.get_data_set(args.data) == 'binary':

        keywords = []
        terms = []

        log.info("Fetching keywords an terms...")

        kw_cnt = 0
        t_cnt = 0
        for index, row in data.iterrows():

            #if index > 4:
            #    keywords.append(" ")
            #    terms.append(" ")
Example No. 5
dir_images = './dataset/'
dir_model = './checkpoints/epoch44/'
image_height = 32
batch_size = 1

# Specify output file with predictions of test samples
file_predictions = './predictions/test_predictions.txt'
if not os.path.isdir('./predictions/'):
    os.makedirs('./predictions/')

# Get the filenames and corresponding slants for the dataset
test_gt = load_gt(file_test_ids, file_gt)
test_ids = test_gt.keys()

# Load test images
batched_test_data = load_data(dir_images, test_gt, batch_size=batch_size,
                              image_height=image_height)

# Create the model object
model = Model(image_height=image_height)

if not os.path.isdir(dir_model):
    print('Selected model for testing (' + dir_model + ') does not exist')
else:   
    file_model = dir_model + 'model.ckpt'
    num_test_samples = len(batched_test_data)         
    with tf.Session() as session:
        # Load the trained network from file      
        model.saver.restore(session, file_model)

        # Get predictions and error for the test samples
        predictions = []
Example No. 6
num_epochs = 60
batch_size = 8
checkpoint_steps = 1
image_height = 32
learning_rate = 0.0005
dropout_rate = 0.3
width_stretch = 1.8

# Get the filenames and corresponding slants for the datasets
train_gt = load_gt(file_train_ids, file_gt)
valid_gt = load_gt(file_valid_ids, file_gt)

# Load training and validation images
batched_train_data = load_data(dir_images,
                               train_gt,
                               batch_size=batch_size,
                               image_height=image_height,
                               width_stretch=width_stretch)
batched_valid_data = load_data(dir_images,
                               valid_gt,
                               batch_size=batch_size,
                               image_height=image_height,
                               width_stretch=width_stretch)

# Create the model object
model = Model(learning_rate=learning_rate,
              dropout_rate=dropout_rate,
              image_height=image_height)

# Start training
train_costs = []
Example No. 7
    io.save_prediction(combined_data.loc[:, ['Id', 'Category']], prediction_filename)


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    args = get_args(parser)
    log.debug("Commandline arguments: {}".format(args))

    check_mode_of_operation(args)

    if args.score and args.train and args.predict:
        log.info("Mode score->train->predict")

        x, y = io.load_data(args.train)

        # Create a training/validation and a test set for model selection (hyper-parameter search) and evaluation
        x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                            test_size=args.test_size,
                                                            random_state=cfg.split_random_state,
                                                            stratify=y)
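        # Note: stratify=y keeps the class proportions (approximately) the same
        # in the training and test splits; random_state makes the split reproducible.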

        log.info("Created training set ({}) and test set ({})".format(len(y_train), len(y_test)))

        data_set = io.get_data_set(args.predict)
        fu_pl, clf_pl = select_model(args, data_set, x_train, y_train)

        # Score part
        fu_pl, clf_pl = mode_score(args, fu_pl, clf_pl, x_train, y_train, x_test, y_test, data_set)