Example #1
# Imports implied by the snippet; DataStream, RandomSearch, the CNN Model and
# the `features`/`experiment_results` frames come from the surrounding project.
from math import ceil

import tensorflow as tf
from tensorflow.data import Dataset

# Pick a hyperparameter space. The opening `if` line was truncated in the
# original snippet: the first branch reuses the space of the best prior
# experiment for this model, otherwise a fresh CNN space is sampled.
if not experiment_results.empty:  # hypothetical condition; the original was cut off
    experiment_results_single_model = experiment_results.loc[
        experiment_results['model'] == str(Model)]
    space = experiment_results_single_model.loc[
        experiment_results_single_model['val_precision_mean'].idxmax(),
        'space']
else:
    space = RandomSearch._sample_single_cnn_space()

space['epochs'] = min(space['epochs'], 5)  # cap the sampled epoch count at 5
# Target share of the positive ("destroyed") class after upsampling
class_proportion = {
    1: 0.3,
}
# Get data generators
test_batch_size = 500
train_proportion = 0.7
data_stream = DataStream(batch_size=space['batch_size'],
                         test_batch_size=test_batch_size,
                         train_proportion=train_proportion,
                         class_proportion=class_proportion)
train_index_generator, test_index_generator = data_stream.split_by_patch_id(
    features[['image']], features[['destroyed']])
train_generator = data_stream.get_train_data_generator_from_index(
    [features['image'], features['destroyed']], train_index_generator)

test_indices = list(test_index_generator)
test_generator = data_stream.get_test_data_generator_from_index(
    features['image'], test_indices)

num_batches = ceil(len(features) / space['batch_size'])
num_batches_test = len(test_indices)
# Fit model and predict
train_dataset = Dataset.from_generator(lambda: train_generator,
                                       (tf.float32, tf.int32))
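# Hypothetical continuation (the original snippet is truncated here): assuming
# Model(**space) builds a compiled tf.keras model, the fit-and-predict step
# announced by the comment above would look roughly like this.
test_dataset = Dataset.from_generator(lambda: test_generator, tf.float32)
model = Model(**space)  # assumed constructor; not shown in the snippet
model.fit(train_dataset.repeat(),
          steps_per_epoch=num_batches,
          epochs=space['epochs'])
predictions = model.predict(test_dataset, steps=num_batches_test)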
Example #2
# Imports implied by the snippet; DataStream, CNN and load_experiment_results
# come from the surrounding project.
from time import time

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold
from tensorflow.data import Dataset
from tensorflow.keras.models import load_model

# Reading
features = pd.read_pickle('{}/{}'.format(FEATURES_PATH, features_file_name))\
    .dropna(subset=['image'])
# Optional subsample for quick local runs:
# features_destroyed = features.loc[features['destroyed'] == 1].sample(500)
# features_non_destroyed = features.loc[features['destroyed'] == 0].sample(5000)
# features = pd.concat([features_destroyed, features_non_destroyed])
# Modelling
Model = CNN

# Choose model: load the trained network for the requested experiment
model = load_model('{}/model_{}.h5'.format(MODELS_PATH, args['experiment']))
experiments = load_experiment_results()
space = experiments.loc[experiments['name'] ==
                        'experiment_{}.json'.format(args['experiment']),
                        'space'].iloc[0]
test_index_batches = DataStream._get_index_generator(features,
                                                     space['batch_size'],
                                                     KFold)
num_batches_test = len(test_index_batches)
test_generator = DataStream.get_test_data_generator_from_index(
    features['image'], test_index_batches)
test_dataset = Dataset.from_generator(lambda: test_generator, tf.float32)
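# Note (not in the original): the positional output-types form of
# Dataset.from_generator used above is the legacy signature. On current
# TensorFlow releases the equivalent call uses output_signature, with the
# shape left unknown because the snippet never fixes the image dimensions:
# test_dataset = Dataset.from_generator(
#     lambda: test_generator,
#     output_signature=tf.TensorSpec(shape=None, dtype=tf.float32))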

# Predict
print('Generating predictions')
# predict_generator is deprecated in tf.keras; Model.predict takes the dataset
predictions = model.predict(test_dataset, steps=num_batches_test)
predictions = pd.DataFrame({'prediction': predictions.reshape(-1)},
                           index=features.index)
file_name = '{}/prediction_{}.p'.format(PREDICTIONS_PATH, round(time()))
predictions.to_pickle(file_name)
print('Stored predictions to file: {}'.format(file_name))
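# Follow-up sketch (assumption, not in the original): join the stored
# predictions back onto the labels to sanity-check them, assuming `features`
# still carries its 'destroyed' column.
evaluation = features[['destroyed']].join(predictions)
print(evaluation.groupby('destroyed')['prediction'].mean())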
Example #3
# Imports implied by the snippet; DataStream, RandomSearch, CNN and
# CNNPreTrained come from the surrounding project.
import random

import tensorflow as tf
from tensorflow.data import Dataset

# Choose a hyperparameter space. The branch preceding this `else` was
# truncated in the original snippet; a plausible guard, mirroring the
# fallback in Example #1, is reconstructed here.
if best_experiment is None:  # hypothetical condition; the original was cut off
    space = RandomSearch._sample_single_cnn_space()
else:
    best_experiment['space']['preprocessing'] = preprocessing
    space = best_experiment['space']
    print('Running with space', space)
    print('From best experiment')
    print(best_experiment)

# Modelling
RUNS = 50
for run in range(RUNS):
    print('Creating batches')
    class_proportion = {1: 0.3}
    test_batch_size = 200
    train_proportion = 0.7
    data_stream = DataStream(batch_size=space['batch_size'],
                             train_proportion=train_proportion,
                             class_proportion=class_proportion,
                             test_batch_size=test_batch_size)
    unique_patches = features.index.get_level_values(
        'patch_id').unique().tolist()
    train_patches = random.sample(
        unique_patches, round(len(unique_patches) * train_proportion))
    train_data = features.loc[features.index.get_level_values('patch_id').isin(
        train_patches)]
    train_data_upsampled = data_stream._upsample_class_proportion(
        train_data, class_proportion).sample(frac=1)
    test_patches = list(set(unique_patches) - set(train_patches))
    test_data = features.loc[features.index.get_level_values('patch_id').isin(
        test_patches)]

    train_indices = data_stream._get_index_generator(train_data_upsampled,
                                                     space['batch_size'])
    # The remainder of the loop body (building generators, fitting and
    # evaluating the model) was truncated in the original snippet; the code
    # below repeats the same split logic outside the loop.
sampler = RandomSearch()
models = {
    CNN: sampler.sample_cnn,
    CNNPreTrained: sampler.sample_cnn_pretrained,
}
# Only CNN is sampled here, although CNNPreTrained is registered above.
Model = random.choice([CNN])
sample_func = models[Model]
spaces = sample_func(500)
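# For orientation (assumption): each sampled space is a plain dict of
# hyperparameters; the keys used in these examples include 'batch_size',
# 'epochs' and 'preprocessing', e.g.
# spaces[0] -> {'batch_size': 32, 'epochs': 20, 'preprocessing': ..., ...}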
# Do splits
class_proportion = {
    1: 0.3,
}
batch_size = spaces[0]['batch_size']
test_batch_size = 500
train_proportion = 0.7
data_stream = DataStream(batch_size=batch_size,
                         train_proportion=train_proportion,
                         class_proportion=class_proportion,
                         test_batch_size=test_batch_size)

unique_patches = features.index.get_level_values('patch_id').unique().tolist()
train_patches = random.sample(
    unique_patches, round(len(unique_patches) * train_proportion))
train_data = features.loc[features.index.get_level_values('patch_id').isin(
    train_patches)]
train_data_upsampled = data_stream._upsample_class_proportion(
    train_data, class_proportion).sample(frac=1)
test_patches = list(set(unique_patches) - set(train_patches))
test_data = features.loc[features.index.get_level_values('patch_id').isin(
    test_patches)]

train_indices = data_stream._get_index_generator(train_data_upsampled,
                                                 batch_size)
test_indices = data_stream._get_index_generator(test_data, test_batch_size)
train_generator = data_stream.get_train_data_generator_from_index(
    [train_data_upsampled['image'], train_data_upsampled['destroyed']], train_indices)
# The train-style generator is reused for the test split because these test
# batches keep their labels (needed for evaluation).
test_generator = data_stream.get_train_data_generator_from_index(
    [test_data['image'], test_data['destroyed']], test_indices)
train_dataset = Dataset.from_generator(lambda: train_generator,
                                       (tf.float32, tf.int32))
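# Sketch (assumption, not the project's code): _upsample_class_proportion is
# never shown in these examples. A plausible implementation resamples each
# listed class with replacement until it reaches its target share of rows,
# solving (k + m) / (n + m) = target for the number of extra rows m.
import pandas as pd

def upsample_class_proportion(data, class_proportion, label_col='destroyed'):
    frames = [data]
    for label, target in class_proportion.items():
        n = len(data)                         # rows before upsampling
        k = (data[label_col] == label).sum()  # rows already in this class
        m = max(0, round((target * n - k) / (1 - target)))
        if m:
            frames.append(
                data[data[label_col] == label].sample(m, replace=True))
    return pd.concat(frames)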