# Pick a hyper-parameter space: reuse the best prior experiment for this
# Model when results are available, otherwise sample a fresh CNN space.
# (The `if` condition below is assumed; the original line is truncated.)
if len(experiment_results) > 0:
    experiment_results_single_model = experiment_results.loc[
        experiment_results['model'] == str(Model)]
    space = experiment_results_single_model.loc[
        experiment_results_single_model['val_precision_mean'].idxmax(),
        'space']
else:
    space = RandomSearch._sample_single_cnn_space()
space['epochs'] = min(space['epochs'], 5)  # cap epochs for a short run

class_proportion = {
    1: 0.3,
}

# Get data generators
test_batch_size = 500
train_proportion = 0.7
data_stream = DataStream(batch_size=space['batch_size'],
                         test_batch_size=test_batch_size,
                         train_proportion=train_proportion,
                         class_proportion=class_proportion)
train_index_generator, test_index_generator = data_stream.split_by_patch_id(
    features[['image']], features[['destroyed']])
train_generator = data_stream.get_train_data_generator_from_index(
    [features['image'], features['destroyed']], train_index_generator)
test_indices = list(test_index_generator)
test_generator = data_stream.get_test_data_generator_from_index(
    features['image'], test_indices)
num_batches = ceil(len(features) / space['batch_size'])
num_batches_test = len(test_indices)

# Fit model and predict
train_dataset = Dataset.from_generator(lambda: train_generator,
                                       (tf.float32, tf.int32))
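# Illustrative continuation (not from the source): how the dataset and batch
# counts above would typically be consumed. `Model(**space)` returning a
# compiled tf.keras.Model is an assumed interface, not the project's API.
model = Model(**space)
model.fit(train_dataset,
          steps_per_epoch=num_batches,
          epochs=1)  # the generator yields a single pass, so one epoch
predictions = model.predict(
    Dataset.from_generator(lambda: test_generator, tf.float32),
    steps=num_batches_test)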
# Reading
features = pd.read_pickle('{}/{}'.format(FEATURES_PATH, features_file_name))\
    .dropna(subset=['image'])
# features_destroyed = features.loc[features['destroyed'] == 1].sample(500)
# features_non_destroyed = features.loc[features['destroyed'] == 0].sample(5000)
# features = pd.concat([features_destroyed, features_non_destroyed])

# Modelling
Model = CNN  # Choose Model
model = load_model('{}/model_{}.h5'.format(MODELS_PATH, args['experiment']))
experiments = load_experiment_results()
space = experiments.loc[
    experiments['name'] == 'experiment_{}.json'.format(args['experiment']),
    'space'].iloc[0]

# Build batched test indices, then a data generator and dataset over them
test_indices = DataStream._get_index_generator(features, space['batch_size'],
                                               KFold)
num_batches_test = len(test_indices)
test_generator = DataStream.get_test_data_generator_from_index(
    features['image'], test_indices)
test_dataset = Dataset.from_generator(lambda: test_generator, tf.float32)

# Predict
print('Generating predictions')
# predict_generator is deprecated in recent TF releases; model.predict
# accepts the same dataset directly.
predictions = model.predict_generator(test_dataset, steps=num_batches_test)
predictions = pd.DataFrame({
    'prediction': predictions.reshape(-1),
}, index=features.index)

file_name = '{}/prediction_{}.p'.format(PREDICTIONS_PATH, round(time()))
predictions.to_pickle(file_name)
print('Stored predictions in file: {}'.format(file_name))
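# Illustrative follow-up (not in the original script): load the stored
# predictions back and threshold them into 0/1 labels. The 0.5 cut-off is
# an assumption, not a value taken from the experiments.
stored = pd.read_pickle(file_name)
stored['label'] = (stored['prediction'] > 0.5).astype(int)
print(stored['label'].value_counts())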
else:
    best_experiment['space']['preprocessing'] = preprocessing
space = best_experiment['space']
print('Running with space', space)
print('From best experiment')
print(best_experiment)

# Modelling
RUNS = 50
for run in range(RUNS):
    print('Creating batches')
    class_proportion = {1: 0.3}
    test_batch_size = 200
    train_proportion = 0.7
    data_stream = DataStream(batch_size=space['batch_size'],
                             train_proportion=train_proportion,
                             class_proportion=class_proportion,
                             test_batch_size=test_batch_size)
    # Split train/test on whole patches so tiles from one patch never
    # appear on both sides of the split.
    unique_patches = features.index.get_level_values(
        'patch_id').unique().tolist()
    train_patches = random.sample(
        unique_patches, round(len(unique_patches) * train_proportion))
    train_data = features.loc[
        features.index.get_level_values('patch_id').isin(train_patches)]
    # Upsample the minority class to the requested proportion and shuffle
    train_data_upsampled = data_stream._upsample_class_proportion(
        train_data, class_proportion).sample(frac=1)
    test_patches = list(set(unique_patches) - set(train_patches))
    test_data = features.loc[
        features.index.get_level_values('patch_id').isin(test_patches)]
    train_indices = data_stream._get_index_generator(train_data_upsampled,
                                                     space['batch_size'])
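# Minimal standalone sketch (not from the source) of the patch-wise split
# performed above, assuming `features` carries a MultiIndex with a
# 'patch_id' level. Splitting on patches rather than rows keeps tiles from
# one patch from leaking into both the train and the test set.
import random

def split_by_patch(df, train_proportion=0.7, seed=None):
    rng = random.Random(seed)
    patches = df.index.get_level_values('patch_id').unique().tolist()
    train_patches = set(rng.sample(patches,
                                   round(len(patches) * train_proportion)))
    mask = df.index.get_level_values('patch_id').isin(train_patches)
    return df.loc[mask], df.loc[~mask]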
sampler = RandomSearch()
models = {
    CNN: sampler.sample_cnn,
    CNNPreTrained: sampler.sample_cnn_pretrained,
}
Model = random.choice([CNN])  # only CNN is sampled here; add CNNPreTrained
                              # to the list to include the pretrained variant
sample_func = models[Model]
spaces = sample_func(500)

# Do splits
class_proportion = {
    1: 0.3,
}
batch_size = spaces[0]['batch_size']
test_batch_size = 500
train_proportion = 0.7
data_stream = DataStream(batch_size=batch_size,
                         train_proportion=train_proportion,
                         class_proportion=class_proportion,
                         test_batch_size=test_batch_size)

# Patch-level split: sample whole patches for training so tiles from the
# same patch never land in both train and test.
unique_patches = features.index.get_level_values('patch_id').unique().tolist()
train_patches = random.sample(unique_patches,
                              round(len(unique_patches) * train_proportion))
train_data = features.loc[
    features.index.get_level_values('patch_id').isin(train_patches)]
train_data_upsampled = data_stream._upsample_class_proportion(
    train_data, class_proportion).sample(frac=1)
test_patches = list(set(unique_patches) - set(train_patches))
test_data = features.loc[
    features.index.get_level_values('patch_id').isin(test_patches)]

train_indices = data_stream._get_index_generator(train_data_upsampled,
                                                 batch_size)
test_indices = data_stream._get_index_generator(test_data, test_batch_size)
train_generator = data_stream.get_train_data_generator_from_index(
    [train_data_upsampled['image'], train_data_upsampled['destroyed']],
    train_indices)
# The train-style generator is reused for the test split so that labels are
# yielded alongside images for evaluation.
test_generator = data_stream.get_train_data_generator_from_index(
    [test_data['image'], test_data['destroyed']], test_indices)
train_dataset = Dataset.from_generator(lambda: train_generator,
                                       (tf.float32, tf.int32))
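# Note (assumes TF >= 2.4): the bare dtype tuple above is normally replaced
# by an explicit output_signature in current TensorFlow. The 128x128 RGB
# tile shape below is illustrative, not taken from the source.
import tensorflow as tf

train_dataset = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_signature=(
        tf.TensorSpec(shape=(None, 128, 128, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.int32),
    ))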