Example #1
# Imports assumed by this snippet (TF 1.x-era Cloud ML pattern, Python 2)
import io
import threading
import Queue  # 'queue' on Python 3

import pandas as pd
import tensorflow as tf
from tensorflow.python.lib.io import file_io

import params


def test(output_dir, weights_dir, test_dir):
    """
    Tests the given model and generates a csv containing segmentation masks compressed using rle
    Parameters:
    output_dir -- directory path to store output csv
    weights_dir -- directory path to load weights of given model
    test_dir -- directory path containing test images

    """

    batch_size = params.batch_size
    
    model = params.model_factory()

    # Copy file from gcs to local directory
    with file_io.FileIO(weights_dir, mode='rb') as input_f:
        with file_io.FileIO('weights.hdf5', mode='wb+') as output_f:
            output_f.write(input_f.read())

    model.load_weights(filepath='weights.hdf5')
    graph = tf.get_default_graph()

    with file_io.FileIO(test_dir + '/sample_submission.csv', mode='rb') as f:
        csv_bytes = f.read()
        df_test = pd.read_csv(io.BytesIO(csv_bytes))

    ids_test = df_test['img'].map(lambda s: s.split('.')[0])

    names = ['{}.jpg'.format(id_) for id_ in ids_test]

    q_size = 10

    q = Queue.Queue(maxsize=q_size)
    t1 = threading.Thread(target=data_loader, name='DataLoader', args=(q, ids_test, test_dir))
    t2 = threading.Thread(target=predictor, name='Predictor', args=(q, len(ids_test), graph, model))
    print('Predicting on {} samples with batch_size = {}...'.format(len(ids_test), batch_size))
    t1.start()
    t2.start()
    # Wait for both threads to finish
    t1.join()
    t2.join()

    print("Generating submission file...")
    df = pd.DataFrame({'img': names, 'rle_mask': rles})  # rles is filled by the predictor thread (see sketch below)

    # Copy submission file to gcs
    df.to_csv('submission.csv.gz', index=False, compression='gzip')
    with file_io.FileIO('submission.csv.gz', mode='rb') as input_f:
        with file_io.FileIO(output_dir + '/submission.csv.gz', mode='wb+') as output_f:
            output_f.write(input_f.read())
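
The function above depends on data_loader, predictor, and a shared rles list that are defined elsewhere in the project. A minimal sketch of that producer/consumer pair, assuming cv2 for image I/O, params.orig_width / params.orig_height for the output size, and a run_length_encode helper (sketched after Example #3):

import cv2
import numpy as np

rles = []

def data_loader(q, ids, test_dir):
    # Producer: read and resize test images, push normalized batches onto the queue
    bs = params.batch_size
    for start in range(0, len(ids), bs):
        batch = []
        for id_ in ids[start:start + bs]:
            img = cv2.imread('{}/test/{}.jpg'.format(test_dir, id_))
            batch.append(cv2.resize(img, (params.input_size, params.input_size)))
        q.put(np.array(batch, np.float32) / 255.0)

def predictor(q, n_samples, graph, model):
    # Consumer: pull batches, predict masks, run-length encode into the shared rles list
    bs = params.batch_size
    for start in range(0, n_samples, bs):
        x_batch = q.get()
        with graph.as_default():
            preds = model.predict_on_batch(x_batch)
        for pred in preds:
            mask = cv2.resize(pred, (params.orig_width, params.orig_height)) > 0.5
            rles.append(run_length_encode(mask))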
Example #2
def train_vggnet(train_dir, job_dir):
  """
  Trains a fully convolutional VGG-16 CNN model.
  Parameters:
  train_dir -- directory containing training data
  job_dir -- directory where output files (log and weight files) are stored

  """
  # Get parameteers from params file
    import params

    input_size = params.input_size
    epochs = params.max_epochs
    batch_size = params.batch_size
    model = params.model_factory()

    # Get image names from csv file
    with file_io.FileIO(train_dir + '/train_masks.csv', mode='rb') as f:
        csv_bytes = f.read()
        df_train = pd.read_csv(io.BytesIO(csv_bytes))
    ids_train = df_train['img'].map(lambda s: s.split('.')[0])

    # Split images into training and validation sets
    ids_train_split, ids_valid_split = train_test_split(ids_train, test_size=0.2, random_state=42)

    print('Training on {} samples'.format(len(ids_train_split)))
    print('Validating on {} samples'.format(len(ids_valid_split)))

    # Initialize callbacks
    callbacks = [EarlyStopping(monitor='val_loss',
                               patience=3,
                               verbose=1,
                               min_delta=1e-4),
                 ReduceLROnPlateau(monitor='val_loss',
                                   factor=0.1,
                                   patience=4,
                                   verbose=1,
                                   epsilon=1e-4),
                 ModelSave(monitor='val_loss',  # project-local checkpoint callback (not a Keras built-in)
                           filepath=job_dir,
                           save_best_only=True,
                           save_weights_only=True),
                 TensorBoard(log_dir=job_dir + '/logs')]
  
    # Start training
    model.fit_generator(generator=train_generator(train_dir, ids_train_split, batch_size, input_size),
                        steps_per_epoch=np.ceil(float(len(ids_train_split)) / float(batch_size)),
                        epochs=epochs,
                        verbose=2,
                        callbacks=callbacks,
                        validation_data=valid_generator(train_dir, ids_valid_split, batch_size, input_size),
                        validation_steps=np.ceil(float(len(ids_valid_split)) / float(batch_size)))
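
train_generator and valid_generator are project helpers that are not shown here. A minimal sketch of train_generator, assuming a local copy of the Carvana layout (JPEGs under train_dir/train, GIF masks under train_dir/train_masks) and cv2 plus PIL for image I/O:

import cv2
import numpy as np
from PIL import Image

def train_generator(train_dir, ids, batch_size, input_size):
    # Loop forever, yielding (images, masks) batches scaled to [0, 1]
    while True:
        for start in range(0, len(ids), batch_size):
            x_batch, y_batch = [], []
            for id_ in ids[start:start + batch_size]:
                img = cv2.imread('{}/train/{}.jpg'.format(train_dir, id_))
                img = cv2.resize(img, (input_size, input_size))
                # masks may need scaling to {0, 1} depending on how they are stored
                mask = np.array(Image.open('{}/train_masks/{}_mask.gif'.format(train_dir, id_)),
                                dtype=np.float32)
                mask = cv2.resize(mask, (input_size, input_size))
                x_batch.append(img)
                y_batch.append(mask)
            yield (np.array(x_batch, np.float32) / 255.0,
                   np.expand_dims(np.array(y_batch, np.float32), axis=3))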
Example #3
import os
import random

gpu_id = '0'
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)

import params
from sklearn.model_selection import train_test_split  # needed by the split below
DATA_PATH = '/media/Disk/yanpengxiang/dataset/carvana/'

input_size = params.input_size
orig_width = params.orig_width
orig_height = params.orig_height

epochs = params.max_epochs
batch_size = params.batch_size
model = params.model_factory()

#df_train = pd.read_csv(DATA_PATH + 'train_masks.csv')
#ids_train = df_train['img'].map(lambda s: s.split('.')[0])
ids_train = []
with open('input/train_id.txt') as f:
    for line in f:
        ids_train.append(line[:15])  # Carvana ids are 15 chars: 12-char car id + '_' + 2-digit view

ids_train_split, ids_valid_split = train_test_split(ids_train,
                                                    test_size=0.1,
                                                    random_state=42)

print('Training on {} samples'.format(len(ids_train_split)))
print('Validating on {} samples'.format(len(ids_valid_split)))
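
The 15-character ids above key into Carvana's run-length-encoded mask format, which is also what the rles list in Example #1 holds. For reference, a minimal RLE encoder, assuming a 2-D boolean mask and the competition's 1-indexed, top-to-bottom-then-left-to-right pixel numbering:

import numpy as np

def run_length_encode(mask):
    # Flatten column-major to match the top-to-bottom, then left-to-right numbering
    pixels = mask.flatten(order='F').astype(np.uint8)
    padded = np.concatenate([[0], pixels, [0]])
    changes = np.where(padded[1:] != padded[:-1])[0] + 1  # 1-indexed run boundaries
    starts, ends = changes[0::2], changes[1::2]
    return ' '.join('{} {}'.format(s, e - s) for s, e in zip(starts, ends))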
Example #4
# Imports assumed by this fragment
import glob
import numpy as np
from keras import backend as K
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

import params
import custom_losses  # project-local module providing fmeasure/recall/precision


# The head of this function was truncated in the source; reconstructed here from
# the standard Keras memory-estimation snippet (batch_size is a module-level global).
def get_model_memory_usage(model):
    shapes_mem_count = 0
    for layer in model.layers:
        single_layer_mem = 1
        for s in layer.output_shape:
            if s is None:
                continue
            single_layer_mem *= s
        shapes_mem_count += single_layer_mem

    trainable_count = np.sum(
        [K.count_params(p) for p in set(model.trainable_weights)])
    non_trainable_count = np.sum(
        [K.count_params(p) for p in set(model.non_trainable_weights)])

    total_memory = 4.0 * batch_size * (shapes_mem_count + trainable_count +
                                       non_trainable_count)
    gbytes = np.round(total_memory / (1024.0**3), 3)
    return gbytes
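
As a quick sanity check of the estimate above (float32 = 4 bytes): a model with 10e6 activations per sample and 25e6 parameters at batch_size = 16 would need roughly

4.0 * 16 * (10e6 + 25e6) / (1024.0 ** 3)  # ~= 2.086 GB

(illustrative numbers only; real usage also depends on the optimizer's state and the backend's workspace).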


model = params.model_factory(input_shape=params.input_shape)
model.compile(
    optimizer=optimizers.RMSprop(lr=0.01),
    loss='binary_crossentropy',
    #loss=custom_losses.floss,
    metrics=[
        'accuracy', custom_losses.fmeasure, custom_losses.recall,
        custom_losses.precision
    ])

labels = load_labels()  # project helper, not shown in this fragment

ids_train_split = glob.glob(params.folder_train + "*.*")
ids_valid_split = glob.glob(params.folder_valid + "*.*")

print('Memory needed estimation: {}GB'.format(get_model_memory_usage(model)))
# The head of this callbacks list was truncated in the source; reconstructed as a
# ModelCheckpoint, which matches the surviving keyword arguments.
callbacks = [
    ModelCheckpoint(filepath=best_model_path,
                    monitor='val_loss',
                    save_best_only=True,
                    save_weights_only=True,
                    mode='min')
]


# Augmentation settings; the aug_* values presumably come from params
datagen = ImageDataGenerator(horizontal_flip=aug_horizontal_flip,
                             vertical_flip=aug_vertical_flip,
                             rotation_range=aug_rotation,
                             width_shift_range=aug_width_shift,
                             height_shift_range=aug_height_shift,
                             channel_shift_range=aug_channel_shift,
                             shear_range=aug_shear,
                             zoom_range=aug_zoom)

# X_train / M_train (image and metadata arrays) are prepared outside this fragment
model_info = params.model_factory(input_shape=X_train.shape[1:],
                                  inputs_meta=M_train.shape[1])
model_info.summary()

with open(best_model_path, "w") as json_file:
    json_file.write(model_info.to_json())


def train_and_evaluate_model(model, X_tr, y_tr, X_cv, y_cv):
    xtr, mtr = X_tr
    xcv, mcv = X_cv

    hist = model.fit_generator(
        get_data_generator(datagen, xtr, mtr, y_tr, batch_size=batch_size),
        steps_per_epoch=np.ceil(float(len(xtr)) / float(batch_size)),
        epochs=epochs,
        verbose=2,
        # The source is truncated here; a validation pair built from the
        # unpacked X_cv presumably followed, e.g.:
        validation_data=([xcv, mcv], y_cv))
    return hist
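
get_data_generator is not shown in this fragment. A minimal sketch, assuming it augments only the image input with Keras's ImageDataGenerator.flow and passes the metadata rows through index-aligned:

def get_data_generator(datagen, x, m, y, batch_size):
    # shuffle=False keeps flow's batch order aligned with the metadata array m
    flow = datagen.flow(x, y, batch_size=batch_size, shuffle=False)
    i = 0
    while True:
        x_batch, y_batch = next(flow)
        m_batch = m[i:i + len(x_batch)]
        i = (i + len(x_batch)) % len(x)  # wraps to 0 after the last (partial) batch
        yield [x_batch, m_batch], y_batch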