import datetime
import os

import keras
from keras import metrics
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

import utils


def __train_model(params, x_train, y_train, x_valid, y_valid,
                  no_early_stopping=False, data_dir='../tmp/'):
    """
    Trains the model.

    :param params: the ParamConfig in question
    :param x_train: the training data
    :param y_train: the training labels
    :param x_valid: the validation data
    :param y_valid: the validation labels
    :param no_early_stopping: if True, disables early stopping and instead
        checkpoints the best model (by validation accuracy) to disk
    :param data_dir: the temp directory for model checkpoints
    :return: the trained model and the training history
    """
    train_gen, valid_gen = params.generator.generator_callable(
        x_train, y_train,
        x_valid, y_valid,
        params.batch_size,
        **params.generator.__dict__
    )

    model = params.model.model_callable(input_shape=x_train.shape[1:],
                                        num_classes=y_train.shape[1],
                                        **params.model.__dict__)
    model_metrics = ['acc',
                     utils.sensitivity,
                     utils.specificity,
                     utils.true_positives,
                     utils.false_negatives]
    model.compile(
        optimizer=getattr(keras.optimizers, params.model.optimizer)(lr=1e-5),
        loss=params.model.loss,
        metrics=model_metrics)

    if no_early_stopping:
        model_filename = str(datetime.datetime.now())
        model_filepath = '{}/{}.hdf5'.format(data_dir, model_filename)
        cbs = utils.create_callbacks(x_train, y_train, x_valid, y_valid,
                                     early_stopping=False,
                                     reduce_lr=params.reduce_lr)
        cbs.append(ModelCheckpoint(filepath=model_filepath,
                                   save_best_only=True,
                                   monitor='val_acc',
                                   mode='max',
                                   verbose=1))
    else:
        cbs = utils.create_callbacks(x_train, y_train, x_valid, y_valid,
                                     early_stopping=params.early_stopping,
                                     reduce_lr=params.reduce_lr)

    history = model.fit_generator(train_gen,
                                  epochs=params.max_epochs,
                                  validation_data=valid_gen,
                                  verbose=2,
                                  callbacks=cbs)

    if no_early_stopping:
        # Register the custom metric functions on keras.metrics so
        # load_model can deserialize the checkpointed model, then restore
        # the best weights and clean up the checkpoint file.
        metrics.sensitivity = utils.sensitivity
        metrics.specificity = utils.specificity
        metrics.true_positives = utils.true_positives
        metrics.false_negatives = utils.false_negatives
        model = load_model(model_filepath)
        os.remove(model_filepath)

    return model, history
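# Usage sketch for __train_model, assuming the nested config layout the
# function reads (params.model.model_callable, params.generator
# .generator_callable, params.batch_size, ...). The ModelConfig /
# GeneratorConfig names and the builder and generator below are
# hypothetical stand-ins, not confirmed blueno API. Note that
# **params.model.__dict__ forwards every config field to the builder,
# so the builder must accept **kwargs.

def my_model(input_shape, num_classes, **kwargs):
    """Hypothetical builder returning an uncompiled keras.Model."""
    inputs = keras.layers.Input(shape=input_shape)
    x = keras.layers.Flatten()(inputs)
    outputs = keras.layers.Dense(num_classes, activation='softmax')(x)
    return keras.models.Model(inputs=inputs, outputs=outputs)


params = blueno.ParamConfig(
    model=blueno.ModelConfig(
        model_callable=my_model,
        optimizer='Adam',  # looked up on keras.optimizers by name
        loss='categorical_crossentropy'),
    generator=blueno.GeneratorConfig(
        generator_callable=my_generator),  # hypothetical generator factory
    batch_size=8,
    max_epochs=50,
    early_stopping=True,
    reduce_lr=False)

model, history = __train_model(params, x_train, y_train, x_valid, y_valid)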
import datetime
import logging
import os
import pathlib

import keras
import numpy as np
from elasticsearch_dsl import connections
from google.auth.exceptions import DefaultCredentialsError

import blueno
import blueno.gcs
import blueno.slack
import elasticsearch  # project module, not the PyPI elasticsearch client
import logger
import utils


def start_job(x_train: np.ndarray,
              y_train: np.ndarray,
              x_valid: np.ndarray,
              y_valid: np.ndarray,
              job_name: str,
              username: str,
              params: blueno.ParamConfig,
              slack_token: str = None,
              log_dir: str = None,
              plot_dir=None,
              id_valid: np.ndarray = None) -> None:
    """
    Builds, fits, and evaluates a model.

    If slack_token is not None, uploads an image to Slack.

    Advanced users are encouraged to supply their own job function and
    attach the loggers they need.

    :param x_train: the training data
    :param y_train: the training labels, must be a 2D array
    :param x_valid: the validation data
    :param y_valid: the validation labels, must be a 2D array
    :param job_name: the name of the job, must not contain '/'
    :param username: the author of the job
    :param params: the parameters specified
    :param slack_token: the slack token
    :param log_dir: the directory to save logs to
    :param plot_dir: the directory to save plots to, defaults to
        tmp/plots-<gpu>
    :param id_valid: the patient ids ordered to correspond with y_valid
    :return: None
    """
    num_classes = y_train.shape[1]
    created_at = datetime.datetime.utcnow().isoformat()

    if plot_dir is None:
        gpu = os.environ["CUDA_VISIBLE_DEVICES"]
        plot_dir = pathlib.Path('tmp') / f'plots-{gpu}'

    # Configure the job to log all output to a specific file
    csv_filepath = None
    log_filepath = None
    if log_dir:
        if '/' in job_name:
            raise ValueError("Job name cannot contain '/' character")
        log_filepath = str(
            pathlib.Path(log_dir) / f'{job_name}-{created_at}.log')
        assert log_filepath.startswith(log_dir)
        csv_filepath = log_filepath[:-3] + 'csv'
        logger.configure_job_logger(log_filepath)

    # These must be the first lines in the job log, do not change
    logging.info(f'using params:\n{params}')
    logging.info(f'author: {username}')

    logging.debug(f'in start_job,'
                  f' using gpu {os.environ["CUDA_VISIBLE_DEVICES"]}')
    logging.info('preparing data and model for training')
    model_params = params.model
    generator_params = params.generator

    train_gen, valid_gen = generator_params.generator_callable(
        x_train, y_train,
        x_valid, y_valid,
        params.batch_size,
        **generator_params.__dict__)

    logging.debug(f'num_classes is: {num_classes}')

    # Construct the uncompiled model
    model: keras.Model
    model = model_params.model_callable(input_shape=x_train.shape[1:],
                                        num_classes=num_classes,
                                        **model_params.__dict__)

    logging.debug(
        'using default metrics: acc, sensitivity, specificity, tp, fn')
    metrics = ['acc',
               utils.sensitivity,
               utils.specificity,
               utils.true_positives,
               utils.false_negatives]

    model.compile(optimizer=model_params.optimizer,
                  loss=model_params.loss,
                  metrics=metrics)

    model_filepath = '/tmp/{}.hdf5'.format(os.environ['CUDA_VISIBLE_DEVICES'])
    logging.debug('model_filepath: {}'.format(model_filepath))

    callbacks = utils.create_callbacks(x_train, y_train, x_valid, y_valid,
                                       early_stopping=params.early_stopping,
                                       reduce_lr=params.reduce_lr,
                                       csv_file=csv_filepath,
                                       model_file=model_filepath)
    logging.info('training model')
    history = model.fit_generator(train_gen,
                                  epochs=params.max_epochs,
                                  validation_data=valid_gen,
                                  verbose=2,
                                  callbacks=callbacks)

    try:
        blueno.gcs.upload_gcs_plots(x_train, x_valid, y_valid, model,
                                    history, job_name, created_at,
                                    plot_dir=plot_dir, id_valid=id_valid)
    except DefaultCredentialsError as e:
        logging.warning(e)

    if slack_token:
        logging.info('generating slack report')
        blueno.slack.slack_report(x_train, x_valid, y_valid, model,
                                  history, job_name, params, slack_token,
                                  plot_dir=plot_dir, id_valid=id_valid)
    else:
        logging.info('no slack token found, not generating report')

    # acc_i = model.metrics_names.index('acc')
    # TODO(luke): Document this change, originally we only uploaded good
    # models, now we upload all models to GCS
    # if model.evaluate_generator(valid_gen)[acc_i] >= 0.8:
    upload_model_to_gcs(job_name, created_at, model_filepath)

    end_time = datetime.datetime.utcnow().isoformat()
    # Do not change, this generates the ended-at ES field
    logging.info(f'end time: {end_time}')

    # Upload logs to Kibana
    if log_dir:
        # Creates a connection to our Airflow instance.
        # We don't need to remove it since the process ends.
        connections.create_connection(hosts=['http://104.196.51.205'])
        elasticsearch.insert_or_ignore_filepaths(
            pathlib.Path(log_filepath),
            pathlib.Path(csv_filepath),
        )
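# A minimal driver sketch for start_job; the .npy paths and names are
# placeholders, and `params` is a blueno.ParamConfig built as in the
# sketch above. start_job reads CUDA_VISIBLE_DEVICES, so set it first.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

x_train = np.load('data/x_train.npy')  # placeholder path
y_train = np.load('data/y_train.npy')  # 2D, e.g. one-hot labels
x_valid = np.load('data/x_valid.npy')
y_valid = np.load('data/y_valid.npy')

start_job(x_train, y_train, x_valid, y_valid,
          job_name='resnet-baseline',  # must not contain '/'
          username='luke',
          params=params,
          log_dir='logs/')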
import keras
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD

import c3d
import utils

# LEARN_RATE and the x_train/y_train/x_val/y_val arrays are assumed to be
# defined earlier in the script.
metrics = [
    'acc',
    utils.true_positives,
    utils.false_negatives,
    utils.sensitivity,
    utils.specificity,
]

# Train 10 models on the data
for i in range(10):
    model = c3d.C3DBuilder.build()
    opt = SGD(lr=LEARN_RATE, momentum=0.9, nesterov=True)
    model.compile(optimizer=opt,
                  loss={"out_class": "binary_crossentropy"},
                  metrics=metrics)
    callbacks = utils.create_callbacks(x_train=x_train,
                                       y_train=y_train,
                                       x_valid=x_val,
                                       y_valid=y_val,
                                       normalize=False)
    checkpoint = ModelCheckpoint(f'tmp/c3d_no_aug_{i}.hdf5',
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='auto')
    callbacks.append(checkpoint)
    history = model.fit(x=x_train,
                        y=y_train,
                        epochs=100,
                        batch_size=16,
                        callbacks=callbacks,
                        validation_data=(x_val, y_val),
                        verbose=2)
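# The checkpoints above are saved with custom metric functions attached,
# so reloading one later requires handing those functions back to Keras
# via custom_objects (the standard alternative to patching keras.metrics
# as __train_model does above). A sketch, assuming the same utils module:
from keras.models import load_model

best = load_model('tmp/c3d_no_aug_0.hdf5',
                  custom_objects={'true_positives': utils.true_positives,
                                  'false_negatives': utils.false_negatives,
                                  'sensitivity': utils.sensitivity,
                                  'specificity': utils.specificity})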
import datetime

import keras
import numpy as np

import plotting
import utils
from c3d import C3DBuilder


def train_model(x_train: np.ndarray,
                y_train: np.ndarray,
                x_valid: np.ndarray,
                y_valid: np.ndarray):
    """
    Trains a C3D model on the given data.

    :param x_train: the training chunks, shape (?, 32, 32, 32)
    :param y_train: the training labels, shape (?,)
    :param x_valid: the validation chunks, shape (?, 32, 32, 32)
    :param y_valid: the validation labels, shape (?,)
    :return: None
    """
    # Validate shapes up front to avoid data type issues in the Keras
    # library calls
    if x_train.shape[1:] != (32, 32, 32) or x_valid.shape[1:] != (32, 32, 32):
        raise ValueError(
            'x_train and x_valid should have shape (?, 32, 32, 32), got {}'
            ' and {}'.format(x_train.shape, x_valid.shape))
    if y_train.ndim != 1 or y_valid.ndim != 1:
        raise ValueError(
            'y_train and y_valid should have shape (?,), got {} and'
            ' {}'.format(y_train.shape, y_valid.shape))

    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.int16)
    x_valid = x_valid.astype(np.float32)
    y_valid = y_valid.astype(np.int16)

    # Add the trailing channel and label axes expected by the model
    x_train = np.expand_dims(x_train, axis=-1)
    x_valid = np.expand_dims(x_valid, axis=-1)
    y_train = np.expand_dims(y_train, axis=-1)
    y_valid = np.expand_dims(y_valid, axis=-1)

    # Standardize with the training-set statistics only
    mean = x_train.mean()
    std = x_train.std()
    x_train = (x_train - mean) / std
    x_valid = (x_valid - mean) / std

    describe_data(x_train, y_train, x_valid, y_valid)

    metrics = [
        'acc',
        utils.true_positives,
        utils.false_negatives,
        utils.sensitivity,
        utils.specificity,
    ]

    model = C3DBuilder.build()
    opt = keras.optimizers.Adam(lr=1e-4)
    model.compile(optimizer=opt,
                  loss={"out_class": "binary_crossentropy"},
                  metrics=metrics)
    callbacks = utils.create_callbacks(x_train=x_train,
                                       y_train=y_train,
                                       x_valid=x_valid,
                                       y_valid=y_valid,
                                       normalize=False)
    history = model.fit(x=x_train,
                        y=y_train,
                        epochs=50,
                        batch_size=8,
                        validation_data=(x_valid, y_valid),
                        callbacks=callbacks,
                        verbose=2)
    now = datetime.datetime.utcnow().isoformat()
    plotting.upload_gcs_plots(x_train,
                              x_valid,
                              y_valid,
                              model,
                              history,
                              job_name='c3d-luke',
                              created_at=now,
                              chunk=True)
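# The shape guards at the top of train_model lend themselves to a quick
# unit test in the same style as the create_callbacks tests below; a
# sketch using pytest (the guard fires before any Keras call runs):
import pytest


def test_train_model_rejects_bad_shapes():
    x = np.random.rand(4, 16, 16, 16)  # wrong chunk size, should be 32^3
    y = np.random.randint(0, 2, size=(4,))
    with pytest.raises(ValueError):
        train_model(x, y, x, y)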
import numpy as np
import sklearn.preprocessing

import utils


def test_create_callbacks_one_class():
    X = np.random.rand(10, 224, 224, 3)
    y = np.random.randint(0, 1, size=(10, 1))
    utils.create_callbacks(X, y, X, y, csv_file='/tmp/callbacks_test.csv')
def test_create_callbacks_reduce_lr():
    X = np.random.rand(10, 224, 224, 3)
    y = np.random.randint(0, 3, size=(10,))
    y = sklearn.preprocessing.label_binarize(y, classes=[0, 1, 2])
    utils.create_callbacks(X, y, X, y, reduce_lr=True)
def test_create_callbacks_three_classes():
    X = np.random.rand(10, 224, 224, 3)
    y = np.random.randint(0, 3, size=(10,))
    y = sklearn.preprocessing.label_binarize(y, classes=[0, 1, 2])
    utils.create_callbacks(X, y, X, y, csv_file='/tmp/callbacks_test.csv')