Exemplo n.º 1
0
    def on_epoch_begin(self, epoch, logs={}):
        """Evaluate the newest saved checkpoint every ``eval_frequency`` epochs.

        Nothing happens for epoch 0 or for epochs that are not a multiple of
        ``self.eval_frequency``.  Otherwise the lexicographically last
        ``checkpoint.*`` file is loaded (uncompiled), compiled through
        ``model.compile_model`` with ``self.learning_rate`` and evaluated on
        ``self.data_sequence``; the three returned values are printed.  When
        the job directory is a ``gs://`` URL the checkpoint file is copied
        there afterwards.

        Args:
            epoch: Epoch number handed to the hook.
            logs: Unused.
        """
        if epoch <= 0 or epoch % self.eval_frequency != 0:
            return

        # h5py cannot write to GCS: for gs:// jobs the checkpoints are looked
        # up in the current working directory and uploaded after evaluation.
        remote_job = self.job_dir.startswith("gs://")
        if remote_job:
            pattern = 'checkpoint.*'
        else:
            pattern = os.path.join(self.job_dir, 'checkpoint.*')

        candidates = glob.glob(pattern)
        if not candidates:
            print('\nEvaluation epoch[{}] (no checkpoints found)'.format(
                epoch))
            return

        # Same file the sorted list would end with.
        newest = max(candidates)
        restored = model.compile_model(load_model(newest, compile=False),
                                       self.learning_rate)
        loss, acc, mae = restored.evaluate_generator(
            self.data_sequence, steps=self.data_sequence.length)
        summary = '\nEvaluation epoch[{}] metrics[{:.2f}, {:.2f}, {:.2f}] {}'
        print(summary.format(epoch, loss, acc, mae, restored.metrics_names))
        if remote_job:
            copy_file_to_gcs(self.job_dir, newest)
    def on_epoch_begin(self, epoch, logs=None):
        """Evaluate the newest saved checkpoint every ``eval_frequency`` epochs.

        Nothing happens for epoch 0 or for epochs that are not a multiple of
        ``self.eval_frequency``.  Otherwise the lexicographically last
        ``checkpoint.*`` file is loaded, recompiled with
        ``self.learning_rate`` and evaluated on a generator built from
        ``self.eval_files``; loss and accuracy are printed.  When the job
        directory is a ``gs://`` URL the checkpoint file is copied there
        afterwards.

        Args:
            epoch: Epoch number handed to the hook.
            logs: Unused.
        """
        if epoch > 0 and epoch % self.eval_frequency == 0:

            # Unhappy hack to work around h5py not being able to write to GCS.
            # Force snapshots and saves to local filesystem, then copy them
            # over to GCS.
            model_path_glob = 'checkpoint.*'
            if not self.job_dir.startswith("gs://"):
                model_path_glob = os.path.join(self.job_dir, model_path_glob)
            checkpoints = glob.glob(model_path_glob)
            if checkpoints:
                checkpoints.sort()
                # The model was previously bound to ``CHURN_MODEL`` but read
                # back as ``churn_model``, which raised NameError; one local
                # name is now used throughout.
                churn_model = load_model(checkpoints[-1])
                churn_model = model.compile_model(churn_model,
                                                  self.learning_rate)
                loss, acc = churn_model.evaluate_generator(
                    model.generator_input(self.eval_files,
                                          chunk_size=CHUNK_SIZE),
                    steps=self.steps)
                print(
                    '\nEvaluation epoch[{}] metrics[{:.2f}, {:.2f}] {}'.format(
                        epoch, loss, acc, churn_model.metrics_names))
                if self.job_dir.startswith("gs://"):
                    copy_file_to_gcs(self.job_dir, checkpoints[-1])
            else:
                print('\nEvaluation epoch[{}] (no checkpoints found)'.format(
                    epoch))
Exemplo n.º 3
0
 def on_epoch_begin(self, epoch, logs=None):
     """Evaluate the newest saved checkpoint every ``eval_frequency`` epochs.

     Nothing happens for epoch 0 or for epochs that are not a multiple of
     ``self.eval_frequency``.  Otherwise the lexicographically last
     ``checkpoint.*`` file is loaded, recompiled with ``self.learning_rate``
     and evaluated on arrays built from ``self.eval_files``; loss and
     accuracy are printed.  When the job directory is a ``gs://`` URL the
     checkpoint file is copied there afterwards.

     Args:
         epoch: Epoch number handed to the hook.
         logs: Unused.
     """
     if epoch > 0 and epoch % self.eval_frequency == 0:
         # Unhappy hack to work around h5py not being able to write to GCS.
         # Force snapshots and saves to local filesystem,
         #  then copy them over to GCS.
         model_path_glob = 'checkpoint.*'
         if not self.job_dir.startswith("gs://"):
             model_path_glob = os.path.join(self.job_dir, model_path_glob)
         checkpoints = glob.glob(model_path_glob)
         if checkpoints:
             checkpoints.sort()
             beiras_model = load_model(checkpoints[-1])
             beiras_model = model.compile_model(beiras_model,
                                                self.learning_rate)
             x_eval, y_eval = model.get_array_x_y(self.eval_files,
                                                  self.steps, WINDOWS_SIZE,
                                                  NUM_CHARS)
             loss, acc = beiras_model.evaluate(x_eval, y_eval)
             # Single-argument call form: valid on Python 3 and prints the
             # same text on Python 2, where the statement form was used.
             print('\nEvaluation epoch[{}] metrics[{:.2f}, {:.2f}] {}'.format(
                 epoch, loss, acc, beiras_model.metrics_names))
             if self.job_dir.startswith("gs://"):
                 copy_file_to_gcs(self.job_dir, checkpoints[-1])
         else:
             print('\nEvaluation epoch[{}] (no checkpoints found)'.format(
                 epoch))
Exemplo n.º 4
0
    def on_epoch_begin(self, epoch, logs={}):
        """Evaluate the newest saved checkpoint every ``eval_frequency`` epochs.

        Nothing happens for epoch 0 or for epochs that are not a multiple of
        ``int(self.eval_frequency)``.  Otherwise the lexicographically last
        ``checkpoint.*`` file is loaded, recompiled with
        ``self.learning_rate`` and ``self.momentum`` and evaluated on
        ``self.eval_sequence`` for ``self.steps`` steps; loss and accuracy
        are printed.  When the job directory is a ``gs://`` URL the
        checkpoint file is copied there afterwards.

        Args:
            epoch: Epoch number handed to the hook.
            logs: Unused.
        """
        if epoch <= 0 or epoch % int(self.eval_frequency) != 0:
            return

        # h5py cannot write to GCS: for gs:// jobs the checkpoints are looked
        # up in the current working directory and uploaded after evaluation.
        remote_job = self.job_dir.startswith("gs://")
        pattern = 'checkpoint.*'
        if not remote_job:
            pattern = os.path.join(self.job_dir, pattern)

        matches = sorted(glob.glob(pattern))
        if not matches:
            print('Evaluation epoch[{}] (no checkpoints found)'.format(
                epoch))
            return

        latest_checkpoint = matches[-1]
        conv_model = model.compile_model(load_model(latest_checkpoint),
                                         self.learning_rate,
                                         self.momentum)
        loss, acc = conv_model.evaluate_generator(
            generator=self.eval_sequence, steps=self.steps)
        print('Evaluation epoch[{}] metrics[{:.2f}, {:.2f}] {}'.format(
            epoch, loss, acc, conv_model.metrics_names))
        if remote_job:
            copy_file_to_gcs(self.job_dir, latest_checkpoint)
Exemplo n.º 5
0
def dispatch(data_file, job_dir, num_epochs):
    """Train the model on ``data_file`` and write all artefacts to ``job_dir``.

    The function builds the training/validation arrays and the model through
    the project ``model`` module, trains with checkpoint and TensorBoard
    callbacks, saves the trained Keras model and finally exports it through
    ``model.to_savedmodel``.

    Args:
        data_file: Passed to ``model.get_training_data`` as the data source.
        job_dir: Output location; normalised by ``create_job_dir``.  A
            ``gs://`` prefix switches saving to "write locally, then copy".
        num_epochs: Number of epochs passed to ``fit``.
    """
    job_dir = create_job_dir(job_dir)
    (nb_chars, embedding_matrix,
     x_train, y_train, x_val, y_val) = model.get_training_data(
         data_file, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH, VALIDATION_SPLIT,
         EMBEDDING_FILE_GCS)
    my_model = model.model_fn(nb_chars, embedding_matrix)

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem, then copy them over to
    # GCS.
    checkpoint_path = FILE_PATH
    if not job_dir.startswith("gs://"):
        checkpoint_path = os.path.join(job_dir, checkpoint_path)

    # Model checkpoint callback: keep only the checkpoint with the lowest
    # validation loss.
    checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 monitor='val_loss',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 mode='min')

    # Tensorboard logs callback
    tblog = keras.callbacks.TensorBoard(log_dir=os.path.join(job_dir, 'logs'),
                                        write_graph=True,
                                        embeddings_freq=0)

    callbacks = [checkpoint, tblog]

    my_model = model.compile_model(my_model)
    my_model.fit(x_train,
                 y_train,
                 validation_data=(x_val, y_val),
                 epochs=num_epochs,
                 batch_size=128,
                 callbacks=callbacks)

    # Same h5py/GCS workaround as above: save locally first, then copy.
    if job_dir.startswith("gs://"):
        my_model.save(MY_MODEL_NAME)
        copy_file_to_gcs(job_dir, MY_MODEL_NAME)
    else:
        my_model.save(os.path.join(job_dir, MY_MODEL_NAME))

    # Convert the Keras model to TensorFlow SavedModel
    model.to_savedmodel(my_model, os.path.join(job_dir, 'export'))
Exemplo n.º 6
0
    def on_epoch_end(self, epoch, logs={}):
        """Evaluate the newest checkpoint once every ``eval_frequency`` calls.

        Each call increments ``self.epochs_since_last_save``; when it reaches
        ``self.eval_frequency`` the counter is reset and the
        lexicographically last ``checkpoint.*`` file is loaded, recompiled
        and evaluated on features loaded from ``self.eval_files``.  The
        metrics are printed and appended to ``self.tf_logger``, predictions
        are written to ``preds/yhat_<epoch>.txt`` under the job directory,
        and for a ``gs://`` job directory the checkpoint is copied there.

        Args:
            epoch: Epoch number handed to the hook.
            logs: Unused.
        """
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save < self.eval_frequency:
            return
        self.epochs_since_last_save = 0

        # h5py cannot write to GCS: for gs:// jobs the checkpoints are looked
        # up in the current working directory and uploaded after evaluation.
        on_gcs = self.job_dir.startswith("gs://")
        if on_gcs:
            pattern = 'checkpoint.*'
        else:
            pattern = os.path.join(self.job_dir, 'checkpoint.*')

        found = sorted(glob.glob(pattern))
        if not found:
            print(
                '\n*** Evaluation epoch[{}] (no checkpoints found)'.format(
                    epoch))
            return

        newest = found[-1]
        forecast_model = model.compile_model(load_model(newest))
        x, y = model.load_features(self.eval_files, self.scaler,
                                   self.labelencoder_DayOfWeek,
                                   self.labelencoder_StoreType,
                                   self.labelencoder_Assortment,
                                   self.onehotencoder)
        metrics = forecast_model.evaluate(x, y)
        print('\n*** Evaluation epoch[{}] metrics {} {}'.format(
            epoch, metrics, forecast_model.metrics_names))

        # Store the predictions after passing them through
        # model.invert_scale_sales.
        y_hat = model.invert_scale_sales(forecast_model.predict(x),
                                         self.scaler)
        preds_file = os.path.join(self.job_dir,
                                  'preds/yhat_{:06d}.txt'.format(epoch))
        np.savetxt(preds_file, y_hat)

        self.tf_logger.append(
            metrics_dict=dict(zip(forecast_model.metrics_names, metrics)),
            epoch=epoch)

        if on_gcs:
            copy_file_to_gcs(self.job_dir, newest)
Exemplo n.º 7
0
 def on_epoch_begin(self, epoch, logs={}):
   """Evaluate the newest saved checkpoint every `eval_frequency` epochs.

   Nothing happens for epoch 0 or for epochs that are not a multiple of
   `self.eval_frequency`. Otherwise the lexicographically last
   `checkpoint.*` file is loaded, recompiled with `self.learning_rate` and
   evaluated on a generator built from `self.eval_files`; loss and accuracy
   are printed. When the job directory is a `gs://` URL the checkpoint file
   is copied there afterwards.

   Args:
     epoch: Epoch number handed to the hook.
     logs: Unused.
   """
   if epoch <= 0 or epoch % self.eval_frequency != 0:
     return

   # h5py cannot write to GCS: for gs:// jobs the checkpoints are looked up
   # in the current working directory and uploaded after evaluation.
   remote_job = self.job_dir.startswith('gs://')
   pattern = 'checkpoint.*'
   if not remote_job:
     pattern = os.path.join(self.job_dir, pattern)

   candidates = glob.glob(pattern)
   if not candidates:
     print('\nEvaluation epoch[{}] (no checkpoints found)'.format(epoch))
     return

   # Same file the sorted list would end with.
   newest = max(candidates)
   census_model = model.compile_model(load_model(newest), self.learning_rate)
   eval_batches = model.generator_input(self.eval_files,
                                        chunk_size=CHUNK_SIZE)
   loss, acc = census_model.evaluate_generator(eval_batches, steps=self.steps)
   print('\nEvaluation epoch[{}] metrics[{:.2f}, {:.2f}] {}'.format(
       epoch, loss, acc, census_model.metrics_names))
   if remote_job:
     copy_file_to_gcs(self.job_dir, newest)
Exemplo n.º 8
0
def dispatch(train_prefix, validation_prefix, job_dir, learning_rate,
             num_epochs, checkpoint_epochs, lam, dropout, model_file):
    """Train (or resume training of) the face-age model and export it.

    Args:
        train_prefix: Passed to ``FileDataSequence`` for the training data.
        validation_prefix: Passed to ``FileDataSequence`` for validation.
        job_dir: Output location for checkpoints, logs and exports.  A
            ``gs://`` prefix switches saving to "write locally, then copy".
        learning_rate: Learning rate handed to the ``model`` module.
        num_epochs: Number of epochs passed to ``fit_generator``.
        checkpoint_epochs: ``period`` of the checkpoint callback.
        lam: Passed to ``model.model_fn`` when a fresh model is built.
        dropout: Passed to ``model.model_fn`` when a fresh model is built.
        model_file: Optional existing model to resume from; a ``gs://`` URL
            is first copied to ``/tmp`` with ``gsutil``.  ``None`` builds a
            fresh model instead.

    Raises:
        subprocess.CalledProcessError: If the ``gsutil cp`` download fails.
    """
    # The prefixes are used as given; no local download step takes place.
    train_tmp_prefix = train_prefix
    val_tmp_prefix = validation_prefix
    print(train_tmp_prefix, val_tmp_prefix)

    logger = logging.getLogger()
    sh = StreamHandler(stdout)
    logger.addHandler(sh)
    logger.setLevel(logging.INFO)
    logger.info('learning_rate=%s', learning_rate)
    if model_file is not None:
        if model_file.startswith('gs://'):
            # Argument list rather than a split command string, so a path
            # containing whitespace stays a single argument.
            subprocess.check_call(['gsutil', 'cp', model_file, '/tmp'])
            real_model_file = '/tmp/%s' % model_file.split('/')[-1]
        else:
            real_model_file = model_file
        face_age_model = load_model(real_model_file, compile=False)
        face_age_model = model.compile_model(face_age_model, learning_rate)
    else:
        face_age_model = model.model_fn(learning_rate, lam, dropout)

    # Best-effort directory creation: an already existing directory (or any
    # other filesystem error) is ignored, as before, but unrelated
    # exceptions are no longer swallowed.
    try:
        os.makedirs(job_dir)
    except OSError:
        pass

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem, then copy them over to
    # GCS.
    checkpoint_path = FILE_PATH
    if not job_dir.startswith("gs://"):
        checkpoint_path = os.path.join(job_dir, checkpoint_path)
        verbose = 1
    else:
        verbose = 2

    # Model checkpoint callback
    # NOTE(review): mode='max' looks inconsistent with monitoring a loss; it
    # presumably only matters when save_best_only is enabled - confirm
    # before changing.
    checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 monitor='val_loss',
                                                 verbose=1,
                                                 period=checkpoint_epochs,
                                                 mode='max')

    # Validation data, consumed by fit_generator below.
    val_datasequence = FileDataSequence(val_tmp_prefix)

    # Tensorboard logs callback
    tblog = keras.callbacks.TensorBoard(log_dir=os.path.join(job_dir, 'logs'),
                                        histogram_freq=0,
                                        write_graph=True,
                                        embeddings_freq=0)

    callbacks = [checkpoint, tblog]

    train_data_sequence = FileDataSequence(train_tmp_prefix)

    face_age_model.fit_generator(
        train_data_sequence,
        validation_data=val_datasequence,
        validation_steps=val_datasequence.length,
        steps_per_epoch=train_data_sequence.length,
        verbose=verbose,
        epochs=num_epochs,
        callbacks=callbacks)

    # Same h5py/GCS workaround as above: save locally first, then copy.
    if job_dir.startswith("gs://"):
        face_age_model.save(FACE_AGE_MODEL)
        copy_file_to_gcs(job_dir, FACE_AGE_MODEL)
    else:
        face_age_model.save(os.path.join(job_dir, FACE_AGE_MODEL))

    # Convert the Keras model to TensorFlow SavedModel
    model.to_savedmodel(face_age_model, os.path.join(job_dir, 'export'))
Exemplo n.º 9
0
def dispatch(train_files, eval_files, job_dir, train_steps, eval_steps,
             learning_rate, eval_frequency, num_epochs, checkpoint_epochs,
             gpus):
    """Train the model (optionally on several GPUs) and export it.

    Args:
        train_files: Passed to ``model.get_array_x_y`` for training data.
        eval_files: Passed to the ``ContinuousEval`` callback.
        job_dir: Output location for checkpoints, logs and exports.  A
            ``gs://`` prefix switches saving to "write locally, then copy".
        train_steps: Passed to ``model.get_array_x_y`` for training data.
        eval_steps: ``steps`` argument of the ``ContinuousEval`` callback.
        learning_rate: Learning rate handed to ``model.compile_model``.
        eval_frequency: Passed to the ``ContinuousEval`` callback.
        num_epochs: Number of epochs passed to ``fit``.
        checkpoint_epochs: ``period`` of the checkpoint callback.
        gpus: Number of GPUs; values above 1 enable ``multi_gpu_model``.
    """
    # With several GPUs two models are used: one for training and one for
    # saving.  The model that gets saved is created on the CPU; the training
    # model is derived from it with multi_gpu_model.
    if gpus <= 1:
        model_train = model.model_fn(NUM_CHARS, window_size=WINDOWS_SIZE)
        model_save = model_train
    else:
        with tf.device("/cpu:0"):
            model_save = model.model_fn(NUM_CHARS, window_size=WINDOWS_SIZE)
        model_train = multi_gpu_model(model_save, gpus=gpus)
        model.compile_model(model_save, learning_rate)
        print(model_save.summary())
    model.compile_model(model_train, learning_rate)
    print(model_train.summary())

    # Best-effort directory creation: an already existing directory (or any
    # other filesystem error) is ignored, as before, but KeyboardInterrupt,
    # SystemExit and unrelated exceptions are no longer swallowed.
    try:
        os.makedirs(job_dir)
    except OSError:
        pass

    # Unhappy hack to work around h5py not being able to write to GCS.
    # Force snapshots and saves to local filesystem,
    # then copy them over to GCS.
    checkpoint_path = FILE_PATH
    if not job_dir.startswith("gs://"):
        checkpoint_path = os.path.join(job_dir, checkpoint_path)

    # Model checkpoint callback
    # NOTE(review): mode='max' looks inconsistent with monitoring a loss; it
    # presumably only matters when save_best_only is enabled - confirm
    # before changing.
    checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 monitor='val_loss',
                                                 verbose=0,
                                                 period=checkpoint_epochs,
                                                 mode='max')

    # Continuous eval callback
    evaluation = ContinuousEval(eval_frequency,
                                eval_files,
                                learning_rate,
                                job_dir,
                                steps=eval_steps)

    # Tensorboard logs callback
    tblog = keras.callbacks.TensorBoard(log_dir=os.path.join(job_dir, 'logs'),
                                        histogram_freq=0,
                                        write_graph=True,
                                        embeddings_freq=0)

    callbacks = [checkpoint, evaluation, tblog]

    x, y = model.get_array_x_y(train_files, train_steps, WINDOWS_SIZE,
                               NUM_CHARS)

    model_train.fit(x,
                    y,
                    epochs=num_epochs,
                    callbacks=callbacks,
                    batch_size=500)

    # Same h5py/GCS workaround as above: save locally first, then copy.
    if job_dir.startswith("gs://"):
        model_save.save(BEIRAS_MODEL)
        copy_file_to_gcs(job_dir, BEIRAS_MODEL)
    else:
        model_save.save(os.path.join(job_dir, BEIRAS_MODEL))

    # Convert the Keras model to TensorFlow SavedModel
    model.to_savedmodel(model_save, os.path.join(job_dir, 'export'))