Example #1
def base_ensemble():
    max_l = Config.Train.max_len
    val_df = pd.read_csv(Config.validation_path)

    models_paths = list(
        Path(Config.Train.checkpoint_dir / Config.model_type).iterdir())
    start_idx = 0  # running sum of start-token probabilities across models
    end_idx = 0  # running sum of end-token probabilities across models
    jaccards = []
    for path in models_paths:
        tf.keras.backend.clear_session()

        _generator = RobertaDataGenerator(val_df, augment=False)
        dataset = tf.data.Dataset.from_generator(_generator.generate,
                                                 output_types=({
                                                     'ids': tf.int32,
                                                     'att': tf.int32,
                                                     'tti': tf.int32
                                                 }, {
                                                     'sts': tf.int32,
                                                     'ets': tf.int32
                                                 }))
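        # 'ids' pads with 1 because RoBERTa's <pad> token id is 1; the
        # attention mask, token type ids and targets pad with 0.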
        dataset = dataset.padded_batch(Config.Train.batch_size,
                                       padded_shapes=({
                                           'ids': [max_l],
                                           'att': [max_l],
                                           'tti': [max_l]
                                       }, {
                                           'sts': [max_l],
                                           'ets': [max_l]
                                       }),
                                       padding_values=({
                                           'ids': 1,
                                           'att': 0,
                                           'tti': 0
                                       }, {
                                           'sts': 0,
                                           'ets': 0
                                       }))
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

        model = get_roberta()
        model.load_weights(str(path))

        s_idx, e_idx = model.predict(dataset, verbose=1)
        start_idx += s_idx
        end_idx += e_idx
        jaccard = get_jaccard_from_df(val_df, np.argmax(s_idx, axis=-1),
                                      np.argmax(e_idx, axis=-1), 'roberta',
                                      None)
        jaccards.append(jaccard)
    print(f'\nMean jaccard for all models: {np.mean(jaccards)}')
    start_idx /= len(models_paths)  # average over the actual model count
    end_idx /= len(models_paths)
    e_jaccard = get_jaccard_from_df(val_df, np.argmax(start_idx, axis=-1),
                                    np.argmax(end_idx, axis=-1), 'roberta',
                                    None)
    print(f'Mean ensemble jaccard for models (base): {e_jaccard}\n')
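For reference, the metric here is the word-level Jaccard score from the Tweet Sentiment Extraction competition. get_jaccard_from_df is project code, but it presumably aggregates per-sample scores computed like this minimal sketch:

def jaccard(str1: str, str2: str) -> float:
    """Word-level Jaccard similarity between two strings."""
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))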
Example #2
def max_joint_proba_ensemble():
    max_l = Config.Train.max_len
    val_df = pd.read_csv(Config.validation_path)
    models_paths = list(
        Path(Config.Train.checkpoint_dir / Config.model_type).iterdir())
    n_models = len(models_paths)  # size buffers by the actual model count
    joint_probs = np.zeros((val_df.shape[0], n_models))
    start_idx = np.zeros((val_df.shape[0], n_models))
    end_idx = np.zeros((val_df.shape[0], n_models))
    for i, path in enumerate(models_paths):
        tf.keras.backend.clear_session()

        _generator = RobertaDataGenerator(val_df, augment=False)
        dataset = tf.data.Dataset.from_generator(_generator.generate,
                                                 output_types=({
                                                     'ids': tf.int32,
                                                     'att': tf.int32,
                                                     'tti': tf.int32
                                                 }, {
                                                     'sts': tf.int32,
                                                     'ets': tf.int32
                                                 }))
        dataset = dataset.padded_batch(Config.Train.batch_size,
                                       padded_shapes=({
                                           'ids': [max_l],
                                           'att': [max_l],
                                           'tti': [max_l]
                                       }, {
                                           'sts': [max_l],
                                           'ets': [max_l]
                                       }),
                                       padding_values=({
                                           'ids': 1,
                                           'att': 0,
                                           'tti': 0
                                       }, {
                                           'sts': 0,
                                           'ets': 0
                                       }))
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

        model = get_roberta()
        model.load_weights(str(path))

        s_idx, e_idx = model.predict(dataset, verbose=1)
        # joint probability of the most likely (start, end) pair per sample
        joint_probs[:, i] = np.max(s_idx, axis=-1) * np.max(e_idx, axis=-1)
        start_idx[:, i] = np.argmax(s_idx, axis=-1)
        end_idx[:, i] = np.argmax(e_idx, axis=-1)
    # for each sample, keep the prediction from the model whose joint
    # start*end probability is highest (row-wise fancy indexing)
    selection_idx = np.argmax(joint_probs, axis=-1)
    rows = np.arange(val_df.shape[0])
    start_idx = start_idx[rows, selection_idx]
    end_idx = end_idx[rows, selection_idx]
    jaccard = get_jaccard_from_df(val_df, start_idx.astype('int'),
                                  end_idx.astype('int'), 'roberta', None)
    print(f'\nMax joint probability jaccard: {jaccard}\n')
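The per-sample selection above relies on NumPy fancy indexing: pairing each row index with its argmax column picks one model's prediction per sample. A toy illustration (the values are made up):

import numpy as np

joint = np.array([[0.2, 0.9],
                  [0.7, 0.1]])           # joint probs: 2 samples, 2 models
starts = np.array([[3, 5],
                   [8, 2]])              # per-model start indices
pick = np.argmax(joint, axis=-1)         # -> [1, 0]
rows = np.arange(joint.shape[0])
print(starts[rows, pick])                # -> [5 8]: one value per sample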
Example #3
File: train.py Project: ashwan1/TSE-2020
def train_albert(train_df, val_df, fold_i, augment=False):
    max_l = Config.Albert.max_len
    _train_generator = AlbertDataGenerator(train_df, augment=augment)
    train_dataset = tf.data.Dataset.from_generator(_train_generator.generate,
                                                   output_types=({
                                                       'ids': tf.int32,
                                                       'att': tf.int32,
                                                       'tti': tf.int32
                                                   }, {
                                                       'sts': tf.int32,
                                                       'ets': tf.int32
                                                   }))
    train_dataset = train_dataset.padded_batch(Config.Train.batch_size,
                                               padded_shapes=({
                                                   'ids': [None],
                                                   'att': [None],
                                                   'tti': [None]
                                               }, {
                                                   'sts': [None],
                                                   'ets': [None]
                                               }))
    train_dataset = train_dataset.repeat().prefetch(
        tf.data.experimental.AUTOTUNE)

    _val_generator = AlbertDataGenerator(val_df, augment=False)
    val_dataset = tf.data.Dataset.from_generator(_val_generator.generate,
                                                 output_types=({
                                                     'ids': tf.int32,
                                                     'att': tf.int32,
                                                     'tti': tf.int32
                                                 }, {
                                                     'sts': tf.int32,
                                                     'ets': tf.int32
                                                 }))
    val_dataset = val_dataset.padded_batch(Config.Train.batch_size,
                                           padded_shapes=({
                                               'ids': [None],
                                               'att': [None],
                                               'tti': [None]
                                           }, {
                                               'sts': [None],
                                               'ets': [None]
                                           }))
    val_dataset = val_dataset.repeat().prefetch(tf.data.experimental.AUTOTUNE)

    model = get_albert()
    if fold_i == 0:
        model.summary()
    model_name = f'weights_v{Config.version}_f{fold_i + 1}.h5'

    train_spe = get_steps(train_df)
    val_spe = get_steps(val_df)

    cbs = [
        WarmUpCosineDecayScheduler(6e-5,
                                   1200,
                                   warmup_steps=300,
                                   hold_base_rate_steps=200,
                                   verbose=0),
        keras.callbacks.ModelCheckpoint(str(Config.Train.checkpoint_dir /
                                            Config.model_type / model_name),
                                        verbose=1,
                                        save_best_only=True,
                                        save_weights_only=True)
    ]
    model.fit(train_dataset,
              epochs=2,
              verbose=1,
              validation_data=val_dataset,
              callbacks=cbs,
              steps_per_epoch=train_spe,
              validation_steps=val_spe)

    print(f'Loading checkpoint {model_name}...')
    model.load_weights(
        str(Config.Train.checkpoint_dir / Config.model_type / model_name))

    _val_generator = AlbertDataGenerator(val_df, augment=False)
    val_dataset = tf.data.Dataset.from_generator(_val_generator.generate,
                                                 output_types=({
                                                     'ids': tf.int32,
                                                     'att': tf.int32,
                                                     'tti': tf.int32
                                                 }, {
                                                     'sts': tf.int32,
                                                     'ets': tf.int32
                                                 }))
    val_dataset = val_dataset.padded_batch(Config.Train.batch_size,
                                           padded_shapes=({
                                               'ids': [max_l],
                                               'att': [max_l],
                                               'tti': [max_l]
                                           }, {
                                               'sts': [max_l],
                                               'ets': [max_l]
                                           }))
    val_dataset = val_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    s_idx, e_idx = model.predict(val_dataset, verbose=1)
    s_idx = np.argmax(s_idx, axis=-1)
    e_idx = np.argmax(e_idx, axis=-1)
    jaccard_score = get_jaccard_from_df(val_df, s_idx, e_idx, 'albert',
                                        'albert.csv')
    print(
        f'\n>>> Fold {fold_i + 1}: jaccard_score for albert: {jaccard_score}\n'
    )
    return jaccard_score
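Since train_albert returns the fold's jaccard score, a cross-validation driver can average it across folds. A hypothetical sketch (not from the project; the StratifiedKFold setup and the 'sentiment' column are assumptions):

from sklearn.model_selection import StratifiedKFold

def run_albert_cv(df, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    scores = []
    for fold_i, (tr_ix, val_ix) in enumerate(skf.split(df, df['sentiment'])):
        scores.append(train_albert(df.iloc[tr_ix], df.iloc[val_ix], fold_i))
    print(f'Mean CV jaccard: {np.mean(scores)}')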