Example #1
    def on_epoch_end(self, epoch, logs=None):
        if logs is None:
            logs = {}

        self.multi = self.multi or len(self.model.outputs) > 1
        val_predict = self.model.predict(self.dev_x)
        if self.multi:
            val_predict = val_predict[0]
        if val_predict.shape[1] == 1:
            # Regression
            val_predict = rescale_regression_results(
                val_predict, self.highest_class
            ).ravel()
        if self.ranked:
            val_predict = K.eval(ranked_prediction(val_predict))
        logger.debug('Transformed predict\n%r', val_predict[:5])
        _val_f1 = f1_metric(self.dev_y, val_predict, self.average)
        self.val_f1s.append(_val_f1)
        logs['val_f1'] = _val_f1
        if _val_f1 > self.best_f1:
            print(
                "Epoch %d: val_f1 improved from %f to %f: saving weights as %s"
                % (epoch + 1, self.best_f1, _val_f1, self.weights_path)
            )
            self.best_f1 = _val_f1
            self.model.save_weights(self.weights_path)
        else:
            print(
                "Epoch %d: val_f1 did not improve (%f >= %f)"
                % (epoch + 1, self.best_f1, _val_f1)
            )
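The callback scores the transformed predictions against the development labels with an external f1_metric helper. A minimal sketch of such a helper, assuming it simply wraps scikit-learn's f1_score with a configurable averaging scheme (the name and signature follow the call above; the body is an assumption, not the project's actual code):

from sklearn.metrics import f1_score

def f1_metric(y_true, y_pred, average="macro"):
    # Assumed wrapper: delegate to scikit-learn with the chosen
    # averaging scheme ("macro", "micro", "weighted", ...).
    return f1_score(y_true, y_pred, average=average)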
Example #2
def main():
    args = parse_args()
    model_paths = find_model_paths(args.job_ids)

    print(model_paths)

    if args.collapsed:
        test_meta = load_split("test", round_cefr=True)
        round_target_scores = np.array(
            [ROUND_CEFR_LABELS.index(c) for c in test_meta["cefr"]], dtype=int)
        targets = round_target_scores
        highest_class = 3
        labels = ROUND_CEFR_LABELS
    else:
        test_meta = load_split("test", round_cefr=False)
        target_scores = np.array(
            [CEFR_LABELS.index(c) for c in test_meta["cefr"]], dtype=int)
        targets = target_scores
        highest_class = 6
        labels = CEFR_LABELS

    for model_path in model_paths:
        model, w2i = load_model_and_w2i(model_path)

        multi_input = isinstance(model.input, list) and len(model.input) == 2
        multi_output = (isinstance(model.outputs, list)
                        and len(model.outputs) > 1)

        x = get_input_reps(w2i, multi_input)
        del w2i
        predictions = get_predictions(model, x, multi_output)
        del x
        del model

        # Round to integers and clip to score range
        pred = rescale_regression_results(predictions, highest_class).ravel()
        report(targets, pred, labels)

        name = model_path.stem + "_test_eval"
        save_results(name, {}, {}, targets, pred)
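Examples #1, #4 and #6 take the first output of model.predict when the model has more than one output head; get_predictions here (and in Example #5) presumably encapsulates that same branch. A minimal sketch consistent with the inline pattern in the other examples (an assumption, not the project's definition):

def get_predictions(model, x, multi_output):
    # Assumed behaviour, mirroring the other examples: with multiple
    # output heads, the first one carries the main predictions.
    predictions = model.predict(x)
    return predictions[0] if multi_output else predictions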
Example #3
def test_rescale_regression_results():
    num_class = 7
    y = np.array([1, 2, 6, 4, 0, 2, 1, 5, 1, 0, 4, 4, 6, 2, 5])
    norm_y = y / num_class
    assert_array_equal(rescale_regression_results(norm_y, num_class), y)
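The test pins down the round-trip contract of rescale_regression_results: integer labels divided by a scale factor must come back unchanged after rescaling with the same factor. A minimal sketch that satisfies the test and matches the "Round to integers and clip to score range" comments in the other examples (an assumption, not the project's actual implementation):

import numpy as np

def rescale_regression_results(predictions, highest_class):
    # Scale normalized predictions back up, round to the nearest integer
    # and clip to the valid score range [0, highest_class].
    return np.clip(np.rint(predictions * highest_class), 0, highest_class).astype(int)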
Example #4
def main():
    args = parse_args()

    set_reproducible(args.seed_delta)

    train_meta = load_split("train", round_cefr=args.round_cefr)
    dev_meta = load_split("dev", round_cefr=args.round_cefr)

    target_col = "lang" if args.nli else "cefr"
    labels = sorted(train_meta[target_col].unique())

    train_x, dev_x, num_pos, w2i = get_sequence_input_reps(args)
    args.vocab_size = len(w2i)
    print("Vocabulary size is {}".format(args.vocab_size))

    train_target_scores = np.array(
        [labels.index(c) for c in train_meta[target_col]], dtype=int)
    dev_target_scores = np.array(
        [labels.index(c) for c in dev_meta[target_col]], dtype=int)
    del target_col

    train_y, dev_y, output_units = get_targets_and_output_units(
        train_target_scores, dev_target_scores, args.method)

    optimizer, loss, metrics = get_compile_args(args.method, args.lr)
    multi_task = args.aux_loss_weight > 0
    if multi_task:
        assert not args.nli, "Both NLI and multi-task specified"
        lang_labels = sorted(train_meta.lang.unique())
        train_y.append(
            to_categorical([lang_labels.index(l) for l in train_meta.lang]))
        dev_y.append(
            to_categorical([lang_labels.index(l) for l in dev_meta.lang]))
        output_units.append(len(lang_labels))
        loss_weights = {
            AUX_OUTPUT_NAME: args.aux_loss_weight,
            OUTPUT_NAME: 1.0 - args.aux_loss_weight,
        }
    else:
        loss = loss[OUTPUT_NAME]
        metrics = metrics[OUTPUT_NAME]
        loss_weights = None
    del train_meta, dev_meta

    model = build_model(
        args.vocab_size,
        args.doc_length,
        output_units,
        args.embed_dim,
        windows=args.windows,
        num_pos=num_pos,
        constraint=args.constraint,
        static_embs=args.static_embs,
        classification=args.method == "classification",
    )
    model.summary()

    if args.vectors:
        init_pretrained_embs(model, args.vectors, w2i)

    model.compile(optimizer=optimizer,
                  loss=loss,
                  loss_weights=loss_weights,
                  metrics=metrics)

    logger.debug("Train y\n%r", train_y[0][:5])
    logger.debug("Model config\n%r", model.get_config())

    # Context manager fails on Windows (can't open an open file again)
    temp_handle, weights_path = tempfile.mkstemp(suffix=".h5")
    val_y = dev_target_scores
    callbacks = [
        F1Metrics(dev_x, val_y, weights_path, ranked=args.method == "ranked")
    ]
    history = model.fit(
        train_x,
        train_y,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=callbacks,
        validation_data=(dev_x, dev_y),
        verbose=2,
    )
    model.load_weights(weights_path)
    os.close(temp_handle)
    os.remove(weights_path)

    true = dev_target_scores
    if multi_task:
        predictions = model.predict(dev_x)[0]
    else:
        predictions = model.predict(dev_x)
    if args.method == "classification":
        pred = np.argmax(predictions, axis=1)
    elif args.method == "regression":
        # Round to integers and clip to score range
        highest_class = train_target_scores.max()
        pred = rescale_regression_results(predictions, highest_class).ravel()
    elif args.method == "ranked":
        pred = K.eval(ranked_prediction(predictions))
    try:
        if multi_task:
            multi_task_report(history.history, true, pred, labels)
        else:
            report(true, pred, labels)
    except Exception:
        pass

    name = get_name(args.nli, multi_task)
    name = get_file_name(name)

    if args.save_model:
        save_model(name, model, w2i)

    save_results(name, args.__dict__, history.history, true, pred)

    plt.show()
Example #5
def main():
    args = parse_args()

    set_reproducible(args.seed_delta)

    train_meta = load_split('train', round_cefr=args.round_cefr)
    dev_meta = load_split('dev', round_cefr=args.round_cefr)

    target_col = 'lang' if args.nli else 'cefr'
    labels = sorted(train_meta[target_col].unique())

    train_x, dev_x, num_pos, w2i = get_sequence_input_reps(args)
    args.vocab_size = len(w2i)
    print("Vocabulary size is {}".format(args.vocab_size))

    train_target_scores = np.array(
        [labels.index(c) for c in train_meta[target_col]], dtype=int)
    dev_target_scores = np.array(
        [labels.index(c) for c in dev_meta[target_col]], dtype=int)
    del target_col

    train_y, dev_y, output_units = get_targets_and_output_units(
        train_target_scores, dev_target_scores, args.method)

    optimizer, loss, metrics = get_compile_args(args)
    multi_task = args.aux_loss_weight > 0
    if multi_task:
        assert not args.nli, "Both NLI and multi-task specified"
        lang_labels = sorted(train_meta.lang.unique())
        train_y.append(
            to_categorical([lang_labels.index(l) for l in train_meta.lang]))
        dev_y.append(
            to_categorical([lang_labels.index(l) for l in dev_meta.lang]))
        output_units.append(len(lang_labels))
        loss_weights = {
            AUX_OUTPUT_NAME: args.aux_loss_weight,
            OUTPUT_NAME: 1.0 - args.aux_loss_weight,
        }
    else:
        loss = loss[OUTPUT_NAME]
        metrics = metrics[OUTPUT_NAME]
        loss_weights = None

    model = build_model(args, output_units=output_units, num_pos=num_pos)
    model.summary()

    if args.vectors:
        init_pretrained_embs(model, args.vectors, w2i)

    model.compile(optimizer=optimizer,
                  loss=loss,
                  loss_weights=loss_weights,
                  metrics=metrics)

    # Context manager fails on Windows (can't open an open file again)
    temp_handle, weights_path = tempfile.mkstemp(suffix='.h5')
    val_y = dev_target_scores
    callbacks = [
        F1Metrics(dev_x, val_y, weights_path, ranked=args.method == 'ranked')
    ]
    history = model.fit(
        train_x,
        train_y,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=callbacks,
        validation_data=(dev_x, dev_y),
        verbose=2,
    )
    model.load_weights(weights_path)
    os.close(temp_handle)
    os.remove(weights_path)

    predictions = get_predictions(model, dev_x, multi_task)
    true = dev_target_scores
    if args.method == 'classification':
        pred = np.argmax(predictions, axis=1)
    elif args.method == 'regression':
        # Round to integers and clip to score range
        highest_class = train_target_scores.max()
        pred = rescale_regression_results(predictions, highest_class).ravel()
    elif args.method == 'ranked':
        pred = K.eval(ranked_prediction(predictions))
    try:
        if multi_task:
            multi_task_report(history.history, true, pred, labels)
        else:
            report(true, pred, labels)
    except Exception:
        pass

    if args.nli:
        name = 'rnn-nli'
    elif multi_task:
        name = 'rnn-multi'
    else:
        name = 'rnn'
    name = get_file_name(name)

    if args.save_model:
        save_model(name, model, w2i)

    save_results(name, args.__dict__, history.history, true, pred)

    plt.show()
Example #6
def main():
    args = parse_args()

    set_reproducible(args.seed_delta)
    do_classification = args.method == 'classification'

    train_meta = load_split('train', round_cefr=args.round_cefr)
    dev_meta = load_split('dev', round_cefr=args.round_cefr)

    kind = args.featuretype
    train_x, dev_x, num_features = preprocess(
        kind, args.max_features, train_meta, dev_meta
    )

    target_col = 'lang' if args.nli else 'cefr'
    labels = sorted(train_meta[target_col].unique())

    train_target_scores = np.array(
        [labels.index(c) for c in train_meta[target_col]], dtype=int
    )
    dev_target_scores = np.array(
        [labels.index(c) for c in dev_meta[target_col]], dtype=int
    )

    train_y, dev_y, output_units = get_targets_and_output_units(
        train_target_scores, dev_target_scores, args.method
    )

    multi_task = args.aux_loss_weight > 0
    if multi_task:
        assert not args.nli, "Both NLI and multi-task specified"
        lang_labels = sorted(train_meta.lang.unique())
        train_y.append(to_categorical([lang_labels.index(l) for l in train_meta.lang]))
        dev_y.append(to_categorical([lang_labels.index(l) for l in dev_meta.lang]))
        output_units.append(len(lang_labels))
        loss_weights = {
            AUX_OUTPUT_NAME: args.aux_loss_weight,
            OUTPUT_NAME: 1.0 - args.aux_loss_weight,
        }
    else:
        loss_weights = None
    del train_meta, dev_meta

    model = build_model(num_features, output_units, do_classification)
    model.summary()

    optimizer, loss, metrics = get_compile_args(args.method, args.lr)
    model.compile(
        optimizer=optimizer, loss=loss, loss_weights=loss_weights, metrics=metrics
    )

    # Context manager fails on Windows (can't open an open file again)
    temp_handle, weights_path = tempfile.mkstemp(suffix='.h5')
    val_y = dev_target_scores
    callbacks = [F1Metrics(dev_x, val_y, weights_path, ranked=args.method == 'ranked')]
    history = model.fit(
        train_x,
        train_y,
        epochs=args.epochs,
        batch_size=args.batch_size,
        callbacks=callbacks,
        validation_data=(dev_x, dev_y),
        verbose=2,
    )
    model.load_weights(weights_path)
    os.close(temp_handle)
    os.remove(weights_path)

    true = dev_target_scores
    if multi_task:
        predictions = model.predict(dev_x)[0]
    else:
        predictions = model.predict(dev_x)
    if args.method == 'classification':
        pred = np.argmax(predictions, axis=1)
    elif args.method == 'regression':
        # Round to integers and clip to score range
        highest_class = train_target_scores.max()
        pred = rescale_regression_results(predictions, highest_class).ravel()
    elif args.method == 'ranked':
        pred = K.eval(ranked_prediction(predictions))
    if multi_task:
        multi_task_report(history.history, true, pred, labels)
    else:
        report(true, pred, labels)

    plt.show()

    prefix = 'mlp_%s' % args.featuretype
    fname = get_file_name(prefix)
    save_results(fname, args.__dict__, history.history, true, pred)

    if args.save_model:
        save_model(fname, model, None)