Example #1
def build_simple_cnn_text_classifier(tok2vec,
                                     nr_class,
                                     exclusive_classes=False,
                                     **cfg):
    """
    Build a simple CNN text classifier, given a token-to-vector model as input.
    If exclusive_classes=True, a softmax non-linearity is applied, so that the
    outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
    is applied instead, so that outputs are in the range [0, 1].
    """
    with Model.define_operators({">>": chain}):
        if exclusive_classes:
            output_layer = Softmax(nr_class, tok2vec.nO)
        else:
            output_layer = (
                zero_init(Affine(nr_class, tok2vec.nO, drop_factor=0.0))
                >> logistic
            )
        model = (
            tok2vec
            >> flatten_add_lengths
            >> Pooling(mean_pool)
            >> output_layer
        )
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    return model
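
A minimal usage sketch (assuming spaCy 2.x, where this builder and the Tok2Vec factory live in spacy._ml; the sizes and class count are illustrative):

from spacy._ml import Tok2Vec

tok2vec = Tok2Vec(96, 2000)  # hypothetical width and embed_size
model = build_simple_cnn_text_classifier(tok2vec, nr_class=3,
                                         exclusive_classes=True)
# Calling model(docs) on a list of spaCy Docs yields one row of class
# probabilities per doc; with exclusive_classes=True each row sums to 1.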
Example #2
def main(depth=2, width=512, nb_epoch=30):
    if CupyOps.xp is not None:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    # The configuration here isn't especially well tuned, but it's fine for a demo.
    with Model.define_operators({'**': clone, '>>': chain}):
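        # Note: the depth argument is unused; the two explicit ReLu layers
        # fix the effective depth at 2.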
        model = ReLu(width) >> ReLu(width) >> Softmax()

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)

    dev_y = to_categorical(dev_y)
    with model.begin_training(train_X, train_y,
                              L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y),
                      trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype='float32')
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2.).sum() / y.shape[0]
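            # (yh - y) is the error signal passed backwards: the squared-error
            # gradient up to a constant factor.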
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
        with model.use_params(optimizer.averages):
            print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
            with open('out.pickle', 'wb') as file_:
                pickle.dump(model, file_, -1)
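
A minimal sketch of reloading the pickled model for evaluation (assuming thinc 7.x, where the example's helpers live at thinc.extra.datasets and thinc.neural.util; 'out.pickle' is the path written above):

import pickle

from thinc.extra import datasets
from thinc.neural.util import to_categorical

with open('out.pickle', 'rb') as file_:
    loaded = pickle.load(file_)
_, dev_data, _ = datasets.mnist()
dev_X, dev_y = loaded.ops.unzip(dev_data)
print(loaded.evaluate(dev_X, to_categorical(dev_y)))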
Example #3
def build_tagger_model(nr_class, **cfg):
    embed_size = util.env_opt('embed_size', 7000)
    if 'token_vector_width' in cfg:
        token_vector_width = cfg['token_vector_width']
    else:
        token_vector_width = util.env_opt('token_vector_width', 128)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators({'>>': chain, '+': add}):
        if 'tok2vec' in cfg:
            tok2vec = cfg['tok2vec']
        else:
            tok2vec = Tok2Vec(token_vector_width, embed_size,
                              pretrained_dims=pretrained_dims)
        softmax = with_flatten(Softmax(nr_class, token_vector_width))
        model = (
            tok2vec
            >> softmax
        )
    model.nI = None
    model.tok2vec = tok2vec
    model.softmax = softmax
    return model
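
A minimal usage sketch (nr_class=17 is a hypothetical tagset size; keyword arguments passed through **cfg, such as token_vector_width here, take precedence over the util.env_opt defaults read above):

model = build_tagger_model(17, token_vector_width=64)
# model.tok2vec and model.softmax are exposed as attributes so calling code
# can address the two sub-networks separately.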
Example #4
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)

    model = with_flatten(
        chain(
            HashEmbed(width, nr_vector),
            ReLu(width, width),
            ReLu(width, width),
            Softmax(nr_tag, width),
        ))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
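            # One error-signal array per input sequence: prediction minus
            # one-hot target.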
            backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
Example #5
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        embed = (
            (HashEmbed(width, 5000, column=1)
             | StaticVectors('spacy_pretrained_vectors', width, column=5)
             | HashEmbed(width // 2, 750, column=2)
             | HashEmbed(width // 2, 750, column=3)
             | HashEmbed(width // 2, 750, column=4))
            >> LN(Maxout(width))
        )

        sent2vec = (
            flatten_add_lengths
            >> with_getitem(0,
                embed
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            # This block would allow the model to learn some cross-sentence
            # features. It's not useful on this problem. It might make more
            # sense to use a BiLSTM here, following Liang et al (2016).
            #>> with_getitem(0,
            #    Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
            #)
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
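
For reference, the ** operator bound above resolves to thinc's clone, so Residual(...) ** conv_depth stacks that many copies of the block, each with its own weights. A minimal sketch (assuming thinc 7.x, with clone in thinc.api and Maxout in thinc.v2v):

from thinc.api import clone
from thinc.v2v import Maxout

block = Maxout(64)
stacked = clone(block, 3)  # what block ** 3 expands to: the original layer
                           # plus two deep copies, chained end to end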
Example #6
def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (HashEmbed(width, 5000, column=1)
                 | StaticVectors(vectors_name, width, column=5)
                 | HashEmbed(width // 2, 750, column=2)
                 | HashEmbed(width // 2, 750, column=3)
                 | HashEmbed(width // 2, 750, column=4)) >> LN(Maxout(width))

        sent2vec = (
            with_flatten(embed)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )

        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width)))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
Example #7
def build_tagger_model(nr_class, **cfg):
    embed_size = util.env_opt("embed_size", 2000)
    if "token_vector_width" in cfg:
        token_vector_width = cfg["token_vector_width"]
    else:
        token_vector_width = util.env_opt("token_vector_width", 96)
    pretrained_vectors = cfg.get("pretrained_vectors")
    subword_features = cfg.get("subword_features", True)
    with Model.define_operators({">>": chain, "+": add}):
        if "tok2vec" in cfg:
            tok2vec = cfg["tok2vec"]
        else:
            tok2vec = Tok2Vec(
                token_vector_width,
                embed_size,
                subword_features=subword_features,
                pretrained_vectors=pretrained_vectors,
            )
        softmax = with_flatten(Softmax(nr_class, token_vector_width))
        model = tok2vec >> softmax
    model.nI = None
    model.tok2vec = tok2vec
    model.softmax = softmax
    return model
Example #8
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({
            ">>": chain,
            "+": add,
            "|": concatenate,
            "**": clone
    }):
        if cfg.get("low_data") and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
            >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape)
                    >> LN(Maxout(width, width + (width // 2) * 3)),
                    column=0,
                )
            )
        )

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims))
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width, width * 3))) ** depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = build_bow_text_classifier(
            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
        )
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (
                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
                >> logistic
            )
        model = (linear_model | cnn_model) >> output_layer
        model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model
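
A minimal usage sketch (hypothetical settings; depth, ngram_size, and exclusive_classes are all read from **cfg as above, and leaving pretrained_dims at 0 keeps the static-vectors branch disabled):

model = build_text_classifier(2, width=64, ngram_size=2,
                              exclusive_classes=True)
# The ensemble concatenates the bag-of-words model's scores with the CNN's
# before the shared output layer; model.tok2vec exposes the CNN encoder alone.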
Example #9
def main(width=100,
         depth=4,
         vector_length=64,
         min_batch_size=4,
         max_batch_size=32,
         learn_rate=0.001,
         momentum=0.9,
         dropout=0.0,
         dropout_decay=1e-4,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate,
            '&': concatenate_ragged
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (
            flatten_add_lengths
            >> with_getitem(
                0,
                (lower_case | shape | prefix | suffix)
                >> LayerNorm(Maxout(width, pieces=3)),
            )
            >> concatenate_ragged(
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
            )
            >> with_getitem(0, Softmax(nr_tag))
            >> unflatten
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
Example #10
def main(
    width=100,
    depth=4,
    vector_length=64,
    min_batch_size=1,
    max_batch_size=32,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            "**": clone,
            ">>": chain,
            "+": add,
            "|": concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = with_flatten(
            (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3))**depth
            >> Softmax(nr_tag),
            pad=depth,
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (
            trainer,
            optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open("/tmp/model.pickle", "wb") as file_:
            pickle.dump(model, file_)
Example #11
def main(
    width=128,
    depth=1,
    vector_length=128,
    min_batch_size=16,
    max_batch_size=16,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    using_gpu = prefer_gpu()
    if using_gpu:
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
    cfg = dict(locals())
    print(cfg)
    train_data, check_data, nr_tag = ancora_pos_tags()
    train_data = list(train_data)
    check_data = list(check_data)

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({
            "**": clone,
            ">>": chain,
            "+": add,
            "|": concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

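        # PyTorchBiLSTM wraps a bidirectional torch LSTM so that it composes
        # with the surrounding thinc layers via >>.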
        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            )
            >> PyTorchBiLSTM(width, width, depth)
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:10], train_y[:10], **cfg) as (
            trainer,
            optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
    with open("/tmp/model.pickle", "wb") as file_:
        pickle.dump(model, file_)