Example #1
def MishWindowEncoder(config):
    from thinc.v2v import Mish
    from thinc.t2t import ExtractWindow
    from thinc.misc import LayerNorm, Residual
    from thinc.api import chain, clone

    nO = config["width"]
    nW = config["window_size"]
    depth = config["depth"]

    # Concatenate each token vector with its nW neighbours on either side,
    # mix back down to width nO with a Mish layer plus layer normalization,
    # and stack `depth` residual copies of that block.
    cnn = chain(ExtractWindow(nW=nW), LayerNorm(Mish(nO, nO * ((nW * 2) + 1))))
    model = clone(Residual(cnn), depth)
    model.nO = nO
    return model
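Usage sketch (not part of the original snippet; Thinc v7.x assumed, and the config values and array sizes are illustrative):

import numpy

# Encoder for 96-dim token vectors, one-token window, four residual blocks.
encoder = MishWindowEncoder({"width": 96, "window_size": 1, "depth": 4})
tokens = numpy.zeros((10, 96), dtype="f")   # 10 tokens, 96-dim vectors
vectors = encoder(tokens)                   # same shape: (10, 96)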
Example #2
def MaxoutWindowEncoder(config):
    from thinc.v2v import Maxout
    from thinc.t2t import ExtractWindow
    from thinc.misc import LayerNorm, Residual
    from thinc.api import chain, clone

    nO = config["width"]
    nW = config["window_size"]
    nP = config["pieces"]
    depth = config["depth"]

    # Same window encoder as above, but with a Maxout activation; the
    # receptive field grows by nW tokens on each side per residual block.
    cnn = chain(ExtractWindow(nW=nW),
                LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP)))
    model = clone(Residual(cnn), depth)
    model.nO = nO
    model.receptive_field = nW * depth
    return model
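A similar illustrative sketch for the Maxout variant (Thinc v7.x assumed), also showing the receptive_field attribute it records:

import numpy

encoder = MaxoutWindowEncoder(
    {"width": 128, "window_size": 1, "pieces": 3, "depth": 4})
print(encoder.receptive_field)              # 1 * 4 = 4 tokens of context per side
vectors = encoder(numpy.zeros((20, 128), dtype="f"))   # shape (20, 128)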
Example #3
# Excerpt from a Thinc v7 example script: a self-attention POS tagger trained
# on the Spanish AnCora corpus. Names not shown here (ancora_pos_tags,
# preprocess, track_progress, SelfAttention, concatenate_ragged, HashEmbed,
# FeatureExtracter, pickle, etc.) come from the module-level imports and
# helper definitions of that script.
def main(width=100,
         depth=4,
         vector_length=64,
         min_batch_size=4,
         max_batch_size=32,
         learn_rate=0.001,
         momentum=0.9,
         dropout=0.0,
         dropout_decay=1e-4,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    # Load the AnCora POS-tagging data: train/dev splits plus the tag count.
    train_data, check_data, nr_tag = ancora_pos_tags()

    # Extract four hashed lexical attributes per token; enable LSUV
    # initialization for all layers.
    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate,
            '&': concatenate_ragged
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        # Embed the four attribute columns, concatenate and mix them with a
        # layer-normalized Maxout, run four self-attention blocks over the
        # ragged batch, then predict tags with a per-token Softmax.
        model = (flatten_add_lengths >> with_getitem(
            0, (lower_case | shape | prefix | suffix) >> LayerNorm(
                Maxout(width, pieces=3))) >> concatenate_ragged(
                    SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                    SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                    SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                    SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1)) >>
                 with_getitem(0, Softmax(nr_tag)) >> unflatten)

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            # Softmax + categorical cross-entropy gradient: prediction minus
            # the (one-hot) target, computed per sequence.
            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            # Grow the batch size gradually from min_batch_size up to
            # max_batch_size over the course of training.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    # Evaluate with the averaged parameters and save the trained tagger.
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
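A hedged sketch of reloading the pickled tagger; dev_X here stands for inputs prepared the same way as inside main(), via FeatureExtracter and preprocess():

import pickle

with open('/tmp/model.pickle', 'rb') as file_:
    tagger = pickle.load(file_)
# Returns one (n_tokens, nr_tag) probability array per input document.
tag_probs = tagger(dev_X[:8])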
Example #4
def LayerNormalizedMaxout(config):
    from thinc.v2v import Maxout
    from thinc.misc import LayerNorm

    width = config["width"]
    pieces = config["pieces"]
    # Maxout block with layer normalization; the input dimension is inferred
    # later, so only the output width is recorded on the layer.
    layer = LayerNorm(Maxout(width, pieces=pieces))
    layer.nO = width
    return layer
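A small usage sketch with illustrative config values; the Maxout's input dimension is deliberately left unset and is inferred from data once the layer is trained inside a larger network:

layer = LayerNormalizedMaxout({"width": 128, "pieces": 3})
print(layer.nO)   # 128, the output width reported to downstream layers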