def MishWindowEncoder(config):
    # Mish is only available in newer Thinc 7.x releases, so import it lazily.
    from thinc.v2v import Mish

    nO = config["width"]
    nW = config["window_size"]
    depth = config["depth"]

    # Concatenate each token with nW neighbours on either side, then map the
    # (2 * nW + 1) * nO window back down to nO with a layer-normalized Mish.
    cnn = chain(
        ExtractWindow(nW=nW),
        LayerNorm(Mish(nO, nO * ((nW * 2) + 1))),
    )
    # Stack `depth` residual copies of the block.
    model = clone(Residual(cnn), depth)
    model.nO = nO
    return model
def MaxoutWindowEncoder(config):
    nO = config["width"]
    nW = config["window_size"]
    nP = config["pieces"]
    depth = config["depth"]

    # Same residual window-CNN block as above, but with a Maxout activation.
    cnn = chain(
        ExtractWindow(nW=nW),
        LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP)),
    )
    model = clone(Residual(cnn), depth)
    model.nO = nO
    # Each of the `depth` blocks sees nW tokens of context on either side.
    model.receptive_field = nW * depth
    return model
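# Usage sketch (not from the original source): both encoder factories read a
# plain dict; the key names match the lookups above, while the numeric values
# here are illustrative assumptions. This also presumes the Thinc 7.x layers
# used above (chain, clone, Residual, LayerNorm, Maxout, ExtractWindow) are
# imported at module level.
example_cfg = {"width": 128, "window_size": 1, "pieces": 3, "depth": 4}
encoder = MaxoutWindowEncoder(example_cfg)
assert encoder.nO == 128
assert encoder.receptive_field == 4  # window_size * depth tokens of context
# MishWindowEncoder takes the same keys minus "pieces":
mish_encoder = MishWindowEncoder({"width": 128, "window_size": 1, "depth": 4})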
def main(width=100, depth=4, vector_length=64,
         min_batch_size=4, max_batch_size=32,
         learn_rate=0.001, momentum=0.9,
         dropout=0.0, dropout_decay=1e-4,
         nb_epoch=20, L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()

    # Extract hash-embedded lexical features (lowercase form, shape, prefix,
    # suffix) for Spanish tokens.
    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain, '+': add,
                                 '|': concatenate, '&': concatenate_ragged}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        # Embed and mix the features, run four self-attention heads over the
        # ragged batch, then predict a tag per token.
        model = (
            flatten_add_lengths
            >> with_getitem(0,
                (lower_case | shape | prefix | suffix)
                >> LayerNorm(Maxout(width, pieces=3)))
            >> concatenate_ragged(
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1),
                SelfAttention(nK=16, nO=16, nI=width, nL=1, nR=1))
            >> with_getitem(0, Softmax(nr_tag))
            >> unflatten
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # Gradient of categorical cross-entropy w.r.t. the softmax inputs.
            gradient = [yh[i] - y[i] for i in range(len(yh))]
            backprop(gradient, optimizer)
            # Grow the batch size gradually, up to max_batch_size.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    # Evaluate and save with the averaged parameters swapped in.
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
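# Side note (illustrative, not from the original script): the update above
# passes `yh - y` to backprop() because, for a softmax output `yh` and a
# one-hot target `y`, the derivative of categorical cross-entropy with respect
# to the softmax inputs is exactly `yh - y`. A tiny numpy sketch of that step:
import numpy as np

yh_example = np.asarray([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1]], dtype="float32")  # predicted tag probabilities
y_example = np.asarray([[1.0, 0.0, 0.0],
                        [0.0, 0.0, 1.0]], dtype="float32")   # one-hot gold tags
d_logits = yh_example - y_example  # the per-sequence gradient fed to backprop()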
def LayerNormalizedMaxout(config):
    width = config["width"]
    pieces = config["pieces"]
    layer = LayerNorm(Maxout(width, pieces=pieces))
    layer.nO = width
    return layer
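# Usage sketch (hedged): the values below are illustrative, and the Thinc 7.x
# LayerNorm/Maxout layers are assumed importable as used above. The input
# width (nI) is left unset here, to be inferred from the first batch of data.
norm_maxout = LayerNormalizedMaxout({"width": 128, "pieces": 3})
assert norm_maxout.nO == 128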