Example No. 1
def main(loc,
         width=64,
         depth=2,
         batch_size=128,
         dropout=0.5,
         dropout_decay=1e-5,
         nb_epoch=20):
    print("Load spaCy")
    nlp = spacy.load('en',
                     parser=False,
                     entity=False,
                     matcher=False,
                     tagger=False)
    print("Construct model")
    Model.ops = CupyOps()
    with Model.define_operators({'>>': chain, '**': clone, '|': concatenate}):
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width * 3)
        sent2vec = (
            get_word_ids
            >> flatten_add_lengths
            >> with_getitem(0, SpacyVectors(nlp, width) >> mwe_encode**depth)
            >> Pooling(mean_pool, max_pool)
        )
        model = (
            ((Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))
            >> Maxout(width, width * 4)
            >> Maxout(width, width)**depth
            >> Softmax(2, width)
        )

    print("Read and parse quora data")
    rows = read_quora_tsv_data(pathlib.Path(loc))
    train, dev = partition(rows, 0.9)
    train_X, train_y = create_data(model.ops, nlp, train)
    dev_X, dev_y = create_data(model.ops, nlp, dev)
    print("Train")
    with model.begin_training(train_X[:20000],
                              train_y[:20000]) as (trainer, optimizer):
        trainer.batch_size = batch_size
        trainer.nb_epoch = nb_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = dropout_decay

        epoch_times = [timer()]
        epoch_loss = [0.]
        n_train_words = sum(len(d0) + len(d1) for d0, d1 in train_X)
        n_dev_words = sum(len(d0) + len(d1) for d0, d1 in dev_X)

        def track_progress():
            stats = get_stats(model, optimizer.averages, dev_X, dev_y,
                              epoch_loss[-1], epoch_times[-1], n_train_words,
                              n_dev_words)
            stats.append(trainer.dropout)
            stats = tuple(stats)
            print(
                len(epoch_loss),
                "%.3f loss, %.3f (%.3f) acc, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f"
                % stats)
            epoch_times.append(timer())
            epoch_loss.append(0.)

        trainer.each_epoch.append(track_progress)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)
Example No. 2
def main(width=100,
         depth=4,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         learn_rate=0.001,
         momentum=0.9,
         dropout=0.5,
         dropout_decay=1e-4,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (with_flatten(
            (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3))**depth
            >> Softmax(nr_tag),
            pad=depth))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
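The example above pickles the trained tagger to /tmp/model.pickle. A minimal round-trip sketch for a later session (hypothetical; assumes dev_X/dev_y are rebuilt with the same preprocessing as above):

import pickle

with open('/tmp/model.pickle', 'rb') as file_:
    loaded_model = pickle.load(file_)
# With dev_X/dev_y prepared as in the example, accuracy can be re-checked, e.g.:
# print(loaded_model.evaluate(dev_X, loaded_model.ops.flatten(dev_y)))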
Example No. 3
def main(width=64,
         depth=2,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         dropout=0.9,
         dropout_decay=1e-3,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = Embed(width, vector_length, 5000, column=0)
        prefix = Embed(width, vector_length, 5000, column=2)
        suffix = Embed(width, vector_length, 5000, column=3)

        model = (layerize(flatten_sequences) >> (lower_case + prefix + suffix)
                 >> Residual(ExtractWindow(nW=1) >> Maxout(width))**depth >>
                 Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2).sum() / y.shape[0]
            if loss > 0.:
                optimizer.set_loss(loss)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            if epoch_train_acc / n_train >= 0.999:
                break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
Example No. 4
def main(width=300, depth=4, vector_length=64,
         min_batch_size=1, max_batch_size=32,
         dropout=0.9, dropout_decay=1e-3, nb_epoch=20, L2=1e-6,
         device="cpu"):
    cfg = dict(locals())
    print(cfg, file=sys.stderr)
    if cupy is not None and device != 'cpu':
        print("Using GPU", file=sys.stderr)
        Model.ops = CupyOps()
        Model.ops.device = device
    train_data, check_data, tag_map = twitter_ner()
    dev_words, dev_tags = list(zip(*check_data))
    nr_tag = len(tag_map)

    extracter = FeatureExtracter('en', attrs=[ORTH, LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain, '+': add,
                                 '|': concatenate}):
        glove = StaticVectors('en', width//2, column=0)
        lower_case = (HashEmbed(width, 500, column=1) + HashEmbed(width, 100, column=1))
        shape = HashEmbed(width//2, 200, column=2)
        prefix     = HashEmbed(width//2, 100, column=3)
        suffix     = HashEmbed(width//2, 100, column=4)

        model = (
            layerize(flatten_sequences)
            >> (lower_case | shape | prefix | suffix)
            >> BN(Maxout(width, pieces=3), nO=width)
            >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, pieces=3), nO=width)) ** depth
            >> Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            #if epoch_train_acc / n_train >= 0.999:
            #    break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)), file=sys.stderr)
        print_dev_sentences(model, dev_words, dev_tags, dev_X, tag_map)
Example No. 5
def main(use_gpu=False, nb_epoch=100):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb(limit=2000)
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))
    
    nlp = spacy.load('en_vectors_web_lg')
    nlp.add_pipe(nlp.create_pipe('sentencizer'), first=True)

    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    train_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(test_X))]

    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    n_sent = sum([len(list(sents)) for sents in train_X])
    print("%d sentences" % n_sent)

    model = build_model(2, width=128, conv_depth=2, depth=2,
                        train_X=train_X, train_y=train_y)
    with model.begin_training(train_X[:100], train_y[:100]) as (trainer, optimizer):
        epoch_loss = [0.]
        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], epoch_var[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.)
            epoch_var.append(0.)
        trainer.each_epoch.append(report_progress)
        batch_sizes = compounding(64, 64, 1.01)
        trainer.dropout = 0.3
        trainer.batch_size = int(next(batch_sizes))
        trainer.dropout_decay = 0.0
        trainer.nb_epoch = nb_epoch
        #optimizer.alpha = 0.1
        #optimizer.max_grad_norm = 10.0
        #optimizer.b1 = 0.0
        #optimizer.b2 = 0.0
        epoch_var = [0.]
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            losses = ((yh-y)**2.).sum(axis=1) / y.shape[0]
            epoch_var[-1] += losses.var()
            loss = losses.mean()
            backprop((yh-y)/yh.shape[0], optimizer)
            epoch_loss[-1] += loss
            trainer.batch_size = int(next(batch_sizes))
        with model.use_params(optimizer.averages):
            print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
Example No. 6
def use_gpu(gpu_id):
    try:
        import cupy.cuda.device
    except ImportError:
        return None
    from thinc.neural.ops import CupyOps
    device = cupy.cuda.device.Device(gpu_id)
    device.use()
    Model.ops = CupyOps()
    Model.Ops = CupyOps
    return device
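A short usage sketch for this helper (GPU id 0 is just an example value; the function returns None when cupy is not installed):

device = use_gpu(0)
if device is None:
    print("cupy not available; continuing with the default NumPy ops")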
Example No. 7
    def forward(X, drop=0.0):
        if isinstance(X, numpy.ndarray):
            ops = NumpyOps()
        else:
            ops = CupyOps()
        output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)

        def backward(y, sgd=None):
            dX = ops.allocate(X.shape)
            dX[:, idx] += y
            return dX

        return output, backward
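The forward pass above closes over an `idx` variable from an enclosing scope that is not shown. A minimal sketch of how such a function is typically wrapped into a layer with thinc's `layerize` (the `get_column` factory name is hypothetical):

import numpy
from thinc.api import layerize
from thinc.neural.ops import NumpyOps, CupyOps


def get_column(idx):
    # Hypothetical factory: build a layer that slices out column block `idx`
    # and routes the gradient back into the matching slice on the backward pass.
    def forward(X, drop=0.0):
        ops = NumpyOps() if isinstance(X, numpy.ndarray) else CupyOps()
        output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)

        def backward(dY, sgd=None):
            dX = ops.allocate(X.shape)
            dX[:, idx] += dY
            return dX

        return output, backward

    return layerize(forward)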
Example No. 8
def main(use_gpu=False, nb_epoch=50):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)

    nlp = Language()

    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    train_X = [nlp.make_doc(x) for x in train_X]
    dev_X = [nlp.make_doc(x) for x in dev_X]

    model = build_model(2, 1)

    print("Begin training")
    with model.begin_training(train_X, train_y,
                              L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y),
                      trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.0
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        for X, y in trainer.iterate(train_X[:1000], train_y[:1000]):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2.).sum() / y.shape[0]
            backprop((yh - y) / y.shape[0], optimizer)
            epoch_loss[-1] += loss
        with model.use_params(optimizer.averages):
            print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
            with open('out.pickle', 'wb') as file_:
                pickle.dump(model, file_, -1)
Example No. 9
def main(length=1000, nO=32, nI=32):
    if CupyOps.xp is not None:
        print("Use GPU")
        Model.ops = CupyOps()
        Model.Ops = CupyOps
        torch.set_default_tensor_type("torch.cuda.FloatTensor")

    pt_model = nn.Linear(nI, nO)
    optimizer = torch.optim.Adam(pt_model.parameters())  # noqa: F841

    model = PyTorchWrapper(pt_model)

    X = Model.ops.xp.ones((length, nI), dtype="f")
    y = 1.0 / X
    for i in range(10):
        yh, get_dX = model.begin_update(X)
        dY = (yh - y) / len(y)
        dX = get_dX(dY)  # noqa: F841
Example No. 10
def main(length=1000, nO=32, nI=32):
    ''' Driver function '''
    if CupyOps.xp is not None:
        print("Use GPU")
        Model.ops = CupyOps()
        Model.Ops = CupyOps
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        print("GPU not available. Running on CPU.")
    pt_model = nn.Linear(nI, nO)
    optimizer = torch.optim.Adam(pt_model.parameters())  # noqa: F841

    model = PyTorchWrapper(pt_model)

    X = Model.ops.xp.ones((length, nI), dtype="f")
    y = 1.0 / X
    for i in range(10):
        yh, get_dX = model.begin_update(X)
        dY = (yh - y) / len(y)
        dX = get_dX(dY)  # noqa: F841
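In both PyTorch-wrapper examples the `torch.optim.Adam` optimizer is created but never stepped (hence the `# noqa: F841`). For comparison, a plain-PyTorch sketch of roughly the same toy problem (fit y = 1/x with a single linear layer), written independently of the thinc wrapper:

import torch
import torch.nn as nn


def torch_only_main(length=1000, nO=32, nI=32, n_iter=10):
    # Pure-PyTorch comparison sketch; not part of the original example.
    pt_model = nn.Linear(nI, nO)
    optimizer = torch.optim.Adam(pt_model.parameters())
    loss_fn = nn.MSELoss()

    X = torch.ones(length, nI)
    y = 1.0 / X  # same shape as the model output because nI == nO
    for _ in range(n_iter):
        optimizer.zero_grad()
        yh = pt_model(X)
        loss = loss_fn(yh, y)
        loss.backward()
        optimizer.step()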
Example No. 11
def main(depth=2, width=512, nb_epoch=30):
    if CupyOps.xp is not None:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    # The configuration here isn't especially good, but it's fine for a demo.
    with Model.define_operators({'**': clone, '>>': chain}):
        model = ReLu(width) >> ReLu(width) >> Softmax()

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)

    dev_y = to_categorical(dev_y)
    with model.begin_training(train_X, train_y,
                              L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y),
                      trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype='float32')
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2.).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
        with model.use_params(optimizer.averages):
            print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
            with open('out.pickle', 'wb') as file_:
                pickle.dump(model, file_, -1)
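            # Optional sketch (assumes dev_X/dev_y as prepared above): inspect
            # predictions for a few dev images with the averaged parameters,
            # reusing the same begin_update call as the training loop.
            sample = model.ops.asarray(dev_X[:5], dtype='float32')
            scores, _ = model.begin_update(sample, drop=0.0)
            print('Predicted:', scores.argmax(axis=1))
            print('Expected: ', dev_y[:5].argmax(axis=1))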
Example No. 12
def main(width=128,
         depth=4,
         vector_length=64,
         max_batch_size=32,
         dropout=0.9,
         drop_decay=1e-4,
         nb_epoch=20,
         L2=1e-5):
    cfg = dict(locals())
    Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    with Model.define_operators({'**': clone, '>>': chain}):
        model = (layerize(flatten_sequences) >> Embed(width, vector_length) >>
                 (ExtractWindow(nW=1) >> Maxout(width, pieces=3))**depth >>
                 Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
Example No. 13
def main(dataset='quora',
         width=50,
         depth=2,
         min_batch_size=1,
         max_batch_size=512,
         dropout=0.2,
         dropout_decay=0.0,
         pooling="mean+max",
         nb_epoch=5,
         pieces=3,
         L2=0.0,
         use_gpu=False,
         out_loc=None,
         quiet=False,
         job_id=None,
         ws_api_url=None,
         rest_api_url=None):
    global CTX
    if job_id is not None:
        CTX = neptune.Context()
        width = CTX.params.width
        L2 = CTX.params.L2
        nb_epoch = CTX.params.nb_epoch
        depth = CTX.params.depth
        max_batch_size = CTX.params.max_batch_size
    cfg = dict(locals())

    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == 'mean+max':
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)

    print("Load spaCy")
    nlp = get_spacy('en')

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    Model.lsuv = True
    #Model.ops = CupyOps()
    with Model.define_operators({
            '>>': chain,
            '**': clone,
            '|': concatenate,
            '+': add
    }):
        mwe_encode = ExtractWindow(nW=1) >> BN(
            Maxout(width, drop_factor=0.0, pieces=pieces))

        sent2vec = (  # List[spacy.token.Doc]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(
                0,
                #(StaticVectors('en', width)
                HashEmbed(width, 3000)
                #+ HashEmbed(width, 3000))
                #>> Residual(mwe_encode ** 2)
            )  # : word_ids{T}
            >> Pooling(mean_pool, max_pool)
            #>> Residual(BN(Maxout(width*2, pieces=pieces), nO=width*2)**2)
            >> Maxout(width * 2, pieces=pieces, drop_factor=0.0) >> logistic)
        model = Siamese(sent2vec, CauchySimilarity(width * 2))

    print("Read and parse data: %s" % dataset)
    if dataset == 'quora':
        train, dev = datasets.quora_questions()
    elif dataset == 'snli':
        train, dev = datasets.snli()
    elif dataset == 'stackxc':
        train, dev = datasets.stack_exchange()
    elif dataset in ('quora+snli', 'snli+quora'):
        train, dev = datasets.quora_questions()
        train2, dev2 = datasets.snli()
        train.extend(train2)
        dev.extend(dev2)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    get_ids = get_word_ids(Model.ops)
    train_X, train_y = preprocess(model.ops, nlp, train, get_ids)
    dev_X, dev_y = preprocess(model.ops, nlp, dev, get_ids)

    with model.begin_training(train_X[:10000], train_y[:10000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate_logloss(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        n_iter = 0

        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            assert yh.shape == y.shape, (yh.shape, y.shape)

            assert (yh >= 0.).all(), yh
            train_acc = ((yh >= 0.5) == (y >= 0.5)).sum()
            loss = model.ops.xp.abs(yh - y).mean()
            epoch_train_acc += train_acc
            backprop(yh - y, optimizer)
            n_iter += 1

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
        if out_loc:
            out_loc = Path(out_loc)
            print('Saving to', out_loc)
            with out_loc.open('wb') as file_:
                pickle.dump(model, file_, -1)
Example No. 14
def main(use_gpu=False):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)

    nlp = spacy.load('en')
    nlp.vocab.lex_attr_getters[PREFIX] = lambda string: string[:3]
    for word in nlp.vocab:
        word.prefix_ = word.orth_[:3]

    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    #train_X = train_X[:1000]
    #train_y = train_y[:1000]
    print("Parse data")
    train_X = list(nlp.pipe(train_X))
    dev_X = list(nlp.pipe(dev_X))
    n_sent = sum([len(list(doc.sents)) for doc in train_X])
    print("%d sentences" % n_sent)

    hpsearch = BestFirstFinder(nonlin=[SELU],
                               width=[64],
                               depth=[2],
                               conv_depth=[2],
                               batch_size=[128],
                               learn_rate=[0.001],
                               L2=[1e-6],
                               beta1=[0.9],
                               beta2=[0.999],
                               dropout=[0.2])

    for hp in hpsearch.configs:
        for _ in range(3):
            model = build_model(2, train_X=train_X, train_y=train_y, **hp)
            with model.begin_training(train_X[:100],
                                      train_y[:100]) as (_, sgd):
                pass
            _, (model_data, train_acc,
                dev_acc) = train_epoch(model,
                                       sgd,
                                       hp,
                                       train_X,
                                       train_y,
                                       dev_X,
                                       dev_y,
                                       device_id=-1 if not use_gpu else 0)
            print('0', dev_acc * 100, train_acc * 100, hp)
            hpsearch.enqueue(model_data, train_acc, dev_acc)
            hpsearch.temperature = 0.0
    print("Train")
    total = 0
    temperature = 0.0
    while True:
        for model, sgd, hp in hpsearch:
            _, (new_model, train_acc,
                dev_acc) = train_epoch(model,
                                       sgd,
                                       hp,
                                       train_X,
                                       train_y,
                                       dev_X,
                                       dev_y,
                                       device_id=-1 if not use_gpu else 0,
                                       temperature=hpsearch.temperature)
        hp = new_model[-1]
        print(
            '%d,%d,%d:\t%.2f\t%.2f\t%.2f\t%d\t%.2f\t%.3f\t%d\t%d\t%.3f\t%.3f\t%.3f'
            % (total, hp['epochs'], hp['parent'], hpsearch.best_acc * 100,
               dev_acc * 100, train_acc * 100, int(
                   hp['batch_size']), hp['dropout'], hp['learn_rate'],
               hp['width'], hp['depth'], hpsearch.temperature,
               hpsearch.queue[0][0], hpsearch.queue[-1][0]))
        total += 1
        hpsearch.enqueue(new_model, train_acc, dev_acc)
Example No. 15
def main(loc=None,
         width=128,
         depth=2,
         max_batch_size=128,
         dropout=0.5,
         dropout_decay=1e-5,
         nb_epoch=30,
         use_gpu=False):
    cfg = dict(locals())

    print("Load spaCy")
    nlp = spacy.load('en',
                     parser=False,
                     entity=False,
                     matcher=False,
                     tagger=False)

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    with Model.define_operators({'>>': chain, '**': clone, '|': concatenate}):
        # Important trick: text isn't like images, and the best way to use
        # convolution is different. Don't use pooling-over-time. Instead,
        # use the window to compute one vector per word, and do this N deep.
        # In the first layer, we adjust each word vector based on the two
        # surrounding words --- this gives us essentially trigram vectors.
        # In the next layer, we have a trigram of trigrams --- so we're
        # conditioning on information from a five word slice. The third layer
        # gives us 7 words. This is like the BiLSTM insight: we're not trying
        # to learn a vector for the whole sentence in this step. We're just
        # trying to learn better, position-sensitive word features. This simple
        # convolution step is much more efficient than BiLSTM, and can be
        # computed in parallel for every token in the batch.
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width * 3)
        # Comments indicate the output type and shape at each step of the pipeline.
        # * B: Number of sentences in the batch
        # * T: Total number of words in the batch
        # (i.e. sum(len(sent) for sent in batch))
        # * W: Width of the network (input hyper-parameter)
        # * ids: ID for each word (integers).
        # * lengths: Number of words in each sentence in the batch (integers)
        # * floats: Standard dense vector.
        # (Dimensions annotated in curly braces.)
        sent2vec = (  # List[spacy.token.Doc]{B}
            #get_word_ids            # : List[ids]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(
                0,  # : word_ids{T}
                # This class integrates a linear projection layer, and loads
                # static embeddings (by default, GloVe common crawl).
                SpacyVectors(nlp, width)  # : floats{T, W}
                >> mwe_encode**depth  # : floats{T, W}
            )  # : (floats{T, W}, lengths{B})
            # Useful trick: Why choose between max pool and mean pool?
            # We may as well have both representations.
            >> Pooling(mean_pool, max_pool)  # : floats{B, 2*W}
        )
        model = (
            ((Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))  # : floats{B, 4*W}
            >> Maxout(width, width * 4)  # : floats{B, W}
            >> Maxout(width, width)**depth  # : floats{B, W}
            >> Softmax(3, width)  # : floats{B, 3}
        )

    print("Read and parse SNLI data")
    train, dev = datasets.snli(loc)
    train_X, train_y = preprocess(model.ops, nlp, train)
    dev_X, dev_y = preprocess(model.ops, nlp, dev)
    assert len(dev_y.shape) == 2
    print("Initialize with data (LSUV)")
    with model.begin_training(train_X[:10000], train_y[:10000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.
        print("Accuracy before training", model.evaluate(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        for X, y in trainer.iterate(train_X, train_y):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # No auto-diff: Just get a callback and pass the data through.
            # Hardly a hardship, and it means we don't have to create/maintain
            # a computational graph. We just use closures.
            backprop(yh - y, optimizer)

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
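
        # Optional post-training check (sketch): evaluate dev accuracy with the
        # averaged parameters, as the other examples in this listing do.
        with model.use_params(optimizer.averages):
            print("Dev accuracy after training", model.evaluate(dev_X, dev_y))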