Exemplo n.º 1
0
 def __init__(self, storage, vecs, drop_factor=0.0, column=0):
     Model.__init__(self)
     self.storage = storage
     self.vecs = vecs
     self.nV = 300
     self.drop_factor = drop_factor
     self.column = column
Exemplo n.º 2
0
def build_model(nr_class, width, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        model = (FeatureExtracter([ORTH]) >> flatten_add_lengths >>
                 with_getitem(0, uniqued(HashEmbed(width, 10000, column=0))) >>
                 Pooling(mean_pool) >> Softmax(nr_class))
    model.lsuv = False
    return model
Exemplo n.º 3
0
    def Model(cls, nr_class, **cfg):
        depth = util.env_opt('parser_hidden_depth', cfg.get('hidden_depth', 1))
        subword_features = util.env_opt('subword_features',
                                        cfg.get('subword_features', True))
        conv_depth = util.env_opt('conv_depth', cfg.get('conv_depth', 4))
        conv_window = util.env_opt('conv_window', cfg.get('conv_depth', 1))
        t2v_pieces = util.env_opt('cnn_maxout_pieces',
                                  cfg.get('cnn_maxout_pieces', 3))
        bilstm_depth = util.env_opt('bilstm_depth', cfg.get('bilstm_depth', 0))
        self_attn_depth = util.env_opt('self_attn_depth',
                                       cfg.get('self_attn_depth', 0))
        assert depth == 1
        parser_maxout_pieces = util.env_opt('parser_maxout_pieces',
                                            cfg.get('maxout_pieces', 2))
        token_vector_width = util.env_opt('token_vector_width',
                                          cfg.get('token_vector_width', 96))
        hidden_width = util.env_opt('hidden_width',
                                    cfg.get('hidden_width', 64))
        embed_size = util.env_opt('embed_size', cfg.get('embed_size', 2000))
        tok2vec = get_t2v(token_vector_width,
                          embed_size,
                          conv_depth=conv_depth,
                          conv_window=conv_window,
                          cnn_maxout_pieces=t2v_pieces,
                          subword_features=subword_features,
                          bilstm_depth=bilstm_depth)
        tok2vec = chain(tok2vec, flatten)
        tok2vec.nO = token_vector_width
        lower = PrecomputableAffine(hidden_width,
                                    nF=cls.nr_feature,
                                    nI=token_vector_width,
                                    nP=parser_maxout_pieces)
        lower.nP = parser_maxout_pieces

        with Model.use_device('cpu'):
            upper = Affine(nr_class, hidden_width, drop_factor=0.0)
        upper.W *= 0

        cfg = {
            'nr_class': nr_class,
            'hidden_depth': depth,
            'token_vector_width': token_vector_width,
            'hidden_width': hidden_width,
            'maxout_pieces': parser_maxout_pieces,
            'pretrained_vectors': None,
            'bilstm_depth': bilstm_depth,
            'self_attn_depth': self_attn_depth,
            'conv_depth': conv_depth,
            'conv_window': conv_window,
            'embed_size': embed_size,
            'cnn_maxout_pieces': t2v_pieces
        }
        return ParserModel(tok2vec, lower, upper), cfg
Exemplo n.º 4
0
def my_tok_to_vec(width, embed_size, pretrained_vectors, **kwargs):
    # Circular imports :(
    from spacy._ml import PyTorchBiLSTM

    cnn_maxout_pieces = kwargs.get("cnn_maxout_pieces", 3)
    conv_depth = kwargs.get("conv_depth", 4)
    bilstm_depth = kwargs.get("bilstm_depth", 0)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    storage = []
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        # norm = HashEmbed(width, embed_size, column=cols.index(NORM), name="embed_norm")
        # prefix = HashEmbed(
        #     width, embed_size // 2, column=cols.index(PREFIX), name="embed_prefix"
        # )
        # suffix = HashEmbed(
        #     width, embed_size // 2, column=cols.index(SUFFIX), name="embed_suffix"
        # )
        shape = HashEmbed(
            width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape"
        )
        glove = Vectors(storage, pretrained_vectors, width, column=cols.index(NORM), )
        vec_width = glove.nV

        embed = uniqued(
            (glove | shape)
            >> LN(Maxout(width, width + vec_width, pieces=3)),
            column=cols.index(ORTH),
        )

        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )

        tok2vec = SaveDoc(storage) >> FeatureExtracter(cols) >> with_flatten(
            embed >> convolution ** conv_depth, pad=conv_depth
        )

        if bilstm_depth >= 1:
            tok2vec = tok2vec >> PyTorchBiLSTM(width, width, bilstm_depth)
        # Work around thinc API limitations :(. TODO: Revise in Thinc 7

        tok2vec.nO = width
        tok2vec.embed = embed
    return tok2vec
Exemplo n.º 5
0
def build_model(n_tags, n_words, word_width, tag_width, hidden_width):
    with Model.define_operators({'|': concatenate, '>>': chain}):
        words_model = (
            with_flatten(
                Embed(word_width, word_width, n_words), pad=0
            )
            >> BiLSTM(word_width, word_width)
            >> flatten_add_lengths
            >> getitem(0)
            >> Affine(hidden_width, word_width * 2)
            >> pad_and_reshape
        )

        tags_model = (
            Embed(tag_width, tag_width, n_tags)
            >> Affine(hidden_width, tag_width)
        )

        state_model = Affine(hidden_width, hidden_width)

        output_model = Softmax(n_tags, hidden_width)
        words_model.nO = hidden_width
        state_model.nO = hidden_width
        output_model.nO = n_tags

    def fwd_step(features, drop=0.):
        word_feats, prev_tags, prev_state = features
        tag_feats, bp_tags = tags_model.begin_update(prev_tags, drop=drop)
        state_feats, bp_state = state_model.begin_update(prev_state, drop=drop)

        preact = word_feats + tag_feats + state_feats
        nonlin = preact > 0
        state = preact * nonlin
        scores, bp_scores = output_model.begin_update(state, drop=drop)

        def bwd_step(d_scores, d_next_state, sgd=None):
            d_state = d_next_state + bp_scores(d_scores, sgd=sgd)
            d_state *= nonlin
            bp_tags(d_state, sgd=sgd)
            d_prev_state = bp_state(d_state, sgd=sgd)
            return d_state, d_prev_state
        (state, scores), bwd_step
    return words_model, fwd_step
Exemplo n.º 6
0
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        embed = ((HashEmbed(width, 5000, column=1)
                  | HashEmbed(width // 2, 750, column=2)
                  | HashEmbed(width // 2, 750, column=3)
                  | HashEmbed(width // 2, 750, column=4)) >> Maxout(width))

        sent2vec = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE]) >>
            flatten_add_lengths >> with_getitem(
                0,
                uniqued(embed, column=0) >>
                Residual(ExtractWindow(nW=1) >> SELU(width))**conv_depth) >>
            ParametricAttention(width) >> Pooling(sum_pool) >> Residual(
                SELU(width))**depth)

        model = (
            foreach_sentence(sent2vec, drop_factor=2.0) >> flatten_add_lengths
            >> ParametricAttention(width, hard=False) >> Pooling(sum_pool) >>
            Residual(SELU(width))**depth >> Softmax(nr_class))
    model.lsuv = False
    return model
Exemplo n.º 7
0
def main(dataset='quora',
         width=128,
         depth=2,
         min_batch_size=128,
         max_batch_size=128,
         dropout=0.2,
         dropout_decay=0.0,
         pooling="mean+max",
         nb_epoch=20,
         pieces=3,
         use_gpu=False,
         out_loc=None,
         quiet=False):
    cfg = dict(locals())
    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == 'mean+max':
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)

    print("Load spaCy")
    nlp = get_spacy('en')

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    with Model.define_operators({
            '>>': chain,
            '**': clone,
            '|': concatenate,
            '*': multiply
    }):
        # Important trick: text isn't like images, and the best way to use
        # convolution is different. Don't use pooling-over-time. Instead,
        # use the window to compute one vector per word, and do this N deep.
        # In the first layer, we adjust each word vector based on the two
        # surrounding words --- this gives us essentially trigram vectors.
        # In the next layer, we have a trigram of trigrams --- so we're
        # conditioning on information from a five word slice. The third layer
        # gives us 7 words. This is like the BiLSTM insight: we're not trying
        # to learn a vector for the whole sentence in this step. We're just
        # trying to learn better, position-sensitive word features. This simple
        # convolution step is much more efficient than BiLSTM, and can be
        # computed in parallel for every token in the batch.
        mwe_encode = ExtractWindow(nW=1) >> Maxout(
            width, width * 3, pieces=pieces)
        # Comments indicate the output type and shape at each step of the pipeline.
        # * B: Number of sentences in the batch
        # * T: Total number of words in the batch
        # (i.e. sum(len(sent) for sent in batch))
        # * W: Width of the network (input hyper-parameter)
        # * ids: ID for each word (integers).
        # * lengths: Number of words in each sentence in the batch (integers)
        # * floats: Standard dense vector.
        # (Dimensions annotated in curly braces.)
        sent2vec = (  # List[spacy.token.Doc]{B}
            get_word_ids >> flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(
                0,  # : word_ids{T}
                (TokenWeights(Model.ops, nlp) * SpacyVectors('en', width) >>
                 mwe_encode**depth))  # : (floats{T, W}, lengths{B})
            # Useful trick: Why choose between max pool and mean pool?
            # We may as well have both representations.
            >> pool_layer  # : floats{B, 2*W}
        )
        model = (
            diff(sent2vec)  # : floats{B, 8*W}
            >> Maxout(width, pieces=pieces)  # : floats{B, W}
            >> Softmax()  # : floats{B, 2}
        )

    print("Read and parse data: %s" % dataset)
    if dataset == 'quora':
        train, dev = datasets.quora_questions()
    elif dataset == 'snli':
        train, dev = datasets.snli()
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    train_X, train_y = preprocess(model.ops, nlp, train)
    dev_X, dev_y = preprocess(model.ops, nlp, dev)
    assert len(dev_y.shape) == 2
    print("Initialize with data (LSUV)")
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # No auto-diff: Just get a callback and pass the data through.
            # Hardly a hardship, and it means we don't have to create/maintain
            # a computational graph. We just use closures.
            backprop(yh - y, optimizer)

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
        if out_loc:
            out_loc = Path(out_loc)
            print('Saving to', out_loc)
            with out_loc.open('wb') as file_:
                pickle.dump(model, file_, -1)
Exemplo n.º 8
0
def main(
    dataset="quora",
    width=64,
    depth=2,
    min_batch_size=1,
    max_batch_size=128,
    dropout=0.0,
    dropout_decay=0.0,
    pooling="mean+max",
    nb_epoch=20,
    pieces=3,
    use_gpu=False,
    out_loc=None,
    quiet=False,
):
    cfg = dict(locals())
    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == "mean+max":
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)

    print("Load spaCy")
    nlp = get_spacy("en")

    # if use_gpu:
    #    Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    with Model.define_operators({">>": chain, "**": clone, "|": concatenate, "+": add}):
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width * 3, pieces=pieces)

        embed = StaticVectors("en", width)  # + Embed(width, width*2, 5000)
        # Comments indicate the output type and shape at each step of the pipeline.
        # * B: Number of sentences in the batch
        # * T: Total number of words in the batch
        # (i.e. sum(len(sent) for sent in batch))
        # * W: Width of the network (input hyper-parameter)
        # * ids: ID for each word (integers).
        # * lengths: Number of words in each sentence in the batch (integers)
        # * floats: Standard dense vector.
        # (Dimensions annotated in curly braces.)
        sent2vec = (  # List[spacy.token.Doc]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(
                0, embed >> mwe_encode ** depth  # : word_ids{T}
            )  # : (floats{T, W}, lengths{B})
            >> pool_layer
            >> Maxout(width, pieces=pieces)
            >> Maxout(width, pieces=pieces)
        )
        model = (
            ((Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))
            >> Maxout(width, pieces=pieces)
            >> Maxout(width, pieces=pieces)
            >> Softmax(2)
        )

    print("Read and parse data: %s" % dataset)
    if dataset == "quora":
        train, dev = datasets.quora_questions()
    elif dataset == "snli":
        train, dev = datasets.snli()
    elif dataset == "stackxc":
        train, dev = datasets.stack_exchange()
    elif dataset in ("quora+snli", "snli+quora"):
        train, dev = datasets.quora_questions()
        train2, dev2 = datasets.snli()
        train.extend(train2)
        dev.extend(dev2)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    get_ids = get_word_ids(Model.ops)
    train_X, train_y = preprocess(model.ops, nlp, train, get_ids)
    dev_X, dev_y = preprocess(model.ops, nlp, dev, get_ids)

    print("Initialize with data (LSUV)")
    print(dev_y.shape)
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (
        trainer,
        optimizer,
    ):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            assert yh.shape == y.shape, (yh.shape, y.shape)
            # No auto-diff: Just get a callback and pass the data through.
            # Hardly a hardship, and it means we don't have to create/maintain
            # a computational graph. We just use closures.

            assert (yh >= 0.0).all()
            train_acc = (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            epoch_train_acc += train_acc

            backprop(yh - y, optimizer)

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
        if out_loc:
            out_loc = Path(out_loc)
            print("Saving to", out_loc)
            with out_loc.open("wb") as file_:
                pickle.dump(model, file_, -1)
Exemplo n.º 9
0
 def __init__(self, length):
     Model.__init__(self)
     self.nO = length
Exemplo n.º 10
0
def main(dataset='quora',
         width=50,
         depth=2,
         min_batch_size=1,
         max_batch_size=512,
         dropout=0.2,
         dropout_decay=0.0,
         pooling="mean+max",
         nb_epoch=5,
         pieces=3,
         L2=0.0,
         use_gpu=False,
         out_loc=None,
         quiet=False,
         job_id=None,
         ws_api_url=None,
         rest_api_url=None):
    global CTX
    if job_id is not None:
        CTX = neptune.Context()
        width = CTX.params.width
        L2 = CTX.params.L2
        nb_epoch = CTX.params.nb_epoch
        depth = CTX.params.depth
        max_batch_size = CTX.params.max_batch_size
    cfg = dict(locals())

    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == 'mean+max':
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)

    print("Load spaCy")
    nlp = get_spacy('en')

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    Model.lsuv = True
    #Model.ops = CupyOps()
    with Model.define_operators({
            '>>': chain,
            '**': clone,
            '|': concatenate,
            '+': add
    }):
        mwe_encode = ExtractWindow(nW=1) >> BN(
            Maxout(width, drop_factor=0.0, pieces=pieces))

        sent2vec = (  # List[spacy.token.Doc]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(0,
                            (StaticVectors('en', width) +
                             HashEmbed(width, 3000) + HashEmbed(width, 3000))
                            #>> Residual(mwe_encode ** 2)
                            )  # : word_ids{T}
            >> Pooling(mean_pool, max_pool)
            #>> Residual(BN(Maxout(width*2, pieces=pieces), nO=width*2)**2)
            >> Maxout(width * 2, pieces=pieces, drop_factor=0.0) >> logistic)
        model = Siamese(sent2vec, CauchySimilarity(width * 2))

    print("Read and parse data: %s" % dataset)
    if dataset == 'quora':
        train, dev = datasets.quora_questions()
    elif dataset == 'snli':
        train, dev = datasets.snli()
    elif dataset == 'stackxc':
        train, dev = datasets.stack_exchange()
    elif dataset in ('quora+snli', 'snli+quora'):
        train, dev = datasets.quora_questions()
        train2, dev2 = datasets.snli()
        train.extend(train2)
        dev.extend(dev2)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    get_ids = get_word_ids(Model.ops)
    train_X, train_y = preprocess(model.ops, nlp, train, get_ids)
    dev_X, dev_y = preprocess(model.ops, nlp, dev, get_ids)

    with model.begin_training(train_X[:10000], train_y[:10000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate_logloss(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        n_iter = 0

        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            assert yh.shape == y.shape, (yh.shape, y.shape)

            assert (yh >= 0.).all(), yh
            train_acc = ((yh >= 0.5) == (y >= 0.5)).sum()
            loss = model.ops.xp.abs(yh - y).mean()
            track_stat('loss', n_iter, loss)
            track_stat('train acc', n_iter, train_acc)
            track_stat('LR', n_iter, optimizer.lr(n_iter + 1))
            epoch_train_acc += train_acc
            backprop(yh - y, optimizer)
            optimizer.set_loss(loss)
            n_iter += 1

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
            track_stat('Batch size', n_iter, y.shape[0])
        if out_loc:
            out_loc = Path(out_loc)
            print('Saving to', out_loc)
            with out_loc.open('wb') as file_:
                pickle.dump(model, file_, -1)
Exemplo n.º 11
0
def main(dataset='quora',
         width=64,
         depth=1,
         min_batch_size=128,
         max_batch_size=128,
         dropout=0.0,
         dropout_decay=0.0,
         pooling="mean+max",
         nb_epoch=20,
         pieces=2,
         use_gpu=False,
         out_loc=None,
         quiet=False):
    cfg = dict(locals())
    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == 'mean+max':
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)

    print("Load spaCy")
    nlp = get_spacy('en')

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    with Model.define_operators({
            '>>': chain,
            '**': clone,
            '|': concatenate,
            '+': add
    }):
        mwe_encode = ExtractWindow(nW=1) >> Maxout(
            width, width * 3, pieces=pieces)

        embed = StaticVectors('en', width)  #+ Embed(width, width, 5000)
        sent2mat = (get_word_ids(Model.ops) >>
                    with_flatten(embed >> mwe_encode**depth))
        model = Siamese(sent2mat, WordMoversSimilarity(Model.ops))

    print("Read and parse data: %s" % dataset)
    if dataset == 'quora':
        train, dev = datasets.quora_questions()
    elif dataset == 'snli':
        train, dev = datasets.snli()
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    train_X, train_y = preprocess(model.ops, nlp, train)
    dev_X, dev_y = preprocess(model.ops, nlp, dev)
    print("Initialize with data (LSUV)")
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # No auto-diff: Just get a callback and pass the data through.
            # Hardly a hardship, and it means we don't have to create/maintain
            # a computational graph. We just use closures.

            train_acc = ((yh >= 0.5) == (y >= 0.5)).sum()
            epoch_train_acc += train_acc

            backprop(yh - y, optimizer)

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
        if out_loc:
            out_loc = Path(out_loc)
            print('Saving to', out_loc)
            with out_loc.open('wb') as file_:
                pickle.dump(model, file_, -1)
Exemplo n.º 12
0
 def __init__(self, length):
     Model.__init__(self)
     self.nO = length
def main(dataset='quora', width=64, depth=2, min_batch_size=1,
        max_batch_size=128, dropout=0.0, dropout_decay=0.0, pooling="mean+max",
        nb_epoch=20, pieces=3, use_gpu=False, out_loc=None, quiet=False):
    cfg = dict(locals())
    if out_loc:
        out_loc = Path(out_loc)
        if not out_loc.parent.exists():
            raise IOError("Can't open output location: %s" % out_loc)
    print(cfg)
    if pooling == 'mean+max':
        pool_layer = Pooling(mean_pool, max_pool)
    elif pooling == "mean":
        pool_layer = mean_pool
    elif pooling == "max":
        pool_layer = max_pool
    else:
        raise ValueError("Unrecognised pooling", pooling)


    print("Load spaCy")
    nlp = get_spacy('en')

    #if use_gpu:
    #    Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    with Model.define_operators({'>>': chain, '**': clone, '|': concatenate,
                                 '+': add}):
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width*3, pieces=pieces)

        embed = StaticVectors('en', width)# + Embed(width, width*2, 5000)
        # Comments indicate the output type and shape at each step of the pipeline.
        # * B: Number of sentences in the batch
        # * T: Total number of words in the batch
        # (i.e. sum(len(sent) for sent in batch))
        # * W: Width of the network (input hyper-parameter)
        # * ids: ID for each word (integers).
        # * lengths: Number of words in each sentence in the batch (integers)
        # * floats: Standard dense vector.
        # (Dimensions annotated in curly braces.)
        sent2vec = ( # List[spacy.token.Doc]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(0,      # : word_ids{T}
                 embed
                 >> mwe_encode ** depth
            ) # : (floats{T, W}, lengths{B})
            >> pool_layer
            >> Maxout(width, pieces=pieces)
            >> Maxout(width, pieces=pieces)
        )
        model = (
            ((Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))
            >> Maxout(width, pieces=pieces)
            >> Maxout(width, pieces=pieces)
            >> Softmax(2)
        )

    print("Read and parse data: %s" % dataset)
    if dataset == 'quora':
        train, dev = datasets.quora_questions()
    elif dataset == 'snli':
        train, dev = datasets.snli()
    elif dataset == 'stackxc':
        train, dev = datasets.stack_exchange()
    elif dataset in ('quora+snli', 'snli+quora'):
        train, dev = datasets.quora_questions()
        train2, dev2 = datasets.snli()
        train.extend(train2)
        dev.extend(dev2)
    else:
        raise ValueError("Unknown dataset: %s" % dataset)
    get_ids = get_word_ids(Model.ops)
    train_X, train_y = preprocess(model.ops, nlp, train, get_ids)
    dev_X, dev_y = preprocess(model.ops, nlp, dev, get_ids)

    print("Initialize with data (LSUV)")
    print(dev_y.shape)
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        print("Accuracy before training", model.evaluate(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        for X, y in trainer.iterate(train_X, train_y, progress_bar=not quiet):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            assert yh.shape == y.shape, (yh.shape, y.shape)
            # No auto-diff: Just get a callback and pass the data through.
            # Hardly a hardship, and it means we don't have to create/maintain
            # a computational graph. We just use closures.

            assert (yh >= 0.).all()
            train_acc = (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            epoch_train_acc += train_acc

            backprop(yh-y, optimizer)

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
        if out_loc:
            out_loc = Path(out_loc)
            print('Saving to', out_loc)
            with out_loc.open('wb') as file_:
                pickle.dump(model, file_, -1)
Exemplo n.º 14
0
def main(loc=None,
         width=128,
         depth=2,
         max_batch_size=128,
         dropout=0.5,
         dropout_decay=1e-5,
         nb_epoch=30,
         use_gpu=False):
    cfg = dict(locals())

    print("Load spaCy")
    nlp = spacy.load('en',
                     parser=False,
                     entity=False,
                     matcher=False,
                     tagger=False)

    if use_gpu:
        Model.ops = CupyOps()

    print("Construct model")
    # Bind operators for the scope of the block:
    # * chain (>>): Compose models in a 'feed forward' style,
    # i.e. chain(f, g)(x) -> g(f(x))
    # * clone (**): Create n copies of a model, and chain them, i.e.
    # (f ** 3)(x) -> f''(f'(f(x))), where f, f' and f'' have distinct weights.
    # * concatenate (|): Merge the outputs of two models into a single vector,
    # i.e. (f|g)(x) -> hstack(f(x), g(x))
    with Model.define_operators({'>>': chain, '**': clone, '|': concatenate}):
        # Important trick: text isn't like images, and the best way to use
        # convolution is different. Don't use pooling-over-time. Instead,
        # use the window to compute one vector per word, and do this N deep.
        # In the first layer, we adjust each word vector based on the two
        # surrounding words --- this gives us essentially trigram vectors.
        # In the next layer, we have a trigram of trigrams --- so we're
        # conditioning on information from a five word slice. The third layer
        # gives us 7 words. This is like the BiLSTM insight: we're not trying
        # to learn a vector for the whole sentence in this step. We're just
        # trying to learn better, position-sensitive word features. This simple
        # convolution step is much more efficient than BiLSTM, and can be
        # computed in parallel for every token in the batch.
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width * 3)
        # Comments indicate the output type and shape at each step of the pipeline.
        # * B: Number of sentences in the batch
        # * T: Total number of words in the batch
        # (i.e. sum(len(sent) for sent in batch))
        # * W: Width of the network (input hyper-parameter)
        # * ids: ID for each word (integers).
        # * lengths: Number of words in each sentence in the batch (integers)
        # * floats: Standard dense vector.
        # (Dimensions annotated in curly braces.)
        sent2vec = (  # List[spacy.token.Doc]{B}
            #get_word_ids            # : List[ids]{B}
            flatten_add_lengths  # : (ids{T}, lengths{B})
            >> with_getitem(
                0,  # : word_ids{T}
                # This class integrates a linear projection layer, and loads
                # static embeddings (by default, GloVe common crawl).
                SpacyVectors(nlp, width)  # : floats{T, W}
                >> mwe_encode**depth  # : floats{T, W}
            )  # : (floats{T, W}, lengths{B})
            # Useful trick: Why choose between max pool and mean pool?
            # We may as well have both representations.
            >> Pooling(mean_pool, max_pool)  # : floats{B, 2*W}
        )
        model = ((
            (Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))  # : floats{B, 4*W}
                 >> Maxout(width, width * 4)  # : floats{B, W}
                 >> Maxout(width, width)**depth  # : floats{B, W}
                 >> Softmax(3, width)  # : floats{B, 2}
                 )

    print("Read and parse SNLI data")
    train, dev = datasets.snli(loc)
    train_X, train_y = preprocess(model.ops, nlp, train)
    dev_X, dev_y = preprocess(model.ops, nlp, dev)
    assert len(dev_y.shape) == 2
    print("Initialize with data (LSUV)")
    with model.begin_training(train_X[:10000], train_y[:10000],
                              **cfg) as (trainer, optimizer):
        # Pass a callback to print progress. Give it all the local scope,
        # because why not?
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.
        print("Accuracy before training", model.evaluate(dev_X, dev_y))
        print("Train")
        global epoch_train_acc
        for X, y in trainer.iterate(train_X, train_y):
            # Slightly useful trick: Decay the dropout as training proceeds.
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # No auto-diff: Just get a callback and pass the data through.
            # Hardly a hardship, and it means we don't have to create/maintain
            # a computational graph. We just use closures.
            backprop(yh - y, optimizer)

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()

            # Slightly useful trick: start with low batch size, accelerate.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
Exemplo n.º 15
0
 def __init__(self, storage):
     Model.__init__(self)
     self.storage = storage