def Tok2Vec(width, embed_size, **kwargs):
    pretrained_vectors = kwargs.get("pretrained_vectors", None)
    cnn_maxout_pieces = kwargs.get("cnn_maxout_pieces", 3)
    subword_features = kwargs.get("subword_features", True)
    conv_depth = kwargs.get("conv_depth", 4)
    bilstm_depth = kwargs.get("bilstm_depth", 0)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators(
        {">>": chain, "|": concatenate, "**": clone, "+": add, "*": reapply}
    ):
        norm = HashEmbed(width, embed_size, column=cols.index(NORM), name="embed_norm")
        if subword_features:
            prefix = HashEmbed(
                width, embed_size // 2, column=cols.index(PREFIX), name="embed_prefix"
            )
            suffix = HashEmbed(
                width, embed_size // 2, column=cols.index(SUFFIX), name="embed_suffix"
            )
            shape = HashEmbed(
                width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape"
            )
        else:
            prefix, suffix, shape = (None, None, None)
        if pretrained_vectors is not None:
            glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))
            if subword_features:
                embed = uniqued(
                    (glove | norm | prefix | suffix | shape)
                    >> LN(Maxout(width, width * 5, pieces=3)),
                    column=cols.index(ORTH),
                )
            else:
                embed = uniqued(
                    (glove | norm) >> LN(Maxout(width, width * 2, pieces=3)),
                    column=cols.index(ORTH),
                )
        elif subword_features:
            embed = uniqued(
                (norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 4, pieces=3)),
                column=cols.index(ORTH),
            )
        else:
            embed = norm

        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )
        tok2vec = FeatureExtracter(cols) >> with_flatten(
            embed >> convolution ** conv_depth, pad=conv_depth
        )
        if bilstm_depth >= 1:
            tok2vec = tok2vec >> PyTorchBiLSTM(width, width, bilstm_depth)
        # Work around thinc API limitations :(. TODO: Revise in Thinc 7
        tok2vec.nO = width
        tok2vec.embed = embed
    return tok2vec
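# --- Usage sketch (not part of the original source) -------------------------------
# A minimal, hedged example of driving the Tok2Vec factory above; it assumes the
# thinc/spaCy 2.x helpers referenced in the body (HashEmbed, Maxout, LN, ...) are in
# scope, as they are in spacy._ml. The sizes are illustrative, not library defaults.
tok2vec_example = Tok2Vec(width=96, embed_size=2000, conv_depth=4, bilstm_depth=0)
# Called on a list of Doc objects, the model returns one array of shape
# (n_tokens, width) per Doc, e.g.:
# token_vectors = tok2vec_example([nlp.make_doc("An example sentence.")])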
def MultiHashEmbed(config):
    # For backwards compatibility with models before the architecture registry,
    # we have to be careful to get exactly the same model structure. One subtle
    # trick is that when we define concatenation with the operator, the operator
    # is actually binary associative. So when we write (a | b | c), we're actually
    # getting concatenate(concatenate(a, b), c). That's why the implementation
    # is a bit ugly here.
    cols = config["columns"]
    width = config["width"]
    rows = config["rows"]

    norm = HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm", seed=1)
    if config["use_subwords"]:
        prefix = HashEmbed(
            width, rows // 2, column=cols.index("PREFIX"), name="embed_prefix", seed=2
        )
        suffix = HashEmbed(
            width, rows // 2, column=cols.index("SUFFIX"), name="embed_suffix", seed=3
        )
        shape = HashEmbed(
            width, rows // 2, column=cols.index("SHAPE"), name="embed_shape", seed=4
        )
    if config.get("@pretrained_vectors"):
        glove = make_layer(config["@pretrained_vectors"])
    mix = make_layer(config["@mix"])

    with Model.define_operators({">>": chain, "|": concatenate}):
        if config["use_subwords"] and config["@pretrained_vectors"]:
            mix._layers[0].nI = width * 5
            layer = uniqued(
                (glove | norm | prefix | suffix | shape) >> mix,
                column=cols.index("ORTH"),
            )
        elif config["use_subwords"]:
            mix._layers[0].nI = width * 4
            layer = uniqued(
                (norm | prefix | suffix | shape) >> mix, column=cols.index("ORTH")
            )
        elif config["@pretrained_vectors"]:
            mix._layers[0].nI = width * 2
            layer = uniqued((glove | norm) >> mix, column=cols.index("ORTH"))
        else:
            layer = norm
    layer.cfg = config
    return layer
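# --- Example input (not part of the original source) ------------------------------
# A hedged sketch of the kind of config dict MultiHashEmbed expects. The exact
# sub-config schema handled by make_layer() depends on the architecture registry in
# use, so the "@mix" entry below is only a hypothetical placeholder.
example_config = {
    "columns": ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"],
    "width": 96,
    "rows": 2000,
    "use_subwords": True,
    "@pretrained_vectors": None,
    "@mix": {"arch": "spacy.LayerNormalizedMaxout.v1", "config": {"width": 96, "pieces": 3}},
}
# embed_layer = MultiHashEmbed(example_config)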
def Tok2Vec(width, embed_size, **kwargs):
    pretrained_vectors = kwargs.get('pretrained_vectors', None)
    cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 2)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators(
        {'>>': chain, '|': concatenate, '**': clone, '+': add, '*': reapply}
    ):
        norm = HashEmbed(width, embed_size, column=cols.index(NORM), name='embed_norm')
        prefix = HashEmbed(
            width, embed_size // 2, column=cols.index(PREFIX), name='embed_prefix'
        )
        suffix = HashEmbed(
            width, embed_size // 2, column=cols.index(SUFFIX), name='embed_suffix'
        )
        shape = HashEmbed(
            width, embed_size // 2, column=cols.index(SHAPE), name='embed_shape'
        )
        if pretrained_vectors is not None:
            glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))
            embed = uniqued(
                (glove | norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 5, pieces=3)),
                column=cols.index(ORTH),
            )
        else:
            embed = uniqued(
                (norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 4, pieces=3)),
                column=cols.index(ORTH),
            )
        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )
        tok2vec = FeatureExtracter(cols) >> with_flatten(
            embed >> convolution ** 4, pad=4
        )
        # Work around thinc API limitations :(. TODO: Revise in Thinc 7
        tok2vec.nO = width
        tok2vec.embed = embed
    return tok2vec
def build_model(nr_class, width, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        model = (
            FeatureExtracter([ORTH])
            >> flatten_add_lengths
            >> with_getitem(0, uniqued(HashEmbed(width, 10000, column=0)))
            >> Pooling(mean_pool)
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
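# --- Usage sketch (not part of the original source) -------------------------------
# A hedged example of the simple pooled bag-of-words classifier above; nr_class and
# width are illustrative. As with the other models here, it assumes the thinc v7
# helpers used in the body are importable.
bow_model = build_model(nr_class=3, width=64)
# scores = bow_model([nlp.make_doc("great value")])  # expected: (n_docs, nr_class) scores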
def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
        if cfg.get('low_data') and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
        ) >> with_flatten(
            uniqued(
                (lower | prefix | suffix | shape)
                >> LN(Maxout(width, width + (width // 2) * 3)),
                column=0,
            )
        )

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims)
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None

        cnn_model = (
            vectors
            >> with_flatten(
                LN(Maxout(width, vectors_width))
                >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** 2,
                pad=2,
            )
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = _preprocess_doc >> LinearModel(nr_class, drop_factor=0.0)

        model = (
            (linear_model | cnn_model)
            >> zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
            >> logistic
        )
    model.nO = nr_class
    model.lsuv = False
    return model
def my_tok_to_vec(width, embed_size, pretrained_vectors, **kwargs):
    # Circular imports :(
    from spacy._ml import PyTorchBiLSTM

    cnn_maxout_pieces = kwargs.get("cnn_maxout_pieces", 3)
    conv_depth = kwargs.get("conv_depth", 4)
    bilstm_depth = kwargs.get("bilstm_depth", 0)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]

    storage = []
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        # norm = HashEmbed(width, embed_size, column=cols.index(NORM), name="embed_norm")
        # prefix = HashEmbed(
        #     width, embed_size // 2, column=cols.index(PREFIX), name="embed_prefix"
        # )
        # suffix = HashEmbed(
        #     width, embed_size // 2, column=cols.index(SUFFIX), name="embed_suffix"
        # )
        shape = HashEmbed(
            width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape"
        )
        glove = Vectors(
            storage, pretrained_vectors, width, column=cols.index(NORM)
        )
        vec_width = glove.nV

        embed = uniqued(
            (glove | shape) >> LN(Maxout(width, width + vec_width, pieces=3)),
            column=cols.index(ORTH),
        )
        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )
        tok2vec = SaveDoc(storage) >> FeatureExtracter(cols) >> with_flatten(
            embed >> convolution ** conv_depth, pad=conv_depth
        )
        if bilstm_depth >= 1:
            tok2vec = tok2vec >> PyTorchBiLSTM(width, width, bilstm_depth)
        # Work around thinc API limitations :(. TODO: Revise in Thinc 7
        tok2vec.nO = width
        tok2vec.embed = embed
    return tok2vec
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        embed = (
            (HashEmbed(width, 5000, column=1)
             | HashEmbed(width // 2, 750, column=2)
             | HashEmbed(width // 2, 750, column=3)
             | HashEmbed(width // 2, 750, column=4))
            >> Maxout(width)
        )

        sent2vec = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE])
            >> flatten_add_lengths
            >> with_getitem(
                0,
                uniqued(embed, column=0)
                >> Residual(ExtractWindow(nW=1) >> SELU(width)) ** conv_depth,
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(SELU(width)) ** depth
        )

        model = (
            foreach_sentence(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(SELU(width)) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
def MultiHashEmbed(config):
    cols = config["columns"]
    width = config["width"]
    rows = config["rows"]

    tables = [HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm")]
    if config["use_subwords"]:
        for feature in ["PREFIX", "SUFFIX", "SHAPE"]:
            tables.append(
                HashEmbed(
                    width,
                    rows // 2,
                    column=cols.index(feature),
                    name="embed_%s" % feature.lower(),
                )
            )
    if config.get("@pretrained_vectors"):
        tables.append(make_layer(config["@pretrained_vectors"]))
    mix = make_layer(config["@mix"])
    # This is a pretty ugly hack. Not sure what the best solution should be.
    mix._layers[0].nI = sum(table.nO for table in tables)
    layer = uniqued(chain(concatenate(*tables), mix), column=cols.index("ORTH"))
    layer.cfg = config
    return layer
def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators({">>": chain, "+": add, "|": concatenate, "**": clone}):
        if cfg.get("low_data") and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = FeatureExtracter(
            [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
        ) >> with_flatten(
            uniqued(
                (lower | prefix | suffix | shape)
                >> LN(Maxout(width, width + (width // 2) * 3)),
                column=0,
            )
        )

        if pretrained_dims:
            static_vectors = SpacyVectors >> with_flatten(
                Affine(width, pretrained_dims)
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None

        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** depth,
            pad=depth,
        )
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = build_bow_text_classifier(
            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
        )
        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (
                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0)) >> logistic
            )
        model = (linear_model | cnn_model) >> output_layer
    model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model
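# --- Usage sketch (not part of the original source) -------------------------------
# A hedged example of constructing the CNN + bag-of-words ensemble classifier above.
# It assumes build_bow_text_classifier and the other spaCy 2.x helpers referenced in
# the body are importable; the cfg values are illustrative only.
textcat_model = build_text_classifier(
    nr_class=2, width=64, ngram_size=1, exclusive_classes=False
)
# Like the Tok2Vec factories above, the returned model maps a list of Doc objects to
# a (n_docs, nr_class) array of scores; model.tok2vec exposes the shared encoder.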
def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
        if cfg.get('low_data') and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
            >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape)
                    >> LN(Maxout(width, width + (width // 2) * 3)),
                    column=0,
                )
            )
        )

        if pretrained_dims:
            static_vectors = (
                SpacyVectors >> with_flatten(Affine(width, pretrained_dims))
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None

        cnn_model = (
            vectors
            >> with_flatten(
                LN(Maxout(width, vectors_width))
                >> Residual((ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))) ** 2,
                pad=2,
            )
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = _preprocess_doc >> LinearModel(nr_class)
        # model = linear_model >> logistic

        model = (
            (linear_model | cnn_model)
            >> zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
            >> logistic
        )
    model.nO = nr_class
    model.lsuv = False
    return model
def Tok2Vec(width, embed_size, **kwargs):
    # Circular imports :(
    from .._ml import CharacterEmbed
    from .._ml import PyTorchBiLSTM

    pretrained_vectors = kwargs.get("pretrained_vectors", None)
    cnn_maxout_pieces = kwargs.get("cnn_maxout_pieces", 3)
    subword_features = kwargs.get("subword_features", True)
    char_embed = kwargs.get("char_embed", False)
    if char_embed:
        subword_features = False
    conv_depth = kwargs.get("conv_depth", 4)
    bilstm_depth = kwargs.get("bilstm_depth", 0)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        norm = HashEmbed(
            width, embed_size, column=cols.index(NORM), name="embed_norm", seed=6
        )
        if subword_features:
            prefix = HashEmbed(
                width, embed_size // 2, column=cols.index(PREFIX), name="embed_prefix", seed=7
            )
            suffix = HashEmbed(
                width, embed_size // 2, column=cols.index(SUFFIX), name="embed_suffix", seed=8
            )
            shape = HashEmbed(
                width, embed_size // 2, column=cols.index(SHAPE), name="embed_shape", seed=9
            )
        else:
            prefix, suffix, shape = (None, None, None)
        if pretrained_vectors is not None:
            glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))
            if subword_features:
                embed = uniqued(
                    (glove | norm | prefix | suffix | shape)
                    >> LN(Maxout(width, width * 5, pieces=3)),
                    column=cols.index(ORTH),
                )
            elif char_embed:
                embed = concatenate_lists(
                    CharacterEmbed(nM=64, nC=8),
                    FeatureExtracter(cols) >> with_flatten(glove),
                )
                reduce_dimensions = LN(
                    Maxout(width, 64 * 8 + width, pieces=cnn_maxout_pieces)
                )
            else:
                embed = uniqued(
                    (glove | norm) >> LN(Maxout(width, width * 2, pieces=3)),
                    column=cols.index(ORTH),
                )
        elif subword_features:
            embed = uniqued(
                (norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 4, pieces=3)),
                column=cols.index(ORTH),
            )
        elif char_embed:
            embed = concatenate_lists(
                CharacterEmbed(nM=64, nC=8),
                FeatureExtracter(cols) >> with_flatten(norm),
            )
            reduce_dimensions = LN(
                Maxout(width, 64 * 8 + width, pieces=cnn_maxout_pieces)
            )
        else:
            embed = norm

        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )
        if char_embed:
            tok2vec = embed >> with_flatten(
                reduce_dimensions >> convolution ** conv_depth, pad=conv_depth
            )
        else:
            tok2vec = FeatureExtracter(cols) >> with_flatten(
                embed >> convolution ** conv_depth, pad=conv_depth
            )
        if bilstm_depth >= 1:
            tok2vec = tok2vec >> PyTorchBiLSTM(width, width, bilstm_depth)
        # Work around thinc API limitations :(. TODO: Revise in Thinc 7
        tok2vec.nO = width
        tok2vec.embed = embed
    return tok2vec
def TextCatEnsemble_v1(
    width: int,
    embed_size: int,
    pretrained_vectors: Optional[bool],
    exclusive_classes: bool,
    ngram_size: int,
    window_size: int,
    conv_depth: int,
    dropout: Optional[float],
    nO: Optional[int] = None,
) -> Model:
    # Don't document this yet, I'm not sure it's right.
    cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        lower = HashEmbed(
            nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout, seed=10
        )
        prefix = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(PREFIX),
            dropout=dropout,
            seed=11,
        )
        suffix = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(SUFFIX),
            dropout=dropout,
            seed=12,
        )
        shape = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(SHAPE),
            dropout=dropout,
            seed=13,
        )
        width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
        trained_vectors = FeatureExtractor(cols) >> with_array(
            uniqued(
                (lower | prefix | suffix | shape)
                >> Maxout(nO=width, nI=width_nI, normalize=True),
                column=cols.index(ORTH),
            )
        )
        if pretrained_vectors:
            static_vectors = StaticVectors(width)
            vector_layer = trained_vectors | static_vectors
            vectors_width = width * 2
        else:
            vector_layer = trained_vectors
            vectors_width = width
        tok2vec = vector_layer >> with_array(
            Maxout(width, vectors_width, normalize=True)
            >> residual(
                expand_window(window_size=window_size)
                >> Maxout(
                    nO=width, nI=width * ((window_size * 2) + 1), normalize=True
                )
            )
            ** conv_depth,
            pad=conv_depth,
        )
        cnn_model = (
            tok2vec
            >> list2ragged()
            >> ParametricAttention(width)
            >> reduce_sum()
            >> residual(Maxout(nO=width, nI=width))
            >> Linear(nO=nO, nI=width)
            >> Dropout(0.0)
        )

        linear_model = build_bow_text_classifier(
            nO=nO,
            ngram_size=ngram_size,
            exclusive_classes=exclusive_classes,
            no_output_layer=False,
        )
        nO_double = nO * 2 if nO else None
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nO_double)
        else:
            output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
        model = (linear_model | cnn_model) >> output_layer
        model.set_ref("tok2vec", tok2vec)
        if model.has_dim("nO") is not False:
            model.set_dim("nO", nO)
        model.set_ref("output_layer", linear_model.get_ref("output_layer"))
        model.attrs["multi_label"] = not exclusive_classes
    return model
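# --- Usage sketch (not part of the original source) -------------------------------
# A hedged example of building the Thinc 8 / spaCy v3 style ensemble above. It assumes
# HashEmbed, StaticVectors, build_bow_text_classifier, etc. come from thinc.api and
# spacy.ml as in the original module; all sizes below are illustrative.
textcat_v1 = TextCatEnsemble_v1(
    width=64,
    embed_size=2000,
    pretrained_vectors=None,
    exclusive_classes=False,
    ngram_size=1,
    window_size=1,
    conv_depth=2,
    dropout=None,
    nO=3,
)
# textcat_v1.initialize(X=[nlp.make_doc("sample text")])  # Docs in, (n_docs, nO) scores out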