Exemplo n.º 1
0
def add_variable_inputs(sym, sym_input_dict, is_training):
    """Declare XDL variables for the unbound inputs of an MXNet symbol.

    Every argument of ``sym`` that is not already a key of ``sym_input_dict``
    gets a trainable XDL variable, and the variable's value is stored in the
    dict under the argument name.  Auxiliary states whose names end in
    ``_moving_mean`` or ``_moving_var`` also get a variable; its value is
    stored in the dict only when ``is_training`` is false.

    Args:
      sym: symbol object; must provide infer_shape(), infer_type(),
        list_arguments(), list_auxiliary_states() and tojson()
      sym_input_dict: dict mapping input names to values, updated in place
      is_training: when true, auxiliary-state variables are still declared
        but their values are not added to sym_input_dict
    Returns:
      None
    """
    # Import once, before either loop.  Previously these imports lived inside
    # the argument loop, so they were re-executed for every argument and were
    # never bound at all (UnboundLocalError in the aux loop) when the symbol
    # had no arguments.
    import xdl.python.framework.variable as variable
    import xdl.python.backend.mxnet.convert_utils as cu

    arg_shape, _, aux_shape = sym.infer_shape()
    arg_type, _, aux_type = sym.infer_type()
    arg_shape_type = zip(sym.list_arguments(), arg_shape, arg_type)
    aux_shape_type = zip(sym.list_auxiliary_states(), aux_shape, aux_type)
    node_info = json.loads(sym.tojson())["nodes"]

    def _declare(var_name, var_shape, mx_type):
        # Build the XDL variable matching one MXNet argument / aux state.
        initializer_and_args = get_initializer_and_args(var_name, node_info)
        return variable.Variable(
            name=var_name,
            shape=var_shape,
            dtype=cu.MX2XDL.convert_type(mx_type),
            trainable=True,
            initializer=cu.MX2XDL.convert_initializer(
                initializer_and_args[0], initializer_and_args[1]))

    for var_name, var_shape, mx_type in arg_shape_type:
        # Inputs supplied by the caller are left untouched.
        if var_name not in sym_input_dict:
            xdl_var = _declare(var_name, var_shape, mx_type)
            sym_input_dict[var_name] = xdl_var.value

    for var_name, var_shape, mx_type in aux_shape_type:
        if var_name.endswith(('_moving_mean', '_moving_var')):
            # The variable is declared in both modes; only its exposure as a
            # symbol input depends on is_training.
            xdl_var = _declare(var_name, var_shape, mx_type)
            if not is_training:
                sym_input_dict[var_name] = xdl_var.value
Exemplo n.º 2
0
def merged_embedding(name,
                     sparse_inputs,
                     initializer,
                     emb_dim,
                     feature_dim,
                     combiner='sum',
                     vtype=VarType.Index,
                     length=50,
                     reverse=False,
                     feature_add_probability=1.0):
    """xdl embedding over a list of sparse inputs merged into one lookup
       Args:
         name: name for embedding, will be used for declaring variable on ps-plus
         sparse_inputs: a list of sparse tensors represent input data
         initializer: initializer for the weights
         emb_dim: embedding dimension
         feature_dim: sparse input dimension, for pre-allocate memory
         combiner: reduce operator, support sum|mean|tile
         vtype: variable type passed to the variable declaration
         length: forwarded to merged_tile when combiner is 'tile'
         reverse: forwarded to merged_tile when combiner is 'tile'
         feature_add_probability: passed to gather as save_ratio
       Returns:
         a tensor represent embedding result
       Raises:
         Exception: if combiner is not one of sum|mean|tile
    """
    # NOTE: feature_add_probability used to be read without being defined
    # anywhere in this function (NameError at call time); it is now an
    # explicit trailing keyword argument so existing callers are unaffected.
    import xdl.python.framework.variable as variable
    var = variable.Variable(name=name,
                            dtype=DataType.float,
                            shape=[feature_dim, emb_dim],
                            initializer=initializer,
                            vtype=vtype,
                            trainable=True)
    merged_sparse_inputs = merge_sparse(sparse_inputs)
    ids = merged_sparse_inputs.ids
    # Look up each distinct id once; idx maps the original ids onto the
    # rows of the gathered result.
    unique_ids, idx = xdl.unique(ids, itype=DataType.int32)
    embeddings = var.gather(unique_ids, save_ratio=feature_add_probability)
    global _EMBEDDING_TENSOR
    # Remember which variable produced the gathered tensor.
    _EMBEDDING_TENSOR[embeddings] = var
    import xdl.python.sparse_engine.embedding_ops as embedding_ops
    if combiner == 'sum':
        embeddings = embedding_ops.merged_ksum(embeddings, idx,
                                               merged_sparse_inputs.values,
                                               merged_sparse_inputs.segments,
                                               merged_sparse_inputs.groups)
    elif combiner == 'mean':
        embeddings = embedding_ops.merged_kmean(embeddings, idx,
                                                merged_sparse_inputs.values,
                                                merged_sparse_inputs.segments,
                                                merged_sparse_inputs.groups)
    elif combiner == 'tile':
        embeddings = embedding_ops.merged_tile(embeddings, idx,
                                               merged_sparse_inputs.values,
                                               merged_sparse_inputs.segments,
                                               merged_sparse_inputs.groups,
                                               length, reverse)
    else:
        raise Exception("Unrecognized combiner:" + str(combiner))

    emb_info = EmbeddingInfo(name, feature_dim, emb_dim, combiner, None, var,
                             embeddings)
    set_embedding_info([var], emb_info)
    return embeddings
Exemplo n.º 3
0
def merged_embedding(name, sparse_inputs, initializer, emb_dim, feature_dim,
                     combiner='sum', vtype=VarType.Index, length=50, reverse=False,
                     batch_read=3000, feature_add_probability=1.0, cbf=0, device='CPU', **device_attr):
    """xdl embedding over several sparse inputs merged into one lookup
       Args:
         name: name for embedding, will be used for declaring variable on ps-plus
         sparse_inputs: a list/tuple of sparse tensors represent input data,
           or an already merged MergedSparseTensor
         initializer: initializer for the weights
         emb_dim: embedding dimension
         feature_dim: sparse input dimension, for pre-allocate memory
         combiner: reduce operator, support sum|mean|tile
         vtype: variable type passed to the variable declaration
         length: forwarded to merged_tile when combiner is 'tile'
         reverse: forwarded to merged_tile when combiner is 'tile'
         batch_read: passed to variable_info as batch_read
         feature_add_probability: passed to variable_info as save_ratio
         cbf: passed to variable_info as bloom_filter
         device: device name used for the unique op and the combiner op
         **device_attr: extra device attributes forwarded with device
       Returns:
         a tensor represent embedding result
       Raises:
         Exception: if combiner is not one of sum|mean|tile
    """
    import xdl.python.framework.variable as variable
    with variable.variable_info(batch_read=batch_read, save_ratio=feature_add_probability, bloom_filter=cbf):
        var = variable.Variable(name=name,
                                dtype=DataType.float,
                                shape=[feature_dim, emb_dim],
                                initializer=initializer,
                                vtype=vtype,
                                trainable=True)
    if isinstance(sparse_inputs, (list, tuple)):
        merged_sparse_inputs = merge_sparse(sparse_inputs)
        # The dimension recorded in EmbeddingInfo scales with the number of
        # merged inputs; the variable itself keeps the per-input emb_dim.
        emb_dim *= len(sparse_inputs)
    else:
        assert(isinstance(sparse_inputs, MergedSparseTensor))
        merged_sparse_inputs = sparse_inputs
    if merged_sparse_inputs.has_unique_ids():
        # Input already carries de-duplicated ids and their index tensors.
        unique_ids = merged_sparse_inputs.ids
        idx = merged_sparse_inputs.indices
        sidx = merged_sparse_inputs.sidx
        sseg = merged_sparse_inputs.sseg
    else:
        with xdl.device(device, **device_attr):
            # Fixed: this previously passed an undefined name `ids`
            # (NameError); the ids to de-duplicate live on the merged input.
            unique_ids, idx, sidx, sseg = xdl.unique(
                merged_sparse_inputs.ids,
                merged_sparse_inputs.groups,
                itype=DataType.int32)

    embeddings = var.gather(unique_ids)
    global _EMBEDDING_TENSOR
    # Remember which variable produced the gathered tensor.
    _EMBEDDING_TENSOR[embeddings] = var
    import xdl.python.sparse_engine.embedding_ops as embedding_ops
    if combiner == 'sum':
        embeddings = embedding_ops.merged_ksum(
            embeddings,
            idx,
            merged_sparse_inputs.values,
            merged_sparse_inputs.segments,
            merged_sparse_inputs.groups,
            sidx,
            sseg,
            device, **device_attr)
    elif combiner == 'mean':
        embeddings = embedding_ops.merged_kmean(
            embeddings,
            idx,
            merged_sparse_inputs.values,
            merged_sparse_inputs.segments,
            merged_sparse_inputs.groups,
            sidx,
            sseg,
            device, **device_attr)
    elif combiner == 'tile':
        embeddings = embedding_ops.merged_tile(
            embeddings,
            idx,
            merged_sparse_inputs.values,
            merged_sparse_inputs.segments,
            merged_sparse_inputs.groups,
            length,
            reverse,
            device, **device_attr)
    else:
        raise Exception("Unrecognized combiner:" + str(combiner))

    emb_info = EmbeddingInfo(name, feature_dim, emb_dim, combiner, None, var, length, embeddings)
    set_embedding_info([var], emb_info)
    return embeddings
Exemplo n.º 4
0
def embedding(name, sparse_input, initializer, emb_dim, feature_dim,
              combiner='sum',
              vtype=VarType.Index,
              length=50,
              reverse=False,
              batch_read=3000,
              feature_add_probability=1.0,
              cbf=0,
              device='CPU',
              statis_list=None,
              statis_decay=0.07,
              statis_decay_period=100,
              labels=None,
              save=True,
              **device_attr):
    """xdl embedding for a single sparse input, with optional statistics
       Args:
         name: name for embedding, will be used for declaring variable on ps-plus
         sparse_input: a sparse tensor represent input data
         initializer: initializer for the variable on ps-plus
         emb_dim: embedding dimension
         feature_dim: sparse input dimension, for pre-allocate memory
         combiner: reduce operator, support sum|mean|tile
         vtype: variable type passed to the variable declarations
         length: forwarded to the tile op and recorded in EmbeddingInfo
         reverse: forwarded to the tile op
         batch_read: passed to variable_info as batch_read
         feature_add_probability: passed to variable_info as save_ratio
         cbf: passed to variable_info as bloom_filter
         device: device name used for the unique op and the combiner op
         statis_list: optional list of statis types; one extra non-trainable
           variable is declared per entry
         statis_decay: passed to each statis variable
         statis_decay_period: passed to each statis variable
         labels: required (asserted) when statis_list is given
         save: passed to variable_info as the string "true" or "false"
         **device_attr: extra device attributes forwarded with device
       Returns:
         the embedding tensor, or (embedding tensor, list of statis results)
         when statis_list is not None
       Raises:
         Exception: if combiner is not one of sum|mean|tile
    """

    global EMBEDDING_LIST, EMBEDDING_SET
    # Register each embedding name once, keeping first-seen order in the list.
    if name not in EMBEDDING_SET:
        EMBEDDING_SET.add(name)
        EMBEDDING_LIST.append(name)

    import xdl.python.framework.variable as variable
    with_statis = statis_list is not None
    save_flag = "true" if save else "false"
    with variable.variable_info(batch_read=batch_read,
                                save_ratio=feature_add_probability,
                                bloom_filter=cbf,
                                save=save_flag):
        var = variable.Variable(name=name,
                                dtype=DataType.float,
                                shape=[feature_dim, emb_dim],
                                initializer=initializer,
                                vtype=vtype,
                                trainable=True)
        if with_statis:
            # One [feature_dim, 1] zero-initialised variable per statis type.
            statis_vars = [
                variable.Variable(name=name,
                                  dtype=DataType.float,
                                  shape=[feature_dim, 1],
                                  initializer=xdl.Zeros(),
                                  vtype=vtype,
                                  trainable=False,
                                  statis_type=kind,
                                  statis_decay=statis_decay,
                                  statis_decay_period=statis_decay_period)
                for kind in statis_list
            ]

    if not sparse_input.has_unique_ids():
        # De-duplicate on the requested device, then gather outside of it.
        with xdl.device(device, **device_attr):
            unique_ids, idx, sidx, sseg = xdl.unique(
                sparse_input.ids, sparse_input.segments, itype=DataType.int32)
        embeddings = var.gather(unique_ids)
    else:
        # Input already carries de-duplicated ids and their index tensors.
        unique_ids = xdl.identity_op(sparse_input.ids)
        idx = sparse_input.indices
        embeddings = var.gather(unique_ids)
        sidx = sparse_input.sidx
        sseg = sparse_input.sseg

    if with_statis:
        assert labels is not None
        from xdl.python.training.training_utils import get_global_step
        global_step = get_global_step()
        statis_results = [
            stat_var.statis(sparse_input.ids, idx, sparse_input.segments,
                            sidx, sseg, labels, global_step.value)
            for stat_var in statis_vars
        ]

    global _EMBEDDING_TENSOR
    # Remember which variable produced the gathered tensor.
    _EMBEDDING_TENSOR[embeddings] = var

    import xdl.python.sparse_engine.embedding_ops as embedding_ops
    import numpy as np
    if combiner == 'sum' or combiner == 'mean':
        # sum and mean take identical arguments; only the op differs.
        if combiner == 'sum':
            reduce_op = embedding_ops.ksum
        else:
            reduce_op = embedding_ops.kmean
        embeddings = reduce_op(embeddings, idx, sparse_input.values,
                               sparse_input.segments, sidx, sseg,
                               device, **device_attr)
    elif combiner == 'tile':
        # tile receives an empty float32 array in the values position.
        empty_values = np.array([], dtype=np.float32)
        embeddings = embedding_ops.tile(embeddings, idx, empty_values,
                                        sparse_input.segments, length, reverse,
                                        device, **device_attr)
    else:
        raise Exception("Unrecognized combiner:" + str(combiner))

    input_shape = sparse_input.shape
    if input_shape is not None and len(input_shape) > 0:
        embeddings.set_shape([input_shape[0], emb_dim])

    set_embedding_info(
        [var],
        EmbeddingInfo(name, feature_dim, emb_dim, combiner, None, var,
                      length, embeddings))
    if with_statis:
        return embeddings, statis_results
    return embeddings
Exemplo n.º 5
0
def embedding(name,
              sparse_input,
              initializer,
              emb_dim,
              feature_dim,
              combiner='sum',
              vtype=VarType.Index,
              length=50,
              reverse=False,
              batch_read=3000,
              feature_add_probability=1.0):
    """xdl embedding for a single sparse input
       Args:
         name: name for embedding, will be used for declaring variable on ps-plus
         sparse_input: a sparse tensor represent input data
         initializer: initializer for the variable on ps-plus
         emb_dim: embedding dimension
         feature_dim: sparse input dimension, for pre-allocate memory
         combiner: reduce operator, support sum|mean|tile
         vtype: variable type passed to the variable declaration
         length: forwarded to the tile op when combiner is 'tile'
         reverse: forwarded to the tile op when combiner is 'tile'
         batch_read: passed to variable_info as batch_read
         feature_add_probability: passed to gather as save_ratio
       Returns:
         a tensor represent embedding result
       Raises:
         Exception: if combiner is not one of sum|mean|tile
    """
    import xdl.python.framework.variable as variable
    with variable.variable_info(batch_read=batch_read):
        var = variable.Variable(name=name,
                                dtype=DataType.float,
                                shape=[feature_dim, emb_dim],
                                initializer=initializer,
                                vtype=vtype,
                                trainable=True)

    # Use the de-duplicated ids the input already carries, or compute them.
    if sparse_input.has_unique_ids():
        unique_ids, idx = sparse_input.ids, sparse_input.indices
    else:
        unique_ids, idx = xdl.unique(sparse_input.ids, itype=DataType.int32)
    embeddings = var.gather(unique_ids, save_ratio=feature_add_probability)

    global _EMBEDDING_TENSOR
    # Remember which variable produced the gathered tensor.
    _EMBEDDING_TENSOR[embeddings] = var

    import xdl.python.sparse_engine.embedding_ops as embedding_ops
    if combiner == 'sum' or combiner == 'mean':
        # sum and mean take identical arguments; only the op differs.
        if combiner == 'sum':
            reduce_op = embedding_ops.ksum
        else:
            reduce_op = embedding_ops.kmean
        embeddings = reduce_op(embeddings, idx, sparse_input.values,
                               sparse_input.segments)
    elif combiner == 'tile':
        embeddings = embedding_ops.tile(embeddings, idx, sparse_input.values,
                                        sparse_input.segments, length, reverse)
    else:
        raise Exception("Unrecognized combiner:" + str(combiner))

    input_shape = sparse_input.shape
    if input_shape is not None and len(input_shape) > 0:
        embeddings.set_shape([input_shape[0], emb_dim])

    set_embedding_info(
        [var],
        EmbeddingInfo(name, feature_dim, emb_dim, combiner, None, var,
                      embeddings))
    return embeddings