def create_feature_columns():
    # user feature
    bids = fc.categorical_column_with_hash_bucket("behaviorBids", 10240, dtype=tf.int64)
    c1ids = fc.categorical_column_with_hash_bucket("behaviorC1ids", 100, dtype=tf.int64)
    cids = fc.categorical_column_with_hash_bucket("behaviorCids", 10240, dtype=tf.int64)
    sids = fc.categorical_column_with_hash_bucket("behaviorSids", 10240, dtype=tf.int64)
    pids = fc.categorical_column_with_hash_bucket("behaviorPids", 1000000, dtype=tf.int64)
    bids_weighted = fc.weighted_categorical_column(bids, "bidWeights")
    c1ids_weighted = fc.weighted_categorical_column(c1ids, "c1idWeights")
    cids_weighted = fc.weighted_categorical_column(cids, "cidWeights")
    sids_weighted = fc.weighted_categorical_column(sids, "sidWeights")
    pids_weighted = fc.weighted_categorical_column(pids, "pidWeights")

    # item feature
    pid = fc.categorical_column_with_hash_bucket("productId", 1000000, dtype=tf.int64)
    sid = fc.categorical_column_with_hash_bucket("sellerId", 10240, dtype=tf.int64)
    bid = fc.categorical_column_with_hash_bucket("brandId", 10240, dtype=tf.int64)
    c1id = fc.categorical_column_with_hash_bucket("cate1Id", 100, dtype=tf.int64)
    cid = fc.categorical_column_with_hash_bucket("cateId", 10240, dtype=tf.int64)

    # context feature
    matchScore = fc.numeric_column("matchScore", default_value=0.0)
    popScore = fc.numeric_column("popScore", default_value=0.0)
    brandPrefer = fc.numeric_column("brandPrefer", default_value=0.0)
    cate2Prefer = fc.numeric_column("cate2Prefer", default_value=0.0)
    catePrefer = fc.numeric_column("catePrefer", default_value=0.0)
    sellerPrefer = fc.numeric_column("sellerPrefer", default_value=0.0)
    matchType = fc.indicator_column(fc.categorical_column_with_identity("matchType", 9, default_value=0))
    position = fc.indicator_column(fc.categorical_column_with_identity("position", 201, default_value=200))
    triggerNum = fc.indicator_column(fc.categorical_column_with_identity("triggerNum", 51, default_value=50))
    triggerRank = fc.indicator_column(fc.categorical_column_with_identity("triggerRank", 51, default_value=50))
    sceneType = fc.indicator_column(fc.categorical_column_with_identity("type", 2, default_value=0))
    hour = fc.indicator_column(fc.categorical_column_with_identity("hour", 24, default_value=0))
    phoneBrand = fc.indicator_column(fc.categorical_column_with_hash_bucket("phoneBrand", 1000))
    phoneResolution = fc.indicator_column(fc.categorical_column_with_hash_bucket("phoneResolution", 500))
    phoneOs = fc.indicator_column(
        fc.categorical_column_with_vocabulary_list("phoneOs", ["android", "ios"], default_value=0))
    tab = fc.indicator_column(fc.categorical_column_with_vocabulary_list(
        "tab", ["ALL", "TongZhuang", "XieBao", "MuYing", "NvZhuang", "MeiZhuang", "JuJia", "MeiShi"],
        default_value=0))

    # shared embeddings between the user behavior sequences and the item id features
    pid_embed = fc.shared_embedding_columns([pids_weighted, pid], 64, combiner='sum',
                                            shared_embedding_collection_name="pid")
    bid_embed = fc.shared_embedding_columns([bids_weighted, bid], 32, combiner='sum',
                                            shared_embedding_collection_name="bid")
    cid_embed = fc.shared_embedding_columns([cids_weighted, cid], 32, combiner='sum',
                                            shared_embedding_collection_name="cid")
    c1id_embed = fc.shared_embedding_columns([c1ids_weighted, c1id], 10, combiner='sum',
                                             shared_embedding_collection_name="c1id")
    sid_embed = fc.shared_embedding_columns([sids_weighted, sid], 32, combiner='sum',
                                            shared_embedding_collection_name="sid")

    global my_feature_columns
    my_feature_columns = [matchScore, matchType, position, triggerNum, triggerRank, sceneType, hour,
                          phoneBrand, phoneResolution, phoneOs, tab, popScore, sellerPrefer,
                          brandPrefer, cate2Prefer, catePrefer]
    my_feature_columns += pid_embed
    my_feature_columns += sid_embed
    my_feature_columns += bid_embed
    my_feature_columns += cid_embed
    my_feature_columns += c1id_embed
    print("feature columns:", my_feature_columns)
    return my_feature_columns
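# A minimal sketch of how the returned columns could be consumed. The estimator type,
# hidden-unit sizes, and model_dir below are assumptions for illustration only; the
# original pipeline may wire the columns up differently.
def build_estimator_example():
    columns = create_feature_columns()
    estimator = tf.estimator.DNNClassifier(
        feature_columns=columns,          # includes the weighted shared-embedding columns
        hidden_units=[256, 128, 64],      # assumed architecture
        model_dir="/tmp/weighted_fc_demo" # hypothetical path
    )
    return estimator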
import base64
import sys

import tensorflow as tf
from tensorflow import feature_column
from tensorflow.core.example import example_pb2


def test_weighted_categorical_column():
    f_in = open("new.tf.rec.base64", "r")
    for line in f_in:
        try:
            b = base64.b64decode(line.strip())
        except Exception as e:
            sys.stderr.write(str(e))
            continue
        exa = example_pb2.Example()
        print("before parse proto...........")
        try:
            exa.ParseFromString(b)
        except Exception as e:
            sys.stderr.write(str(e))
            continue
        print("after parse proto........")
        #print (exa)
        u_pocs_l1_norm = feature_column.categorical_column_with_hash_bucket(
            "u_pocs_l1_norm", 3000)
        u_pocs_l1_norm_weighted = feature_column.weighted_categorical_column(
            u_pocs_l1_norm, weight_feature_key='u_pocs_l1_norm_val')
        feature_columns = [u_pocs_l1_norm_weighted]
        features = tf.parse_single_example(
            b, tf.feature_column.make_parse_example_spec(feature_columns))
        print(features["u_pocs_l1_norm"])
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            print(session.run(features["u_pocs_l1_norm"]))
        break
from tensorflow.python.feature_column.feature_column import _LazyBuilder


def test_weighted_categorical_column():
    # 1. Input features
    color_data = {
        'color': [['R'], ['G'], ['B'], ['A']],
        'weight': [[1.0], [2.0], [4.0], [8.0]]
    }
    # 2. Feature columns (sparse)
    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)
    color_weight_categorical_column = feature_column.weighted_categorical_column(
        color_column, 'weight')

    builder = _LazyBuilder(color_data)
    id_tensor, weight = color_weight_categorical_column._get_sparse_tensors(builder)

    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('weighted categorical' + '-' * 40)
        print(session.run([id_tensor]))
        print('-' * 40)
        print(session.run([weight]))

    # 2. Feature columns (dense)
    weighted_column = feature_column.indicator_column(color_weight_categorical_column)
    # 3. Feature tensor
    weighted_column_dense_tensor = feature_column.input_layer(color_data, [weighted_column])

    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run([weighted_column_dense_tensor]))
def _add_weighted_embedding_columns(columns, features, feature_table, vocabulary):
    for f in features:
        assert f in feature_table
        weighted_column = fc.weighted_categorical_column(
            fc.categorical_column_with_vocabulary_list(f, vocabulary.vocab[f]),
            f + _WEIGHTED_SUFFIX)
        emb_weighted_column = fc.embedding_column(
            weighted_column, feature_table[f].emb_width, combiner='sqrtn')
        columns.append(emb_weighted_column)
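# Hypothetical usage sketch for the helper above. _WEIGHTED_SUFFIX, FeatureSpec, and
# Vocabulary are stand-ins assumed for illustration; only the attribute accesses
# (vocabulary.vocab[f], feature_table[f].emb_width) are taken from the code itself.
import collections

_WEIGHTED_SUFFIX = "_weight"  # assumed suffix for the parallel weight feature
FeatureSpec = collections.namedtuple("FeatureSpec", ["emb_width"])
Vocabulary = collections.namedtuple("Vocabulary", ["vocab"])

columns = []
_add_weighted_embedding_columns(
    columns,
    features=["tags"],
    feature_table={"tags": FeatureSpec(emb_width=16)},
    vocabulary=Vocabulary(vocab={"tags": ["sports", "news", "music"]}))
# columns now holds one embedding_column built on a weighted categorical column;
# the input pipeline is expected to supply both "tags" and "tags_weight" features.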
def test_weighted_cate_column():
    # !!! id='' stands for a missing value, and its weight must be 0; otherwise the id and
    # !!! weight lists end up with different lengths and an error is raised.
    # !!! Weights must also be float; passing ints raises an error.
    x_values = {
        'id': [[b'a', b'z', b'a', b'c'], [b'b', b'', b'd', b'b']],
        'weight': [[1.0, 2.0, -3.0, 4.0], [5.0, 0.0, 7.0, -8.0]]
    }
    builder = _LazyBuilder(x_values)  # lazy representation of the input

    # ================== define ops
    sparse_id_featcol = feature_column.categorical_column_with_vocabulary_list(
        'id', ['a', 'b', 'c'], dtype=tf.string, default_value=-1)
    sparse_featcol = feature_column.weighted_categorical_column(
        categorical_column=sparse_id_featcol, weight_feature_key='weight')
    x_sparse_tensor = sparse_featcol._get_sparse_tensors(builder)

    # indicator_column converts the sparse tensor into a dense multi-hot encoding with
    # shape=[batch_size, #tokens]; each entry is the sum of all weights of that token.
    dense_featcol = feature_column.indicator_column(sparse_featcol)
    x_dense_tensor = feature_column.input_layer(x_values, [dense_featcol])

    # ================== run
    with tf.Session() as sess:
        # the lookup tables must be initialized, otherwise an error is raised
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        id_sparse_value, weight_sparse_value = sess.run(
            [x_sparse_tensor.id_tensor, x_sparse_tensor.weight_tensor])

        print("************************* sparse id tensor")
        # the id_tensor keeps the shape of the raw input, [batch_size, max_tokens_per_example]=[2,4]
        # SparseTensorValue(indices=array(
        #     [[0, 0],
        #      [0, 1],
        #      [0, 2],
        #      [0, 3],
        #      [1, 0],
        #      [1, 2],
        #      [1, 3]]), values=array([ 0, -1,  0,  2,  1, -1,  1]), dense_shape=array([2, 4]))
        print(id_sparse_value)

        print("************************* sparse weight tensor")
        # the weight_tensor keeps the shape of the raw input, [batch_size, max_tokens_per_example]=[2,4]
        # SparseTensorValue(indices=array(
        #     [[0, 0],
        #      [0, 1],
        #      [0, 2],
        #      [0, 3],
        #      [1, 0],
        #      [1, 2],
        #      [1, 3]]), values=array([ 1.,  2., -3.,  4.,  5.,  7., -8.], dtype=float32), dense_shape=array([2, 4]))
        print(weight_sparse_value)

        print("************************* dense MHE tensor")
        # indicator_column converts the sparse tensor into a dense multi-hot tensor of
        # shape=[batch_size, total_tokens_in_vocab]; each value is the sum of the weights
        # of that token within the example:
        # [[-2.  0.  4.]
        #  [ 0. -3.  0.]]
        print(sess.run(x_dense_tensor))
def test_weighted_categorical_column():
    color_data = {
        'color': [['R'], ['G'], ['B'], ['A']],
        'weight': [[1.0], [2.0], [4.0], [8.0]]
    }  # 4 example rows
    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)
    color_weight_categorical_column = feature_column.weighted_categorical_column(
        color_column, 'weight')
    builder = _LazyBuilder(color_data)
    with tf.Session() as session:
        id_tensor, weight = color_weight_categorical_column._get_sparse_tensors(builder)
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('weighted categorical' + '-' * 40)
        print(session.run([id_tensor]))
        print('-' * 40)
        print(session.run([weight]))
def create_interaction_feature_columns(shared_embedding_dim=60):
    # user embedding features
    phoneBrandId = fc.categorical_column_with_hash_bucket("phoneBrand", 1000)
    phoneBrand = fc.shared_embedding_columns([phoneBrandId], shared_embedding_dim)
    phoneResolutionId = fc.categorical_column_with_hash_bucket("phoneResolution", 500)
    phoneResolution = fc.shared_embedding_columns([phoneResolutionId], shared_embedding_dim)
    bids = fc.categorical_column_with_hash_bucket("behaviorBids", 10240, dtype=tf.int64)
    c1ids = fc.categorical_column_with_hash_bucket("behaviorC1ids", 100, dtype=tf.int64)
    cids = fc.categorical_column_with_hash_bucket("behaviorCids", 10240, dtype=tf.int64)
    sids = fc.categorical_column_with_hash_bucket("behaviorSids", 10240, dtype=tf.int64)
    pids = fc.categorical_column_with_hash_bucket("behaviorPids", 1000000, dtype=tf.int64)
    bids_weighted = fc.weighted_categorical_column(bids, "bidWeights")
    c1ids_weighted = fc.weighted_categorical_column(c1ids, "c1idWeights")
    cids_weighted = fc.weighted_categorical_column(cids, "cidWeights")
    sids_weighted = fc.weighted_categorical_column(sids, "sidWeights")
    pids_weighted = fc.weighted_categorical_column(pids, "pidWeights")

    # item embedding features
    pid = fc.categorical_column_with_hash_bucket("productId", 1000000, dtype=tf.int64)
    sid = fc.categorical_column_with_hash_bucket("sellerId", 10240, dtype=tf.int64)
    bid = fc.categorical_column_with_hash_bucket("brandId", 10240, dtype=tf.int64)
    c1id = fc.categorical_column_with_hash_bucket("cate1Id", 100, dtype=tf.int64)
    c2id = fc.categorical_column_with_hash_bucket("cate2Id", 500, dtype=tf.int64)
    cid = fc.categorical_column_with_hash_bucket("cateId", 10240, dtype=tf.int64)

    # shared embedding
    pid_emb = fc.shared_embedding_columns([pids_weighted, pid], shared_embedding_dim, combiner='sum')
    bid_emb = fc.shared_embedding_columns([bids_weighted, bid], shared_embedding_dim, combiner='sum')
    cid_emb = fc.shared_embedding_columns([cids_weighted, cid], shared_embedding_dim, combiner='sum')
    c1id_emb = fc.shared_embedding_columns([c1ids_weighted, c1id], shared_embedding_dim, combiner='sum')
    sid_emb = fc.shared_embedding_columns([sids_weighted, sid], shared_embedding_dim, combiner='sum')
    c2id_emb = fc.shared_embedding_columns([c2id], shared_embedding_dim)

    columns = phoneBrand
    columns += phoneResolution
    columns += pid_emb
    columns += sid_emb
    columns += bid_emb
    columns += cid_emb
    columns += c1id_emb
    columns += c2id_emb
    print("interaction feature columns:", columns)
    return columns
def test_weighted_categorical_feature_embedding():
    color_data = {
        'color': [['R', 'R'], ['G', 'G'], ['B', 'B'], ['G', 'R'], ['G', 'B'], ['B', 'R']],
        'weight': [[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.3, 0.2], [0.4, 0.3], [0.4, 0.6]]
    }  # 6 example rows
    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)

    color_embeding = feature_column.embedding_column(color_column, 7, combiner="sum")
    color_embeding_dense_tensor = feature_column.input_layer(color_data, [color_embeding])

    color_weight_categorical_column = feature_column.weighted_categorical_column(
        color_column, 'weight')
    color_embeding_weighted = feature_column.embedding_column(
        color_weight_categorical_column, 7, combiner="sum")
    color_embeding_dense_tensor_2 = feature_column.input_layer(
        color_data, [color_embeding_weighted])

    builder = _LazyBuilder(color_data)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    # _get_sparse_tensors returns a pair (id_tensor, weight_tensor)
    color_weighted_tensor = color_weight_categorical_column._get_sparse_tensors(builder)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))
        print("color column weight:")
        print(color_column_tensor.weight_tensor)
        print("color column weighted categorical, weight:")
        print(session.run([color_weighted_tensor.id_tensor]))
        print(session.run([color_weighted_tensor.weight_tensor]))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('embeding' + '-' * 40)
        print(session.run([color_embeding_dense_tensor]))
        print('embeding weighted categorical column')
        print(session.run([color_embeding_dense_tensor_2]))
def create_embedding_feature_columns(shared_embedding_dim=64):
    """Used when several features need to share the same embedding space.

    :return: the shared embedding columns
    """
    # clicked category ids from the user's behavior sequence
    c1ids = fc.categorical_column_with_hash_bucket("behaviorC1ids", 100, dtype=tf.int64)
    # attach weights to c1ids, somewhat like attention
    c1ids_weighted = fc.weighted_categorical_column(c1ids, "c1idWeights")
    # category id of the candidate item
    c1id = fc.categorical_column_with_hash_bucket("cate1Id", 100, dtype=tf.int64)
    # c1ids_weighted and c1id both carry category ids; sharing guarantees they live in the
    # same embedding space, not that the features themselves are identical.
    # c1id_emb is a list of length 2; each element is a shared_embedding_dim-dimensional
    # tensor, 2 * shared_embedding_dim values in total.
    c1id_emb = fc.shared_embedding_columns([c1ids_weighted, c1id], shared_embedding_dim,
                                           combiner='sum')
    return c1id_emb
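# A small, hypothetical demonstration of how the shared columns could be materialized with
# tf.feature_column.input_layer; the toy ids and weights below are made up for illustration.
def demo_shared_embedding():
    columns = create_embedding_feature_columns(shared_embedding_dim=8)
    toy_features = {
        "behaviorC1ids": tf.constant([[11, 23, 5], [7, 9, 13]], dtype=tf.int64),
        "c1idWeights": tf.constant([[0.5, 0.3, 0.2], [0.9, 0.1, 0.4]]),
        "cate1Id": tf.constant([[23], [42]], dtype=tf.int64),
    }
    # yields a [batch_size, 2 * 8] tensor: weighted behavior embedding + item embedding,
    # both looked up from the same shared embedding table
    net = tf.feature_column.input_layer(toy_features, columns)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        print(sess.run(net))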
def __init__(self, name, classifier=None, delimiter="|", sep="@", **kwargs):
    """Weibo tag column.

    :param name:
    :param delimiter: char, separator between tags, defaults to "|"
    :param sep: char, separator between a tag's name and its weight, defaults to "@"
    :param kwargs:
    """
    super(TagColumn, self).__init__(name, classifier)
    assert isinstance(delimiter, str) and len(delimiter) == 1, (
        "{}: delimiter must be char, while get: {}".format(name, delimiter))
    self.delimiter = delimiter
    assert isinstance(sep, str) and len(sep) == 1, (
        "{}: sep must be char, while get: {}".format(name, sep))
    self.sep = sep
    self._tag_name = name + "_tag"
    self._weight_name = name + "_weight"
    if "num_buckets" in kwargs:
        raise ValueError("num_buckets is not supported in TagColumn.")
    self._input_column = _get_category_column_from_dict(self._tag_name, kwargs)
    self._weight_input_column = fc.weighted_categorical_column(
        self._input_column, self._weight_name)
    self._feature_column = fc.indicator_column(self._weight_input_column)
import numpy as np
import tensorflow as tf
import tensorflow.feature_column as fc

categorical_column = fc.categorical_column_with_hash_bucket(
    key='query_plan_ops', hash_bucket_size=200)
weighted_column = fc.weighted_categorical_column(
    categorical_column=categorical_column, weight_feature_key='op_freq')

#feature_env = fc.numeric_column('env', shape=(1,4), dtype=tf.int64)
#feature_label = fc.numeric_column('label', shape=(1,), dtype=tf.float32)
#env_columns = tf.FixedLenFeature([1, 4], tf.int64)
#exec_time = tf.FixedLenFeature([], tf.float32)

cpu_column = fc.numeric_column('cpu', (1, 1))
env_columns = fc.numeric_column('env', (1, 3))
total_ops = fc.numeric_column('total_ops')
#exec_time = fc.numeric_column('label')

cat_table_size = fc.categorical_column_with_hash_bucket(key='table_size', hash_bucket_size=20)
weighted_column_table = fc.weighted_categorical_column(
    categorical_column=cat_table_size, weight_feature_key='table_size_weight')

feature_columns = [
    cpu_column, env_columns, weighted_column, total_ops, weighted_column_table
]
fmap = fc.make_parse_example_spec(feature_columns)
#fmap['env'] = env_columns
#fmap['label'] = exec_time
#print(fmap)
# item field (tail of the preceding, unweighted sum-pooling snippet)
pid = fc.categorical_column_with_hash_bucket("productId", 1000000, dtype=tf.int64)
pid_embed = fc.shared_embedding_columns([pids, pid], 100, combiner='sum',
                                        shared_embedding_collection_name="pid")

"""
So how do we implement the weighted sum pooling operation? The answer is the
weighted_categorical_column function. When building the training examples we must add an
extra weight feature that holds the weight of each product in the behavior sequence, so the
weight feature is a list (vector) parallel to the behavior sequence and the two must have the
same length. In addition, if the behavior sequence is padded with the default value -1, the
weights at those padded positions must be 0. A code example follows:
"""
from tensorflow import feature_column as fc

# user field
pids = fc.categorical_column_with_hash_bucket("behaviorPids", 10240, dtype=tf.int64)
pids_weighted = fc.weighted_categorical_column(pids, "pidWeights")
# item field
pid = fc.categorical_column_with_hash_bucket("productId", 1000000, dtype=tf.int64)
pid_embed = fc.shared_embedding_columns([pids_weighted, pid], 100, combiner='sum',
                                        shared_embedding_collection_name="pid")

"""
Model function
The other components of the base model are not covered in detail here; the model function
looks like this:
"""
def my_model(features, labels, mode, params):
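# Hypothetical illustration of the rule described above: pidWeights is parallel to
# behaviorPids, and padded positions (-1) carry weight 0. The concrete ids and weights
# are made up for illustration only.
example_features = {
    "behaviorPids": [[1001, 2002, 3003, -1, -1]],   # behavior sequence padded with -1
    "pidWeights":   [[0.5, 0.3, 0.2, 0.0, 0.0]],    # same length; padded slots get weight 0
    "productId":    [[2002]],                        # candidate item id
}
# With combiner='sum', the user-side embedding becomes
# 0.5 * emb(1001) + 0.3 * emb(2002) + 0.2 * emb(3003), i.e. a weighted sum pooling.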
def create_feature_columns():
    # user feature
    bids = fc.categorical_column_with_hash_bucket("behaviorBids", 10000, dtype=tf.int64)
    c1ids = fc.categorical_column_with_hash_bucket("behaviorC1ids", 100, dtype=tf.int64)
    cids = fc.categorical_column_with_hash_bucket("behaviorCids", 10000, dtype=tf.int64)
    sids = fc.categorical_column_with_hash_bucket("behaviorSids", 10000, dtype=tf.int64)
    pids = fc.categorical_column_with_hash_bucket("behaviorPids", 500000, dtype=tf.int64)
    bids_weighted = fc.weighted_categorical_column(bids, "bidWeights")
    c1ids_weighted = fc.weighted_categorical_column(c1ids, "c1idWeights")
    cids_weighted = fc.weighted_categorical_column(cids, "cidWeights")
    sids_weighted = fc.weighted_categorical_column(sids, "sidWeights")
    pids_weighted = fc.weighted_categorical_column(pids, "pidWeights")

    pid_embed = fc.embedding_column(pids_weighted, 64)
    bid_embed = fc.embedding_column(bids_weighted, 32)
    cid_embed = fc.embedding_column(cids_weighted, 48)
    c1id_embed = fc.embedding_column(c1ids_weighted, 10)
    sid_embed = fc.embedding_column(sids_weighted, 32)

    phoneBrandId = fc.categorical_column_with_hash_bucket("phoneBrand", 1000)
    phoneBrand = fc.embedding_column(phoneBrandId, 20)
    phoneResolutionId = fc.categorical_column_with_hash_bucket("phoneResolution", 500)
    phoneResolution = fc.embedding_column(phoneResolutionId, 10)
    phoneOs = fc.indicator_column(
        fc.categorical_column_with_vocabulary_list("phoneOs", ["android", "ios"], default_value=0))
    gender = fc.indicator_column(
        fc.categorical_column_with_identity("gender", num_buckets=3, default_value=0))
    age_class = fc.indicator_column(
        fc.categorical_column_with_identity("age_class", num_buckets=7, default_value=0))
    has_baby = fc.indicator_column(
        fc.categorical_column_with_identity("has_baby", num_buckets=2, default_value=0))
    baby_gender = fc.indicator_column(
        fc.categorical_column_with_identity("baby_gender", num_buckets=3, default_value=0))
    baby_age = fc.indicator_column(
        fc.categorical_column_with_identity("baby_age", num_buckets=7, default_value=0))
    grade = fc.indicator_column(
        fc.categorical_column_with_identity("grade", num_buckets=7, default_value=0))
    rfm_type = fc.indicator_column(
        fc.categorical_column_with_identity("bi_rfm_type", num_buckets=12, default_value=0))
    city_id = fc.categorical_column_with_hash_bucket("city", 700)
    city = fc.embedding_column(city_id, 16)
    userType = fc.indicator_column(
        fc.categorical_column_with_identity("user_type", 6, default_value=0))
    hour = fc.indicator_column(
        fc.categorical_column_with_identity("hour", 24, default_value=0))

    global my_feature_columns
    my_feature_columns = [
        userType, hour, gender, age_class, has_baby, baby_gender, baby_age, grade, rfm_type,
        phoneBrand, phoneResolution, phoneOs, pid_embed, sid_embed, bid_embed, cid_embed,
        c1id_embed, city
    ]
    print("feature columns:", my_feature_columns)
    return my_feature_columns
import numpy as np
import tensorflow as tf
import tensorflow.feature_column as fc

categorical_column = fc.categorical_column_with_hash_bucket(
    key='query_plan_ops', hash_bucket_size=20)
weighted_column = fc.weighted_categorical_column(
    categorical_column=categorical_column, weight_feature_key='op_freq')

#feature_env = fc.numeric_column('env', shape=(1,4), dtype=tf.int64)
#feature_label = fc.numeric_column('label', shape=(1,), dtype=tf.float32)
#env_columns = tf.FixedLenFeature([1, 4], tf.int64)
#exec_time = tf.FixedLenFeature([], tf.float32)

cpu = fc.numeric_column('cpu')
env_columns = fc.numeric_column('env', (1, 3))
total_ops = fc.numeric_column('total_ops')
exec_time = fc.numeric_column('label')

feature_columns = [cpu, env_columns, weighted_column, total_ops, exec_time]
fmap = fc.make_parse_example_spec(feature_columns)
#fmap['env'] = env_columns
#fmap['label'] = exec_time
#print(fmap)


#https://jhui.github.io/2017/11/21/TensorFlow-Importing-data/
def parser(serialized_example):
    """Parses a single tf.Example into image and label tensors."""
    features = tf.parse_single_example(
        serialized_example,
        # features={