Пример #1
0
 def end(self):
     """Save a checkpoint at (global step + 1) and track its version.

     The step value is fetched by executing ``self._global_step.value``;
     meta values are fetched only when ``self._meta`` is set.
     """
     self.gstep_val = xdl.execute(self._global_step.value) + 1
     # NOTE(review): self.meta_val is assigned here only when self._meta is
     # not None; presumably it is initialised elsewhere -- confirm.
     if self._meta is not None:
         self.meta_val = xdl.execute(self._meta.values())
     self._save_ckpt(self.gstep_val, self.meta_val)
     # Register a new version only if this step differs from the last saved
     # step recorded on the instance.
     if self.gstep_val != self._last_save_step:
         self._ckpt_queue.append(self._create_version(self.gstep_val))
     self._check_ckpt_queue()
Пример #2
0
def _export_sparse_var(ckpt_dir, output_dir, var, vtype='hash', dim=18):
    """Run the PS checkpoint-variable convert op for ``var``.

    Args:
        ckpt_dir: checkpoint directory path (string).
        output_dir: output directory path (string).
        var: variable name(s) as a string; split on ',' for post-processing.
        vtype: variable type tag. When it does not start with ``'hash'``,
            each exported variable file is additionally passed to
            ``_transfer_sparse_idx_to_hash``.
        dim: forwarded to ``_transfer_sparse_idx_to_hash``.
    """
    def _string_to_int8(src):
        # The op receives its string arguments as int8 arrays of char codes.
        return np.array([ord(ch) for ch in src], dtype=np.int8)

    print(var)
    op = xdl.ps_convert_ckpt_variable_op(
        checkpoint_dir=_string_to_int8(ckpt_dir),
        output_dir=_string_to_int8(output_dir),
        variables=_string_to_int8(var))
    xdl.execute(op)
    if not vtype.startswith('hash'):
        for v in var.split(','):
            _transfer_sparse_idx_to_hash(os.path.join(output_dir, v), dim, offset=0)
 def test_gpu(self):
     """Check ksum_grad on the GPU device, with an empty group array,
     against hard-coded expected values."""
     with xdl.device("GPU"):
         grps = np.array([], dtype=np.int32)
         ksum_grad = xdl.ksum_grad(embeds, idx, values, segs, grps, grads)
         ksum_grad = xdl.execute(ksum_grad)
         # np.float64 is what the removed np.float alias resolved to.
         res = np.array([[0.4], [0.3], [0.6], [0.2], [0.3], [0.3]],
                        dtype=np.float64)
         self.assertTrue(np.allclose(ksum_grad, res))
Пример #4
0
 def test_split_0_dim(self):
     """Split a 4x2 array along axis 0 into pieces of size 1 and 3."""
     value = np.array([[10, 20], [30, 40], [50, 60], [70, 80]])
     num_or_size_splits = np.array([1, 3])
     a, b = xdl.split(value, num_or_size_splits, 0, 2)
     a, b = xdl.execute([a, b])
     # np.all replaces np.alltrue, which newer NumPy releases no longer ship.
     self.assertTrue(np.all(a == [[10, 20]]))
     self.assertTrue(np.all(b == [[30, 40], [50, 60], [70, 80]]))
Пример #5
0
 def test_merged_gpu(self):
     """Check ksum_grad on the GPU device with merged gradients against
     hard-coded expected values."""
     with xdl.device("GPU"):
         ksum_grad = xdl.ksum_grad(embeds_shape, idx, values, segs, grps,
                                   sidx_nogrp, sseg, merged_grads)
         ksum_grad = xdl.execute(ksum_grad)
         # np.float64 is what the removed np.float alias resolved to.
         res = np.array([[7], [5], [12], [3], [5], [6]], dtype=np.float64)
         self.assertTrue(np.allclose(ksum_grad, res))
Пример #6
0
 def test_split_neg_dim_x(self):
     """Split a 4x2 array along axis -1 into two column pieces."""
     value = np.array([[10, 20], [30, 40], [50, 60], [70, 80]])
     num_or_size_splits = np.array(2)
     a, b = xdl.split(value, num_or_size_splits, -1, 2)
     a, b = xdl.execute([a, b])
     # np.all replaces np.alltrue, which newer NumPy releases no longer ship.
     self.assertTrue(np.all(a == [[10], [30], [50], [70]]))
     self.assertTrue(np.all(b == [[20], [40], [60], [80]]))
Пример #7
0
 def test_concat_x3(self):
     """Concatenate three 2x2x2 arrays along axis -3 and compare the result
     with np.concatenate."""
     a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
     b = np.array([[[11, 12], [13, 14]], [[15, 16], [17, 18]]])
     c = np.array([[[21, 22], [23, 24]], [[25, 26], [27, 28]]])
     r = xdl.concat([a, b, c], axis=-3)
     r = xdl.execute(r)
     # np.all replaces np.alltrue, which newer NumPy releases no longer ship.
     self.assertTrue(np.all(r == np.concatenate([a, b, c], axis=-3)))
 def test_gpu(self):
     """Check take_grad on the GPU device against hard-coded expected
     values."""
     with xdl.device("GPU"):
         out = xdl.take_grad(comm_grad, indicator, comm)
         out = xdl.execute(out)
         # np.float64 is what the removed np.float alias resolved to.
         res = np.array([[0.5, 0.7, 0.9], [0.8, 1.0, 1.2], [0.4, 0.5, 0.6]],
                        dtype=np.float64)
         self.assertTrue(np.allclose(out, res))
Пример #9
0
 def test_gpu_kavg(self):
     """Check ksum with average=True on the GPU device, using an empty group
     array, against hard-coded expected values."""
     with xdl.device("GPU"):
         grps = np.array([], dtype=np.int32)
         ksum = xdl.ksum(embeds, idx, values, segs, grps, average=True)
         ksum = xdl.execute(ksum)
         # np.float64 is what the removed np.float alias resolved to.
         res = np.array([[0.02], [0.03], [0.0375]], dtype=np.float64)
         self.assertTrue(np.allclose(ksum, res))
Пример #10
0
 def test_gpu_ksum(self):
     """Check ksum on the GPU device, using an empty group array, against
     hard-coded expected values."""
     with xdl.device("GPU"):
         grps = np.array([], dtype=np.int32)
         ksum = xdl.ksum(embeds, idx, values, segs, grps, sidx, sseg)
         ksum = xdl.execute(ksum)
         # np.float64 is what the removed np.float alias resolved to.
         res = np.array([[0.06], [0.09], [0.15]], dtype=np.float64)
         self.assertTrue(np.allclose(ksum, res))
Пример #11
0
 def test_gpu_merged_ksum(self):
     """Check ksum on the GPU device with the module-level group array
     against hard-coded expected values."""
     with xdl.device("GPU"):
         ksum = xdl.ksum(embeds, idx, values, segs, grps, sidx, sseg)
         ksum = xdl.execute(ksum)
         # np.float64 is what the removed np.float alias resolved to.
         res = np.array([[0.03, 0.03], [0.04, 0.05], [0.05, 0.1]],
                        dtype=np.float64)
         self.assertTrue(np.allclose(ksum, res))
Пример #12
0
 def test_cpu(self):
     """Check take_op(comm, indicator) against hard-coded expected rows."""
     out = xdl.take_op(comm, indicator)
     out = xdl.execute(out)
     # np.float64 is what the removed np.float alias resolved to.
     res = np.array([[0.1, 0.2, 0.3], [0.1, 0.2, 0.3], [0.4, 0.5, 0.6],
                     [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]],
                    dtype=np.float64)
     self.assertTrue(np.allclose(out, res))
Пример #13
0
 def test_stack_x4(self):
     """Stack three 2x2x2 arrays along axis -4 and compare the result with
     np.stack."""
     a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
     b = np.array([[[11, 12], [13, 14]], [[15, 16], [17, 18]]])
     c = np.array([[[21, 22], [23, 24]], [[25, 26], [27, 28]]])
     r = xdl.stack([a, b, c], axis=-4)
     r = xdl.execute(r)
     # np.all replaces np.alltrue, which newer NumPy releases no longer ship.
     self.assertTrue(np.all(r == np.stack([a, b, c], axis=-4)))
Пример #14
0
 def test_constant(self):
     """convert_to_tensor should round-trip a scalar, a list and an ndarray
     through xdl.execute."""
     from_scalar = xdl.convert_to_tensor(1)
     from_list = xdl.convert_to_tensor([10, 20])
     from_array = xdl.convert_to_tensor(np.array([30, 40]))
     scalar_out, list_out, array_out = xdl.execute(
         [from_scalar, from_list, from_array])
     self.assertTrue(scalar_out == 1)
     list_matches = (list_out == np.array([10, 20])).all()
     self.assertTrue(list_matches)
     array_matches = (array_out == np.array([30, 40])).all()
     self.assertTrue(array_matches)
Пример #15
0
    def test_unique_cpu_1d(self):
        """Run xdl.unique on the module-level ``data`` with three segment
        arrays (int32, int64, and an int32 one with repeated boundaries) and
        compare all four outputs with hard-coded expectations."""
        expected_uniq = np.array([0, 2, 1, 3])
        expected_idx = np.array([
            3, 2, 1, 2, 3, 2, 1, 0, 2, 3, 2, 2, 1, 0, 2, 3, 3, 1, 3, 1, 1, 2,
            1, 0
        ])
        expected_sseg = np.array([3, 10, 18, 24])

        def run_and_check(segment, itype, expected_sidx):
            # Build the op, execute it, then compare every output.
            uniq, idx, sidx, sseg = xdl.unique(input=data,
                                               segment=segment,
                                               itype=itype)
            uniq, idx, sidx, sseg = xdl.execute([uniq, idx, sidx, sseg])
            self.assertTrue((uniq == expected_uniq).all())
            self.assertTrue((idx == expected_idx).all())
            self.assertTrue((sidx == expected_sidx).all())
            self.assertTrue((sseg == expected_sseg).all())

        boundaries = [3, 5, 8, 10, 11, 12, 16, 18, 20, 22, 24]
        plain_sidx = np.array([
            2, 6, 10, 0, 2, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 9, 0, 1, 3, 6,
            7, 8
        ])
        run_and_check(np.array(boundaries, np.int32), DataType.int32,
                      plain_sidx)
        run_and_check(np.array(boundaries, np.int64), DataType.int64,
                      plain_sidx)

        # Segment array with repeated boundaries (5 and 12 appear twice).
        segment2 = np.array([3, 5, 5, 8, 10, 11, 12, 12, 16, 18, 20, 22, 24],
                            np.int32)
        repeated_sidx = np.array([
            3, 8, 12, 0, 3, 8, 9, 10, 11, 12, 0, 1, 3, 4, 5, 6, 8, 11, 0, 1, 4,
            8, 9, 10
        ])
        run_and_check(segment2, DataType.int32, repeated_sidx)
Пример #16
0
 def test_unique_cpu_2d(self):
     """Run xdl.unique on ``data`` reshaped to two columns and compare the
     unique rows and index array with hard-coded expectations."""
     res_uniq = np.array([[3, 1], [2, 1], [2, 0], [1, 3], [1, 1], [3, 2]])
     res_idx = np.array([0, 1, 0, 2, 3, 4, 2, 3, 5, 5, 1, 2])
     # Floor division keeps the row count an int; with true division the
     # shape would contain a float, which reshape rejects.
     uniq, idx = xdl.unique(data.reshape((data.size // 2, 2)),
                            itype=DataType.int32)
     uniq, idx = xdl.execute([uniq, idx])
     self.assertTrue((uniq == res_uniq).all())
     self.assertTrue((idx == res_idx).all())
Пример #17
0
 def test_auc(self):
     """Build an xdl.auc op over fixed predictions/labels, run the variable
     registers and global initializers, then execute and print the result.

     No assertion is made on the value; the result is only printed.
     """
     labels = np.array([1, 0, 1, 1, 0, 0, 1, 1, 0, 0], dtype=np.float32)
     predicts = np.array([0.7, 0.2, 0.6, 0.8, 0.1, 0.2, 0.6, 0.9, 0.1, 0.1],
                         dtype=np.float32)
     res = xdl.auc(predicts, labels)
     # NOTE(review): bare ``execute`` is used here while the last call uses
     # ``xdl.execute`` -- presumably imported at module level; confirm.
     execute(xdl.variable_registers())
     execute(xdl.global_initializers())
     res = xdl.execute(res)
     # Parenthesised single-argument form prints the same on Python 2 and is
     # valid on Python 3.
     print(res)
Пример #18
0
 def test_unique_cpu_1d(self):
     """Run xdl.unique on the module-level ``data`` with an int32 index type
     and compare both outputs with hard-coded expectations."""
     expected_idx = np.array([
         0, 1, 2, 1, 0, 1, 2, 3, 1, 0, 1, 1, 2, 3, 1, 0, 0, 2, 0, 2, 2, 1,
         2, 3
     ])
     expected_uniq = np.array([3, 1, 2, 0])
     uniq_op, idx_op = xdl.unique(data, itype=DataType.int32)
     uniq_out, idx_out = xdl.execute([uniq_op, idx_op])
     uniq_matches = (uniq_out == expected_uniq).all()
     self.assertTrue(uniq_matches)
     idx_matches = (idx_out == expected_idx).all()
     self.assertTrue(idx_matches)
Пример #19
0
    def test_merged_cpu_with_values(self):
        """Check ksum_grad with merged gradients, first with the default
        reduction and then with average=True, against hard-coded values."""
        ksum_grad = xdl.ksum_grad(embeds_shape, idx, values, segs, grps, sidx,
                                  sseg, merged_grads)
        ksum_grad = xdl.execute(ksum_grad)
        # np.float64 is what the removed np.float alias resolved to.
        res = np.array([[7], [5], [12], [3], [5], [6]], dtype=np.float64)
        self.assertTrue(np.allclose(ksum_grad, res))

        ksum_grad = xdl.ksum_grad(embeds_shape,
                                  idx,
                                  values,
                                  segs,
                                  grps,
                                  sidx,
                                  sseg,
                                  merged_grads,
                                  average=True)
        ksum_grad = xdl.execute(ksum_grad)
        res = np.array([[2.5], [2.5], [6], [3], [5], [2]], dtype=np.float64)
        self.assertTrue(np.allclose(ksum_grad, res))
Пример #20
0
 def test_cpu(self):
     """Check merge_sparse_op on two id/value/segment inputs against
     hard-coded ids, values, segments and groups."""
     merged_sparse = xdl.merge_sparse_op([ids1, ids2], [val1, val2],
                                         [seg1, seg2])
     ids, vals, segs, grps = xdl.execute(merged_sparse)
     res_ids = np.array([1, 2, 6, 3, 7, 8, 9, 4, 5, 10], dtype=np.int64)
     # np.float64 is what the removed np.float alias resolved to.
     res_vals = np.array([0, 0, 1, 0, 1, 1, 1, 0, 0, 1], dtype=np.float64)
     res_segs = np.array([3, 7, 10])
     res_grps = np.array([2, 3, 4, 7, 9, 10])
     self.assertTrue((ids == res_ids).all())
     self.assertTrue(np.allclose(vals, res_vals))
     self.assertTrue((segs == res_segs).all())
     self.assertTrue((grps == res_grps).all())
Пример #21
0
 def test_cpu_merged_kavg(self):
     """Check ksum with average=True and the module-level group array
     against hard-coded expected values."""
     ksum = xdl.ksum(embeds,
                     idx,
                     values,
                     segs,
                     grps,
                     sidx,
                     sseg,
                     average=True)
     ksum = xdl.execute(ksum)
     # np.float64 is what the removed np.float alias resolved to.
     res = np.array([[0.015, 0.03], [0.04, 0.025], [0.05, 0.03333333]],
                    dtype=np.float64)
     self.assertTrue(np.allclose(ksum, res))
 def test_add_1d(self):
     """Check sparse_grad_add_op on three (embedding, id) pairs with
     single-column ids against hard-coded merged ids and summed values."""
     # np.float64 is what the removed np.float alias resolved to.
     ids1 = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
     eb1 = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
                    dtype=np.float64).reshape((-1, 1))
     ids2 = np.array([0, 2, 3, 5, 7, 9], dtype=np.int64)
     eb2 = np.array([0., 0.2, 0.3, 0.5, 0.7, 0.9],
                    dtype=np.float64).reshape((-1, 1))
     ids3 = np.array([1, 4, 5, 7, 8], dtype=np.int64)
     eb3 = np.array([0.1, 0.4, 0.5, 0.7, 0.8],
                    dtype=np.float64).reshape((-1, 1))
     res_eb = np.array([0.2, 0.4, 0.6, 0.8, 1.5, 0.6, 0.0, 1.4, 0.9, 0.8],
                       dtype=np.float64).reshape((-1, 1))
     res_id = np.array([1, 2, 3, 4, 5, 6, 0, 7, 9, 8], dtype=np.int64)
     add_sparse = xdl.sparse_grad_add_op([eb1, eb2, eb3], [ids1, ids2, ids3])
     # Named out_ids rather than ``id`` to avoid shadowing the builtin.
     eb, out_ids = xdl.execute(add_sparse)
     self.assertTrue((res_id == out_ids).all())
     self.assertTrue(np.allclose(res_eb, eb))
Пример #23
0
 def test_cpu_2d(self):
     """Check merge_sparse_op with two-column id inputs against hard-coded
     ids, values, segments and groups."""
     expected_ids = np.array([[1, 2], [3, 4], [11, 12], [5, 6], [13, 14],
                              [15, 16], [17, 18], [7, 8], [9, 10], [19, 20]])
     expected_vals = np.array([0, 0, 1, 0, 1, 1, 1, 0, 0, 1])
     expected_segs = np.array([3, 7, 10])
     expected_grps = np.array([2, 3, 4, 7, 9, 10])

     merged = xdl.merge_sparse_op([ids3, ids4], [val1, val2], [seg1, seg2])
     out_ids, out_vals, out_segs, out_grps = xdl.execute(merged)

     ids_match = (out_ids == expected_ids).all()
     self.assertTrue(ids_match)
     self.assertTrue(np.allclose(out_vals, expected_vals))
     segs_match = (out_segs == expected_segs).all()
     self.assertTrue(segs_match)
     grps_match = (out_grps == expected_grps).all()
     self.assertTrue(grps_match)
Пример #24
0
 def test_cpu_tile_reverse(self):
     """Check xdl.tile with reverse=True against hard-coded expected
     values."""
     res = xdl.tile(embeds,
                    idx,
                    values,
                    segs,
                    grps,
                    length=length,
                    reverse=True)
     res = xdl.execute(res)
     # np.float64 is what the removed np.float alias resolved to.
     res_tile = np.array([[1.2, 0.4, 0.3], [0.4, 0.0, 0.0], [3.0, 1.0, 0.0],
                          [0.0, 0.0, 0.0], [3.0, 5.4, 3.2]],
                         dtype=np.float64)
     self.assertTrue(np.allclose(res, res_tile))
Пример #25
0
 def test_cpu_tile(self):
     """Check xdl.tile_grad with reverse=False against hard-coded expected
     gradients."""
     res = xdl.tile_grad(embeds,
                         idx,
                         values,
                         segs,
                         grps,
                         grads,
                         length=length,
                         reverse=False)
     res = xdl.execute(res)
     # np.float64 is what the removed np.float alias resolved to.
     res_grad = np.array([[10.7], [3.9], [0.1], [12.1], [4.8], [13.5]],
                         dtype=np.float64)
     self.assertTrue(np.allclose(res, res_grad))
Пример #26
0
    def test_cpu_with_values(self):
        """Check ksum_grad with an empty group array, first with the default
        reduction and then with average=True, against hard-coded values."""
        grps = np.array([], dtype=np.int32)
        ksum_grad = xdl.ksum_grad(embeds_shape, idx, values, segs, grps,
                                  sidx_nogrp, sseg, grads)
        ksum_grad = xdl.execute(ksum_grad)
        # np.float64 is what the removed np.float alias resolved to.
        res = np.array([[4], [3], [6], [2], [3], [3]], dtype=np.float64)
        self.assertTrue(np.allclose(ksum_grad, res))

        ksum_grad = xdl.ksum_grad(embeds_shape,
                                  idx,
                                  values,
                                  segs,
                                  grps,
                                  sidx_nogrp,
                                  sseg,
                                  grads,
                                  average=True)
        ksum_grad = xdl.execute(ksum_grad)
        res = np.array(
            [[1.0833333], [1], [1.75], [0.66666666], [0.75], [0.75]],
            dtype=np.float64)
        self.assertTrue(np.allclose(ksum_grad, res))
Пример #27
0
 def test_cpu_tile_reverse(self):
     """Check xdl.tile_grad with reverse=True against hard-coded expected
     gradients."""
     res = xdl.tile_grad(embeds,
                         idx,
                         values,
                         segs,
                         grps,
                         grads,
                         length=length,
                         reverse=True)
     res = xdl.execute(res)
     # np.float64 is what the removed np.float alias resolved to.
     res_grad = np.array([[1.6], [4.4], [13.3], [12.3], [4.2], [12.6]],
                         dtype=np.float64)
     self.assertTrue(np.allclose(res, res_grad))
 def test_add_2d_gpu(self):
     """Check sparse_grad_add_op on the GPU device with two-column ids
     against hard-coded merged ids and summed values."""
     # np.float64 is what the removed np.float alias resolved to.
     ids1 = np.array([0, 1, 2, 3, 4, 5], dtype=np.int64).reshape((-1, 2))
     eb1 = np.array([0., 0.1, 0.2, 0.3, 0.4, 0.6],
                    dtype=np.float64).reshape((-1, 2))
     ids2 = np.array([0, 1, 4, 5, 6, 7], dtype=np.int64).reshape((-1, 2))
     eb2 = np.array([0., 0.1, 0.4, 0.5, 0.6, 0.7],
                    dtype=np.float64).reshape((-1, 2))
     ids3 = np.array([2, 3, 4, 6], dtype=np.int64).reshape((-1, 2))
     eb3 = np.array([0.2, 0.3, 0.4, 0.6], dtype=np.float64).reshape((-1, 2))
     res_id = np.array([0, 1, 2, 3, 4, 5, 6, 7, 4, 6],
                       dtype=np.int64).reshape((-1, 2))
     res_eb = np.array([0., 0.2, 0.4, 0.6, 0.8, 1.1, 0.6, 0.7, 0.4, 0.6],
                       dtype=np.float64).reshape((-1, 2))
     with xdl.device("GPU"):
         add_sparse = xdl.sparse_grad_add_op([eb1, eb2, eb3],
                                             [ids1, ids2, ids3])
         # Named out_ids rather than ``id`` to avoid shadowing the builtin.
         eb, out_ids = xdl.execute(add_sparse)
         self.assertTrue(np.equal(res_id, out_ids).all())
         self.assertTrue(np.allclose(res_eb, eb))
Пример #29
0
 def test_cpu_tile_empty_value_reverse(self):
     """Check xdl.tile_grad with an empty values array and reverse=True
     against hard-coded expected gradients."""
     # np.float64 is what the removed np.float alias resolved to.
     empty_values = np.array([], dtype=np.float64)
     res = xdl.tile_grad(embeds,
                         idx,
                         empty_values,
                         segs,
                         grps,
                         grads,
                         length=length,
                         reverse=True)
     res = xdl.execute(res)
     res_grad = np.array([[0.4], [1.0], [1.6], [1.6], [0.7], [1.4]],
                         dtype=np.float64)
     self.assertTrue(np.allclose(res, res_grad))
Пример #30
0
 def test_gauc(self):
     """Run gauc_calc_op with an empty filter, feed its outputs to gauc_op,
     and compare all three results with hard-coded expectations."""
     # np.float64 is what the removed np.float alias resolved to.
     filter_label = np.array([], dtype=np.float64)
     gauc, pv_num = xdl.gauc_calc_op(labels,
                                     predicts,
                                     indicator,
                                     filter=filter_label)
     res = xdl.gauc_op(gauc, pv_num)
     gauc, pv_num, res = xdl.execute([gauc, pv_num, res])
     gauc_val = np.array([3.0], dtype=np.float32)
     pv_num_val = np.array([3], dtype=np.int32)
     gauc_res = np.array([1.], dtype=np.float32)
     self.assertTrue(np.allclose(gauc, gauc_val))
     self.assertTrue((pv_num == pv_num_val).all())
     self.assertTrue(np.allclose(res, gauc_res))
Пример #31
0
def train(is_training=True):
    """Build the "tdm" data pipeline and the MXNet model, then run the
    training or prediction loop.

    When ``is_training`` is False, only the task with index 0 proceeds; every
    other task returns immediately.

    Args:
        is_training: True to train (Adam optimizer, checkpoint hook on task 0,
            and an export of the "item_emb" variable at the end); False to
            predict, restoring from conf('saver_ckpt') on task 0.
    """
    #np.set_printoptions(threshold='nan')
    if is_training or xdl.get_task_index() == 0:
        init()
    else:
        return

    file_type = xdl.parsers.txt
    if is_training:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)

        data_io.batch_size(intconf('train_batch_size'))
        data_io.epochs(intconf('train_epochs'))
        data_io.threads(intconf('train_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        # Training files are matched by pattern "<train_sample>_<digits>" and
        # then partitioned across tasks by rank.
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(), size=xdl.get_task_num())
        print 'train: sharding.partition() =', paths
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)

        data_io.batch_size(intconf('predict_batch_size'))
        data_io.epochs(intconf('predict_epochs'))
        data_io.threads(intconf('predict_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print 'predict: add_path =', data
        iop = xdl.GetIOP("TDMPREDICTOP")
        #data_io.finish_delay(True)
    assert iop is not None
    # Configuration dictionary handed to the IO op's init().
    key_value = {}
    key_value["key"] = "value"
    key_value["debug"] = conf('tdmop_debug')
    key_value["layer_counts"] = conf('tdmop_layer_counts')
    key_value["pr_test_each_layer_retrieve_num"] = "400"
    key_value["pr_test_final_layer_retrieve_num"] = "200"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    if not is_training:
        data_io.keep_sample(True)
        data_io.pause(intconf('predict_io_pause_num'), True)
    data_io.startup()

    if not is_training:
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    emb_combiner = 'mean'    # mean | sum
    ind = batch["indicators"][0]
    ids = batch["_ids"][0]
    emb = []
    emb_dim = 24
    if is_training:
        feature_add_probability = 1.
    else:
        feature_add_probability = 0.
    import xdl.python.sparse_engine.embedding as embedding
    # All item features and unit_id_expand use the same embedding name.
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        #emb_name = "item_%s_emb" % i
        eb = xdl.embedding(emb_name, batch["item_%s" % i], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    #emb_name = "unit_id_expand_emb"
    unit_id_expand_emb = xdl.embedding(emb_name, batch["unit_id_expand"], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)

    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input, indicator, unit_id_emb, label, bs, eb_dim, fea_groups, active_op='prelu', use_batch_norm=True):
        """Define the MXNet symbol graph and return ``(prop, loss)``.

        ``fea_groups`` is a comma-separated string of window lengths.
        """
        # Split the user input into windows according to fea_groups and apply
        # average pooling within each window.
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print "fea_groups", fea_groups, "total_group_length", total_group_length, "eb_dim", eb_dim
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape, shape=(-1, total_group_length, eb_dim))
    
        layer_data = []
        # start att
        att_user_input = mx.sym.reshape(user_input, (bs, total_group_length, eb_dim))
        att_node_input = mx.sym.reshape(unit_id_emb, (bs, 1, eb_dim))
        att_node_input = mx.sym.broadcast_to(data=att_node_input, shape=(0, total_group_length, 0))
        att_din = mx.sym.concat(att_user_input, att_user_input * att_node_input, att_node_input, dim=2)

        att_active_op = 'prelu'
        att_layer_arr = []
        att_layer1 = FullyConnected3D(3*eb_dim, 36, active_op=att_active_op, version=1, batch_size=bs)
        att_layer_arr.append(att_layer1)
        att_layer2 = FullyConnected3D(36, 1, active_op=att_active_op, version=2, batch_size=bs)
        att_layer_arr.append(att_layer2)

        layer_data.append(att_din)
        for layer in att_layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        att_dout = layer_data[-1]
        att_dout = mx.sym.broadcast_to(data=att_dout, shape=(0, 0, eb_dim))

        user_input = mx.sym.reshape(user_input, shape=(bs, -1, eb_dim))
        user_input = user_input * att_dout
        # end att

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input, axis=1, begin=idx, end=idx+group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input, block, dim=1)
            idx += group_length
    
        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)
        # Expand the user features according to indicator, then feed them
        # through the network.
        #grouped_user_input_after_take = mx.symbol.take(grouped_user_input, indicator)
        grouped_user_input_after_take = grouped_user_input
        din = mx.symbol.concat(*[grouped_user_input_after_take, unit_id_emb], dim=1)
    
        net_version = "d"
        layer_arr = []
        layer1 = mx_dnn_layer(11 * eb_dim, 128, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (1, net_version))
        layer_arr.append(layer1)
        layer2 = mx_dnn_layer(128, 64, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (2, net_version))
        layer_arr.append(layer2)
        layer3 = mx_dnn_layer(64, 32, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (3, net_version))
        layer_arr.append(layer3)
        layer4 = mx_dnn_layer(32, 2, active_op='', use_batch_norm=False, version="%d_%s" % (4, net_version))
        layer_arr.append(layer4)
        #layer_data = [din]
        layer_data.append(din)
        for layer in layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        dout = layer_data[-1]
    
        # For a normal sample the two label columns always sum to 1; padded
        # samples have all-zero labels, so subtracting one yields -1, which is
        # used as the ignore label.
        ph_label_sum = mx.sym.sum(label, axis=1)
        ph_label_ignore = ph_label_sum - 1
        ph_label_ignore = mx.sym.reshape(ph_label_ignore, shape=(-1, 1))
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = ph_label_click + ph_label_ignore
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(bs, ))
    
        prop = mx.symbol.SoftmaxOutput(data=dout, label=ph_label_click, grad_scale=1.0, use_ignore=True, normalization='valid')
        origin_loss = mx.sym.log(prop) * label
        ph_label_sum = mx.sym.reshape(ph_label_sum, shape=(bs, 1))
        origin_loss = mx.sym.broadcast_mul(origin_loss, ph_label_sum)
        loss = - mx.symbol.sum(origin_loss) / mx.sym.sum(ph_label_sum)
        return prop, loss

    re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, '20,20,10,10,2,2,2,1,1,1')
    prop = re[0]
    loss = re[1]

    if is_training:
        # NOTE(review): learning_rate is read through intconf -- confirm it is
        # not meant to be a fractional value.
        train_op = xdl.Adam(learning_rate=intconf('learning_rate'), lr_decay=False).optimize()
        #train_op = xdl.SGD(0.1).optimize()
        #fc_1_weight_grad = xdl.get_gradient("fc_w_1_d")
        #fc_1_bias_grad = xdl.get_gradient("fc_b_1_d")
    else:
        fin = data_io.set_prop(prop=prop)

    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(xdl.SyncRunHook(xdl.get_task_index(), xdl.get_task_num()))
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    else:
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0
    while not sess.should_stop():
        print ">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS)])
                #result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS), unit_id_expand_emb])
            else:
                result = sess.run([loss, fin, global_step.value])
                #result = sess.run([loss, fin, ids, global_step.value])
            if result is None:
                print "result is None, finished success."
                # This break leaves only the inner 200-iteration loop; the
                # outer loop ends when sess.should_stop() becomes true.
                break
            if not is_training:
                print "global_step =", result[-1]
                #print "batch['_ids'] =", result[-2]
            #else:
            #   print "unit_id_expand_emb = { mean =", result[-1].mean(), ", std =", result[-1].std(), "}"
            loop_num += 1
        # Timing is accumulated only once loop_num exceeds statis_begin_loop.
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            #print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    if is_training:
        xdl.execute(xdl.ps_synchronize_leave_op(np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print 'start put item_emb'
            def _string_to_int8(src):
                # The op receives its string arguments as int8 arrays of
                # character codes.
                return np.array([ord(ch) for ch in src], dtype=np.int8)
            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(checkpoint_dir=_string_to_int8(get_ckpt_dir()), 
                                                 output_dir=_string_to_int8(output_dir), 
                                                 variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            # Fetch the exported file, strip the first two characters of every
            # line with sed, then upload it back over the original.
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print 'finish put item_emb'
Пример #32
0
def train(is_training=True):
    if is_training or xdl.get_task_index() == 0:
        init()
    else:
        return

    file_type = xdl.parsers.txt
    if is_training:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)

        data_io.batch_size(intconf('train_batch_size'))
        data_io.epochs(intconf('train_epochs'))
        data_io.threads(intconf('train_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(), size=xdl.get_task_num())
        print 'train: sharding.partition() =', paths
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)

        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)
        data_io.feature(name="test_unit_id", type=xdl.features.sparse, table=1)

        data_io.batch_size(intconf('predict_batch_size'))
        data_io.epochs(intconf('predict_epochs'))
        data_io.threads(intconf('predict_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print 'predict: add_path =', data
        iop = xdl.GetIOP("TDMPREDICTOP")
        #data_io.finish_delay(True)
    assert iop is not None
    key_value = {}
    key_value["key"] = "value"
    key_value["debug"] = conf('tdmop_debug')
    key_value["layer_counts"] = conf('tdmop_layer_counts')
    key_value["start_sample_layer"] = "22"
    key_value["pr_test_each_layer_retrieve_num"] = "400"
    key_value["pr_test_final_layer_retrieve_num"] = "200"
    if not is_training:
        key_value["expand_mode"] = "vector"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    data_io.startup()

    if not is_training:
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    emb_combiner = 'mean'    # mean | sum
    if not is_training:
        gt_ids = batch["_ids"][-1]
        gt_segments = batch["_segments"][-1]
    emb = []
    emb_dim = 24
    if is_training:
        feature_add_probability = 1.
    else:
        feature_add_probability = 0.
    import xdl.python.sparse_engine.embedding as embedding
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        eb = xdl.embedding(emb_name, batch["item_%s" % i], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    unit_id_expand_emb = xdl.embedding(emb_name, batch["unit_id_expand"], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)

    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input, indicator, unit_id_emb, label, bs, eb_dim, sample_num, fea_groups, active_op='prelu', use_batch_norm=True):
        # 把用户输入按fea_groups划分窗口,窗口内做avg pooling
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print "fea_groups", fea_groups, "total_group_length", total_group_length, "eb_dim", eb_dim
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape, shape=(-1, total_group_length, eb_dim))

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input, axis=1, begin=idx, end=idx + group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input, block, dim=1)
            idx += group_length

        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)
        grouped_user_input_after_take = grouped_user_input

        net_version = "e"
        layer_arr = []
        layer1 = mx_dnn_layer(10 * eb_dim, 128, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (1, net_version))
        layer_arr.append(layer1)
        layer2 = mx_dnn_layer(128, 64, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (2, net_version))
        layer_arr.append(layer2)
        layer3 = mx_dnn_layer(64, 24, active_op='', use_batch_norm=False, version="%d_%s" % (3, net_version))
        layer_arr.append(layer3)

        layer_data = [grouped_user_input_after_take]
        for layer in layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        dout = layer_data[-1]

        inner_product = mx.sym.sum(dout * unit_id_emb, axis=1)

        softmax_input = mx.sym.Reshape(inner_product,
                                       shape=(
                                           bs / sample_num,
                                           sample_num
                                       )
                                       )

        # 用正例的label减1作为softmax的label
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(bs / sample_num, sample_num)) - 1
        ph_label_click = mx.sym.slice_axis(ph_label_click, axis=1, begin=0, end=1)
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(bs / sample_num, ))

        prop = mx.symbol.SoftmaxOutput(data=softmax_input, label=ph_label_click, normalization='valid', use_ignore=True)

        positive_prop = mx.sym.slice_axis(prop, axis=1, begin=0, end=1)
        positive_prop = mx.sym.reshape(positive_prop,
                                       shape=(bs / sample_num, )
                                       )

        # 实际的有效样本数量是(bs/sample_num)减去需要ignore的label数量
        loss = -mx.sym.sum(mx.symbol.log(positive_prop)) / (bs / sample_num + mx.sym.sum(ph_label_click))

        user_vector = mx.sym.reshape(dout, shape=(bs / sample_num, sample_num, eb_dim))
        user_vector = mx.sym.slice_axis(user_vector, axis=1, begin=0, end=1)
        user_vector = mx.sym.reshape(user_vector, shape=(bs / sample_num, eb_dim))

        return prop, loss, mx.sym.BlockGrad(user_vector)

    if is_training:
        re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, 600, '20,20,10,10,2,2,2,1,1,1')
    else:
        re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, 1, '20,20,10,10,2,2,2,1,1,1')
    prop = re[0]
    loss = re[1]

    if is_training:
        train_op = xdl.Adam(learning_rate=intconf('learning_rate')).optimize()
    else:
        user_vector = re[2]
 
    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(xdl.SyncRunHook(xdl.get_task_index(), xdl.get_task_num()))
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    else:
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0

    if not is_training:
        urun_re = iop.urun({"get_level_ids": key_value["start_sample_layer"]})
        item_num = len(urun_re)
        item_ids = np.array([int(iid) for iid in urun_re.keys()], dtype=np.int64).reshape((item_num, 1))
        print 'item_ids shape: '
        print item_ids.shape
        zeros = np.zeros((item_num, 1), dtype=np.int64)
        hash_ids = np.concatenate((zeros, item_ids), axis=1)
        item_embeddings = xdl.execute(xdl.ps_sparse_pull_op(hash_ids, var_name="item_emb", var_type="hash", save_ratio=1.0, otype=xdl.DataType.float))
        item_embeddings = item_embeddings.transpose()
        print 'item_embeddings shape: '
        print item_embeddings.shape

        hit_num_list = []
        precision_list = []
        recall_list = []
        gt_num_list = []
        user_idx = 1

    while not sess.should_stop():
        print ">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS)])
            else:
                result = sess.run([user_vector, global_step.value, gt_ids, gt_segments])
            if result is None:
                print "result is None, finished success."
                break
            if not is_training:
                print "global_step =", result[1]
                batch_uv = result[0]
                batch_gt = result[2]
                batch_seg = result[3]

                batch_uv = batch_uv[0:len(batch_seg)]
                batch_scores = np.matmul(batch_uv, item_embeddings)

                sorted_idx = np.argsort(-batch_scores, axis=1)

                sorted_idx = sorted_idx[:, :int(key_value["pr_test_final_layer_retrieve_num"])]
                gt_id_start_idx = 0
                for i in xrange(len(batch_seg)):
                    pred_set = set(item_ids[sorted_idx[i, :], 0])
                    gt_dict = {}
                    for gt in batch_gt[gt_id_start_idx:batch_seg[i], 1]:
                        if gt in gt_dict:
                            gt_dict[gt] += 1
                        else:
                            gt_dict[gt] = 1

                    test_gt_list = batch_gt[gt_id_start_idx:batch_seg[i], 1].tolist()
                    test_gt_str = ','.join([str(gtid) for gtid in test_gt_list])
                    test_pred_list = item_ids[sorted_idx[i, :], 0].tolist()
                    test_pred_str = ','.join([str(gtid) for gtid in test_pred_list])

                    user_idx += 1

                    gt_set = set(batch_gt[gt_id_start_idx:batch_seg[i], 1])
                    comm_set = gt_set.intersection(pred_set)

                    hit_num = sum([float(gt_dict[item]) if item in gt_dict else 0.0 for item in comm_set])
                    hit_num_list.append(hit_num)

                    if len(pred_set) > 0:
                        precision = hit_num / len(pred_set)
                    else:
                        precision = 0.0

                    if len(gt_dict) > 0:
                        recall = hit_num / (batch_seg[i] - gt_id_start_idx)
                    else:
                        recall = 0.0

                    precision_list.append(precision)
                    recall_list.append(recall)
                    gt_num_list.append(float(batch_seg[i] - gt_id_start_idx))

                    gt_id_start_idx = batch_seg[i]

                print "=================================================="
                print 'predicted user num is: %d' % len(hit_num_list)
                print 'gt num is: %f' % sum(gt_num_list)
                print 'precision: %f' % (sum(precision_list) / len(hit_num_list))
                print 'recall: %f' % (sum(recall_list) / len(hit_num_list))
                print 'global recall: %f' % (sum(hit_num_list) / sum(gt_num_list))
                print "=================================================="

            loop_num += 1
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            #print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    if not is_training:
        print "=================================================="
        print 'predicted user num is: %d' % len(hit_num_list)
        print 'gt num is: %f' % sum(gt_num_list)
        print 'precision: %f' % (sum(precision_list) / len(hit_num_list))
        print 'recall: %f' % (sum(recall_list) / len(hit_num_list))
        print 'global recall: %f' % (sum(hit_num_list) / sum(gt_num_list))
        print "=================================================="

    if is_training:
        xdl.execute(xdl.ps_synchronize_leave_op(np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print 'start put item_emb'

            def _string_to_int8(src):
                return np.array([ord(ch) for ch in src], dtype=np.int8)
            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(checkpoint_dir=_string_to_int8(get_ckpt_dir()),
                                                 output_dir=_string_to_int8(output_dir),
                                                 variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print 'finish put item_emb'