def mock_embedding(name1, name2):
    """Build two 'sum' embeddings (vtype='hash') over fixed sparse inputs.

    Both sparse inputs carry the values [1.0, 2.0, 3.0] in a single segment
    of length 3; they differ only in their two-column int64 ids.

    Args:
        name1: name passed to ``xdl.embedding`` for the first input.
        name2: name passed to ``xdl.embedding`` for the second input.

    Returns:
        A two-element list ``[emb, emb2]``; each output has had its shape
        set to ``(1, 3)``.
    """

    def build(name, id_rows):
        # Assemble the (ids, values, segments) triple for one sparse input.
        ids = xdl.convert_to_tensor(np.array(id_rows, dtype=np.int64))
        values = xdl.convert_to_tensor(
            np.array([1.0, 2.0, 3.0], dtype=np.float32))
        segments = xdl.convert_to_tensor(np.array([3], dtype=np.int32))
        sparse = xdl.SparseTensor(ids, values, segments)
        # ``embed_dim`` is not defined here; it is resolved from the
        # surrounding scope at call time.
        out = xdl.embedding(name, sparse, xdl.Ones(), embed_dim, 16, 'sum',
                            vtype='hash')
        out.set_shape((1, 3))
        return out

    first = build(name1, [[0, 0], [0, 1], [0, 2]])
    second = build(name2, [[0, 1], [0, 2], [1, 1]])
    return [first, second]
def _dequeue_sparse(self, tensors, offset):
    """Rebuild an ``xdl.SparseTensor`` from consecutive entries of ``tensors``.

    Args:
        tensors: indexable sequence holding the flattened components.
        offset: object exposing ``offset`` (index of the first component)
            and ``has_unique_id``.

    Returns:
        An ``xdl.SparseTensor`` constructed from six consecutive entries
        when ``offset.has_unique_id`` is truthy, otherwise from three.
    """
    start = offset.offset
    component_count = 6 if offset.has_unique_id else 3
    # Index element by element (rather than slicing) so that a sequence that
    # is too short raises instead of silently yielding fewer components.
    components = [tensors[start + k] for k in range(component_count)]
    return xdl.SparseTensor(*components)
def eval_input_fn():
    """Create mock evaluation inputs.

    Returns:
        A pair ``([dense, emb], labels)`` where ``dense`` is a mock dense op
        of shape [1, 16] filled with 0.01, ``emb`` is a 'sum' embedding named
        "sparse" over a mock sparse op, and ``labels`` is a mock dense op of
        shape [1, 1] filled with 1.0.
    """
    dense_feature = xdl.mock_dense_op(shape=[1, 16], value=0.01)
    # Leading dimension is declared as None (unspecified).
    dense_feature.set_shape([None, 16])

    label_tensor = xdl.mock_dense_op(shape=[1, 1], value=1.0)
    label_tensor.set_shape([None, 1])

    sparse_ids, sparse_values, sparse_segments = xdl.mock_sparse_op(
        dense_shape=[1, 16])
    sparse_feature = xdl.SparseTensor(
        sparse_ids, sparse_values, sparse_segments)
    embedded = xdl.embedding(
        "sparse", sparse_feature, xdl.Ones(), 1, 16, 'sum')

    features = [dense_feature, embedded]
    return features, label_tensor
def input_fn():
    """Create mock training inputs, including an indicator tensor.

    Returns:
        A pair ``([dense, emb, indicator], labels)``:
        ``dense`` is a mock dense op of shape [1, 16] (value 0.01),
        ``emb`` is a 'sum' embedding named "sparse" over a mock sparse op,
        ``indicator`` is a mock dense op of shape [5] (value 0.0) and
        ``labels`` is a mock dense op of shape [5, 1] (value 1.0).
    """
    dense_feature = xdl.mock_dense_op(shape=[1, 16], value=0.01)
    dense_feature.set_shape([1, 16])

    indicator_tensor = xdl.mock_dense_op(shape=[5], value=0.0)
    indicator_tensor.set_shape([5])

    label_tensor = xdl.mock_dense_op(shape=[5, 1], value=1.0)
    label_tensor.set_shape([5, 1])

    sparse_ids, sparse_values, sparse_segments = xdl.mock_sparse_op(
        dense_shape=[1, 16])
    sparse_feature = xdl.SparseTensor(
        sparse_ids, sparse_values, sparse_segments)
    # The sparse tensor's shape is set before it is fed to the embedding.
    sparse_feature.set_shape([1, 16])
    embedded = xdl.embedding(
        "sparse", sparse_feature, xdl.Ones(), 1, 16, 'sum')

    features = [dense_feature, embedded, indicator_tensor]
    return features, label_tensor
def _py_func(self, fn, sparse_cnt=7):
    """Wrap ``fn`` with ``xdl.py_func`` and regroup its leading outputs.

    The declared output types are, in order: ``sparse_cnt`` triples of
    (int64, float32, int32), one (float32, float32, int32) triple, and
    eight int32 entries.

    Args:
        fn: callable handed to ``xdl.py_func`` (invoked with no inputs).
        sparse_cnt: number of leading output triples to wrap as
            ``xdl.SparseTensor``.

    Returns:
        A list holding ``sparse_cnt`` SparseTensors followed by the
        remaining outputs of ``xdl.py_func`` unchanged.
    """
    sparse_triple = [np.int64, np.float32, np.int32]
    types = sparse_triple * sparse_cnt
    types += [np.float32, np.float32, np.int32]
    types += [np.int32] * 8

    datas = xdl.py_func(fn, [], output_type=types)

    # Outputs [0, tail_start) are the sparse triples; the rest pass through.
    tail_start = 3 * sparse_cnt
    sparse_tensors = [
        xdl.SparseTensor(datas[base], datas[base + 1], datas[base + 2])
        for base in range(0, tail_start, 3)
    ]
    return sparse_tensors + datas[tail_start:]
def main():
    """Build the mock "ams_main" graph and evaluate the "gear_grad" collection.

    Returns:
        The result of running everything in the "gear_grad" collection in a
        fresh ``xdl.TrainSession``.
    """
    dense_input = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    dense_input.set_shape([None, 16])

    gear_input = xdl.mock_dense_op(shape=[1, 1], value=0.01, name_="gear")
    gear_input.set_shape([None, 1])

    label_input = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    label_input.set_shape([None, 1])

    wide_ids, wide_values, wide_segments = xdl.mock_sparse_op(
        dense_shape=[1, 16], name_="wide")
    wide_sparse = xdl.SparseTensor(wide_ids, wide_values, wide_segments)
    embedded = xdl.embedding("sparse", wide_sparse, xdl.Ones(), 1, 16, 'sum')

    with xdl.model_scope("ams_main"):
        # The value returned by the model call is not used by this function.
        wrapped_model = ams_main(main_model)
        wrapped_model(dense_input, embedded, label_input,
                      gear_inputs=[gear_input])
        # NOTE(review): session creation and the collection lookup are kept
        # inside the model scope in case the lookup is scope-sensitive --
        # confirm against the intended layout.
        session = xdl.TrainSession()
        gear_grads = xdl.get_collection("gear_grad")
        return session.run(gear_grads)
def test_all(self):
    """Run two SGD(0.5) steps and check loss, sparse grads and weights.

    After each step the loss, the gradient of the 'sparse' embedding and
    the embedding rows gathered at the three input keys are compared, with
    exact float32 equality, against hard-coded reference values.
    """
    key_rows = [[0, 0], [0, 1], [0, 2]]

    def make_keys():
        # A fresh int64 array is built for every use.
        return np.array(key_rows, dtype=np.int64)

    def expect_exact(actual, expected):
        # Element-wise exact comparison against a float32 reference.
        reference = np.array(expected, dtype=np.float32)
        self.assertTrue((actual == reference).all())

    dense = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    labels = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")
    ids = xdl.convert_to_tensor(make_keys())
    values = xdl.convert_to_tensor(
        np.array([1.0, 2.0, 3.0], dtype=np.float32))
    segments = xdl.convert_to_tensor(np.array([3], dtype=np.int32))
    sparse = xdl.SparseTensor(ids, values, segments)
    emb = xdl.embedding("sparse", sparse, xdl.Ones(), 1, 16, 'sum',
                        vtype='hash')
    loss = model(dense, emb, labels)
    train_op = xdl.SGD(0.5).optimize()
    sess = xdl.TrainSession()

    def train_step():
        # One optimizer step; also fetches the loss and the sparse gradient.
        fetches = [train_op, loss, xdl.get_sparse_grads('sparse').grad]
        return sess.run(fetches)

    # ---- first step ----
    _, loss_value, grad_value = train_step()
    expect_exact(loss_value, 0.0024364376)
    expect_exact(grad_value,
                 [[-0.002433472], [-0.004866944], [-0.007300416]])

    sparse_var = xdl.get_variable_by_name('sparse')

    def read_weights():
        # Current embedding rows for the three input keys.
        return sess.run(sparse_var.gather(make_keys()))

    expect_exact(read_weights(),
                 [[1.0012168], [1.0024334], [1.0036502]])

    # ---- second step ----
    _, loss_value, grad_value = train_step()
    expect_exact(loss_value, 0.002395329)
    expect_exact(grad_value,
                 [[-0.0023924622], [-0.0047849244], [-0.0071773864]])
    expect_exact(read_weights(),
                 [[1.002413], [1.0048258], [1.0072389]])
def main():
    """Build a small mock model and evaluate its loss and sparse gradient.

    Returns:
        A pair ``(loss, gradients)``: the evaluated loss and the evaluated
        gradient of the 'sparse' embedding, both from a single
        ``sess.run`` call.
    """
    dense_input = xdl.mock_dense_op(shape=[1, 16], value=0.01, name_="dense")
    label_input = xdl.mock_dense_op(shape=[1, 1], value=1.0, name_="label")

    id_tensor = xdl.convert_to_tensor(
        np.array([[0, 0], [0, 1], [0, 2]], dtype=np.int64))
    value_tensor = xdl.convert_to_tensor(
        np.array([1.0, 2.0, 3.0], dtype=np.float32))
    segment_tensor = xdl.convert_to_tensor(np.array([3], dtype=np.int32))
    sparse_input = xdl.SparseTensor(id_tensor, value_tensor, segment_tensor)

    embedded = xdl.embedding("sparse", sparse_input, xdl.Ones(), 1, 16, 'sum',
                             vtype='hash')
    loss_op = model(dense_input, embedded, label_input)

    # The optimizer op is built but never run here.
    # NOTE(review): presumably optimize() is what makes the sparse gradient
    # fetched below available -- confirm before removing this call.
    xdl.SGD(0.5).optimize()

    session = xdl.TrainSession()
    sparse_grad_op = xdl.get_sparse_grads('sparse').grad
    loss_value, gradients = session.run([loss_op, sparse_grad_op])
    return loss_value, gradients
def next_batch(self):
    """Produce one batch by running ``self.read_and_parse_data`` via py_func.

    Declared ``xdl.py_func`` outputs, in order:
        0-2  (int32, float32, int32) triple for click_seq
        3-5  (int32, float32, int32) triple for itemid
        6    float32 label
        7    int32 entry belonging to click_seq
        8    int32 entry belonging to itemid
    NOTE(review): what outputs 7 and 8 contain is not visible here --
    confirm against ``read_and_parse_data``.

    :return: a five-element list: the click_seq SparseTensor, the itemid
        SparseTensor, then outputs 6-8 unchanged (so index 2 is the label).
    """
    # Only click_seq and itemid are sparse.
    sparse_cnt = 2

    types = [np.int32, np.float32, np.int32] * sparse_cnt
    types.extend([np.float32, np.int32, np.int32])

    datas = xdl.py_func(self.read_and_parse_data, [], output_type=types)

    # Each sparse feature occupies three consecutive outputs, passed to
    # SparseTensor as (ids, values, segments).
    tail_start = 3 * sparse_cnt
    sparse_tensors = [
        xdl.SparseTensor(datas[base], datas[base + 1], datas[base + 2])
        for base in range(0, tail_start, 3)
    ]
    # Three raw outputs remain after the sparse triples.
    return sparse_tensors + datas[tail_start:]
def read(self):
    """Fetch one batch with ``xdl.get_batch`` and return it as a dict.

    The returned dict contains:
        "indicators", "_indices", "_ids", "_segments", "_svalues",
        "_dvalues": outputs 0-5 of ``xdl.get_batch``;
        "skbuf", "sklen": outputs 6 and 7, only when ``self._keep_skey``
        is truthy;
        "label": output 8, with shape set to
        ``[batch_size, label_count]``;
        one entry per name in ``self._sparse_list`` holding an
        ``xdl.SparseTensor`` (shape ``[batch_size]``, named after the
        feature);
        one entry per name in ``self._dense_list`` holding the dense value
        tensor (shape ``(batch_size, nvec)``).

    ``self.append_tags()`` is called just before returning.

    Returns:
        dict: the batch described above.

    Raises:
        AssertionError: if the batch size or label count is not positive,
            if the number of returned sparse/dense outputs does not match
            the configured feature lists, or if a feature's entry in
            ``self._fea_dict`` is missing or has an unexpected type.
    """
    assert self._batch_size > 0
    assert self._label_count > 0

    out = xdl.get_batch(ds=self._ds_name,
                        sparse_count=len(self._sparse_list),
                        dense_count=len(self._dense_list),
                        indicator_count=self._nindicator,
                        dtype=xdl.DataType.float)

    batch = dict()
    batch["indicators"] = out[0]
    batch["_indices"] = out[1]
    batch["_ids"] = out[2]
    batch["_segments"] = out[3]
    batch["_svalues"] = out[4]
    batch["_dvalues"] = out[5]
    if self._keep_skey:
        batch["skbuf"] = out[6]
        batch["sklen"] = out[7]
    # The label is read unconditionally: its shape is set unconditionally
    # further down.
    batch["label"] = out[8]

    ### indicator
    for indicator in batch["indicators"]:
        indicator.set_shape([self._batch_size])

    ### sparse
    assert len(self._sparse_list) == len(batch['_indices'])
    assert len(self._sparse_list) == len(batch['_ids'])
    assert len(self._sparse_list) == len(batch['_svalues'])
    assert len(self._sparse_list) == len(batch['_segments'])
    for i, name in enumerate(self._sparse_list):
        # The indices output is only forwarded when unique ids are enabled.
        batch[name] = xdl.SparseTensor(
            batch['_ids'][i],
            batch["_svalues"][i],
            batch['_segments'][i],
            batch['_indices'][i] if self._unique_ids else None)
        opt = self._fea_dict.get(name)
        assert opt is not None
        assert opt['type'] == pybind.features.sparse
        batch[name].set_shape([self._batch_size])
        batch[name].set_name(name)

    ### dense
    assert len(self._dense_list) == len(batch['_dvalues'])
    for i, name in enumerate(self._dense_list):
        batch[name] = batch["_dvalues"][i]
        opt = self._fea_dict.get(name)
        assert opt is not None
        assert opt['type'] == pybind.features.dense
        assert opt['nvec'] > 0
        batch[name].set_shape((self._batch_size, opt['nvec']))

    ### label
    batch['label'].set_shape([self._batch_size, self._label_count])

    ### tags
    self.append_tags()
    return batch