def update_global_step_op(self):
    """Build the op that adds one to the global step variable.

    Returns:
        The result of ``xdl.ps_assign_add_op`` called with the global step's
        ``name`` and ``vtype`` and an int64 delta of 1.
    """
    step_var = get_global_step()
    one = np.array(1, dtype=np.int64)
    return xdl.ps_assign_add_op(var_name=step_var.name,
                                var_type=step_var.vtype,
                                delta=one)
def __init__(self, save_interval_step, is_training=True):
    """Initialise the hook state.

    Args:
        save_interval_step: stored as ``self._save_interval``.
        is_training: stored as ``self._is_training``.
    """
    super(HashIncreaseStreamHook, self).__init__()
    # Plain bookkeeping values first ...
    self._save_interval = save_interval_step
    self._is_training = is_training
    self._last_save_step = 0
    # ... then the objects obtained from the framework.
    self._global_step = get_global_step()
    self._saver = HashIncreaser()
def __init__(self, **kwargs):
    """Collect the Tensor-valued keyword arguments as meta entries.

    Keyword arguments whose value is not a ``Tensor`` are ignored. The
    global step value is always stored under ``'xdl_global_step'``.
    """
    self._meta = {
        key: val for key, val in kwargs.iteritems() if isinstance(val, Tensor)
    }
    self._meta['xdl_global_step'] = get_global_step().value
def __init__(self, save_interval_step, is_training=True):
    """Initialise the checkpoint hook.

    Args:
        save_interval_step: stored as ``self._save_interval``.
        is_training: stored as ``self._is_training``.
    """
    super(CheckpointHook, self).__init__()
    self._global_step = get_global_step()
    # The saver is built on the directory returned by get_ckpt_dir().
    ckpt_dir = get_ckpt_dir()
    self._ckpt_dir = ckpt_dir
    self._saver = Saver(ckpt_dir)
    self._save_interval = save_interval_step
    self._is_training = is_training
    # Counters / flags start in their initial state.
    self._save_cnt = 0
    self._first_run = True
def __init__(self, vars, interval_steps, expire_steps):
    """Initialise the filter hook (registered with priority 2999).

    Args:
        vars: stored as ``self._vars``.
        interval_steps: stored as ``self._interval_steps``.
        expire_steps: stored as ``self._expire_steps``.
    """
    super(GlobalStepFilterHook, self).__init__(priority=2999)
    self._vars = vars
    self._interval_steps = interval_steps
    self._expire_steps = expire_steps
    self._global_step = get_global_step()
    # Both step trackers start at zero.
    self._last_filter_step = 0
    self.gstep_val = 0
def __init__(self, var_name, ids):
    """Create the op that marks ``ids`` of ``var_name`` with the global step.

    Args:
        var_name: used to look up the embedding info and forwarded to
            ``xdl.ps_mark_op`` as ``var_name``.
        ids: forwarded to ``xdl.ps_mark_op`` as ``ids``.
    """
    super(GlobalStepMarkHook, self).__init__()
    # Function-scope wildcard import; get_embedding_info_by_name is
    # presumably provided by it -- TODO confirm and import it explicitly.
    from xdl.python.sparse_engine.embedding import *
    embedding_info = get_embedding_info_by_name(var_name)
    global_step = get_global_step().value
    # The mark op is created under a control dependency on the embedding.
    with xdl.control_dependencies([embedding_info.embedding]):
        self._update = xdl.ps_mark_op(ids=ids, i=global_step, var_name=var_name, pattern="global_step")
def __init__(self, save_interval_step, is_training=True, export_graph=True, as_text=True):
    """Initialise the checkpoint hook.

    Args:
        save_interval_step: stored as ``self._save_interval``.
        is_training: stored as ``self._is_training``.
        export_graph: stored as ``self._export_graph``.
        as_text: stored as ``self._as_text``.
    """
    super(CheckpointHook, self).__init__()
    # Framework-provided objects, created in the original order.
    self._global_step = get_global_step()
    self._ckpt_dir = get_ckpt_dir()
    self._saver = Saver()
    # Caller-supplied configuration.
    self._save_interval = save_interval_step
    self._is_training = is_training
    self._export_graph = export_graph
    self._as_text = as_text
    # Initial bookkeeping state.
    self._save_cnt = 0
    self._first_run = True
def __init__(self, vars, interval_steps, expire_steps):
    """Initialise the filter hook (registered with priority 2999).

    Args:
        vars: stored as ``self._vars``.
        interval_steps: stored as ``self._interval_steps``.
        expire_steps: stored as ``self._expire_steps``.

    Raises:
        Exception: when ``is_local_mode()`` is true.
    """
    super(GlobalStepFilterHook, self).__init__(priority=2999)
    local_mode = is_local_mode()
    if local_mode:
        raise Exception(
            "GlobalStepFilterHook only support distributed mode")
    self._vars = vars
    self._interval_steps = interval_steps
    self._expire_steps = expire_steps
    self._global_step = get_global_step()
    # Both step trackers start at zero.
    self._last_filter_step = 0
    self.gstep_val = 0
def save_meta(self, version, **kwargs):
    """Write the meta values for ``version`` as JSON.

    The global step value is added under ``'xdl_global_step'``. Tensor
    values are evaluated with ``execute``; everything else is wrapped in a
    numpy array. Each value is then reduced to the minimum of its flattened
    elements, and the resulting mapping is written to
    ``<ckpt_dir>/<version>/.meta``.

    Args:
        version: sub-directory of the checkpoint dir to write into.
        **kwargs: meta entries to store.
    """
    kwargs['xdl_global_step'] = get_global_step().value
    names = kwargs.keys()
    # First pass: materialise every entry as an array-like value.
    arrays = []
    for name in names:
        entry = kwargs[name]
        if isinstance(entry, Tensor):
            arrays.append(execute(entry))
        else:
            arrays.append(np.array(entry))
    # Second pass: collapse each array to a single scalar.
    scalars = [min(arr.flatten().tolist()) for arr in arrays]
    assert len(names) == len(scalars)
    payload = json.dumps(dict(zip(names, scalars)))
    meta_path = os.path.join(self._ckpt_dir, version, '.meta')
    write_string_to_file(meta_path, payload)
def __init__(self, save_interval_step=None, save_interval_secs=None, is_training=True, meta=None, tf_backend=False, max_to_keep=5, tf_graph_name=None):
    """Initialise the checkpoint hook (registered with priority 3000).

    Exactly one of ``save_interval_step`` / ``save_interval_secs`` may be
    given; when neither is given a step interval of 10000 is used.

    Args:
        save_interval_step: stored as ``self._save_interval_step``.
        save_interval_secs: stored as ``self._save_interval_secs``.
        is_training: stored as ``self._is_training``.
        meta: stored as ``self._meta``.
        tf_backend: accepted but not used in this constructor.
        max_to_keep: stored as ``self._max_to_keep``.
        tf_graph_name: forwarded to ``Saver`` as its second argument.

    Raises:
        ValueError: when both intervals are set.
    """
    super(CheckpointHook, self).__init__(priority=3000)
    self._global_step = get_global_step()
    ckpt_dir = get_ckpt_dir()
    self._ckpt_dir = ckpt_dir
    self._saver = Saver(ckpt_dir, tf_graph_name)
    self._last_save_time = time.time()

    self._save_interval_step = save_interval_step
    self._save_interval_secs = save_interval_secs
    self._is_training = is_training
    self._meta = meta
    self._max_to_keep = max_to_keep
    self._last_save_step = 0
    self._ckpt_queue = []

    # Validate the interval configuration.
    has_steps = save_interval_step is not None
    has_secs = save_interval_secs is not None
    if has_steps and has_secs:
        raise ValueError("Checkpoint interval_steps and interval_secs can't be both set.")
    if not (has_steps or has_secs):
        print("Checkpoint interval_steps and interval_secs both not set, use default 10000 steps.")
        self._save_interval_step = 10000

    self.gstep_val = 0
    self.meta_val = None
def embedding(name, sparse_input, initializer, emb_dim, feature_dim, combiner='sum', vtype=VarType.Index, length=50, reverse=False, batch_read=3000, feature_add_probability=1.0, cbf=0, device='CPU', statis_list=None, statis_decay=0.07, statis_decay_period=100, labels=None, save=True, **device_attr):
    """xdl embedding

    Args:
      name: name for embedding, will be used for declaring variable on ps-plus
      sparse_input: a sparse tensor represent input data
      initializer: initializer for the variable on ps-plus
      emb_dim: embedding dimension
      feature_dim: sparse input dimension, for pre-allocate memory
      combiner: reduce operator, support sum|mean|tile
      vtype: variable type passed to the created variables
      length: forwarded to the 'tile' combiner and to EmbeddingInfo
      reverse: forwarded to the 'tile' combiner
      batch_read: forwarded to variable_info as batch_read
      feature_add_probability: forwarded to variable_info as save_ratio
      cbf: forwarded to variable_info as bloom_filter
      device: device used for the unique op and the combiner ops
      statis_list: optional list of statis types; one non-trainable
        statistics variable is created per entry
      statis_decay: forwarded to each statistics variable
      statis_decay_period: forwarded to each statistics variable
      labels: must not be None when statis_list is given
      save: forwarded to variable_info as the string "true" or "false"
      **device_attr: extra attributes forwarded together with device
    Returns:
      a tensor represent embedding result; when statis_list is given, a
      tuple (embeddings, statis_results)
    Raises:
      Exception: if combiner is not one of sum|mean|tile
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from whitespace-collapsed text -- confirm their extent.
    global EMBEDDING_LIST, EMBEDDING_SET
    # Register each embedding name once, keeping first-seen order.
    if name not in EMBEDDING_SET:
        EMBEDDING_SET.add(name)
        EMBEDDING_LIST.append(name)
    import xdl.python.framework.variable as variable
    with variable.variable_info(batch_read=batch_read, save_ratio=feature_add_probability, bloom_filter=cbf, save="true" if save else "false"):
        var = variable.Variable(name=name,
                                dtype=DataType.float,
                                shape=[feature_dim, emb_dim],
                                initializer=initializer,
                                vtype=vtype,
                                trainable=True)
    if statis_list is not None:
        # Statistics variables share the embedding's name and vtype.
        statis_vars = []
        for statis_type in statis_list:
            statis_var = variable.Variable(name=name,
                                           dtype=DataType.float,
                                           shape=[feature_dim, 1],
                                           initializer=xdl.Zeros(),
                                           vtype=vtype,
                                           trainable=False,
                                           statis_type=statis_type,
                                           statis_decay=statis_decay,
                                           statis_decay_period=statis_decay_period)
            statis_vars.append(statis_var)
    if sparse_input.has_unique_ids():
        # The input already carries unique ids and index information.
        unique_ids = xdl.identity_op(sparse_input.ids)
        idx = sparse_input.indices
        embeddings = var.gather(unique_ids)
        sidx = sparse_input.sidx
        sseg = sparse_input.sseg
    else:
        # Otherwise compute the unique ids on the requested device.
        with xdl.device(device, **device_attr):
            unique_ids, idx, sidx, sseg = xdl.unique(sparse_input.ids, sparse_input.segments, itype=DataType.int32)
        embeddings = var.gather(unique_ids)
    if statis_list is not None:
        assert labels is not None
        from xdl.python.training.training_utils import get_global_step
        global_step = get_global_step()
        statis_results = []
        for statis_var in statis_vars:
            statis_result = statis_var.statis(sparse_input.ids, idx, sparse_input.segments, sidx, sseg, labels, global_step.value)
            statis_results.append(statis_result)
    global _EMBEDDING_TENSOR
    # Remember which variable produced the gathered tensor.
    _EMBEDDING_TENSOR[embeddings] = var
    import xdl.python.sparse_engine.embedding_ops as embedding_ops
    import numpy as np
    if combiner == 'sum':
        embeddings = embedding_ops.ksum(
            embeddings, idx, sparse_input.values, sparse_input.segments,
            sidx, sseg, device, **device_attr)
    elif combiner == 'mean':
        embeddings = embedding_ops.kmean(
            embeddings, idx, sparse_input.values, sparse_input.segments,
            sidx, sseg, device, **device_attr)
    elif combiner == 'tile':
        # An empty float32 array is passed where the values would go.
        embeddings = embedding_ops.tile(
            embeddings, idx, np.array([], dtype=np.float32),  #sparse_input.values,
            sparse_input.segments, length, reverse, device, **device_attr)
    else:
        raise Exception("Unrecognized combiner:" + str(combiner))
    if sparse_input.shape is not None and len(sparse_input.shape) > 0:
        embeddings.set_shape([sparse_input.shape[0], emb_dim]);
    emb_info = EmbeddingInfo(name, feature_dim, emb_dim, combiner, None, var, length, embeddings)
    set_embedding_info([var], emb_info)
    if statis_list is not None:
        return embeddings, statis_results
    return embeddings
def train(is_training=True):
    """Run the TDM DNN job in training or prediction mode.

    Configures the data reader, builds the shared "item_emb" embeddings and
    the MXNet network, then drives a TrainSession until it should stop.
    After training, task 0 converts the "item_emb" checkpoint variable and
    re-uploads it through shell commands.

    Args:
        is_training: True for training; False for prediction. In prediction
            mode every task other than task 0 returns immediately.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # text -- confirm loop/with extents, especially where flagged below.
    #np.set_printoptions(threshold='nan')
    if is_training or xdl.get_task_index() == 0:
        init()
    else:
        return

    file_type = xdl.parsers.txt
    if is_training:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)
        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)
        data_io.batch_size(intconf('train_batch_size'))
        data_io.epochs(intconf('train_epochs'))
        data_io.threads(intconf('train_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        # Training files are matched by a regex suffix and sharded per task.
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(), size=xdl.get_task_num())
        print 'train: sharding.partition() =', paths
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)
        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)
        data_io.batch_size(intconf('predict_batch_size'))
        data_io.epochs(intconf('predict_epochs'))
        data_io.threads(intconf('predict_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print 'predict: add_path =', data
        iop = xdl.GetIOP("TDMPREDICTOP")
        #data_io.finish_delay(True)
    assert iop is not None
    # Options handed to the IO op as string key/value pairs.
    key_value = {}
    key_value["key"] = "value"
    key_value["debug"] = conf('tdmop_debug')
    key_value["layer_counts"] = conf('tdmop_layer_counts')
    key_value["pr_test_each_layer_retrieve_num"] = "400"
    key_value["pr_test_final_layer_retrieve_num"] = "200"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    if not is_training:
        data_io.keep_sample(True)
        data_io.pause(intconf('predict_io_pause_num'), True)
    data_io.startup()

    if not is_training:
        # Prediction restores the model from the configured checkpoint.
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    emb_combiner = 'mean'    # mean | sum
    ind = batch["indicators"][0]
    ids = batch["_ids"][0]
    emb = []
    emb_dim = 24
    if is_training:
        feature_add_probability = 1.
    else:
        feature_add_probability = 0.
    import xdl.python.sparse_engine.embedding as embedding
    # Every feature below uses the same embedding name "item_emb".
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        #emb_name = "item_%s_emb" % i
        eb = xdl.embedding(emb_name, batch["item_%s" % i], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)
        # NOTE(review): only take_op is assumed to be inside the device scope.
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    #emb_name = "unit_id_expand_emb"
    unit_id_expand_emb = xdl.embedding(emb_name, batch["unit_id_expand"], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)

    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input, indicator, unit_id_emb, label, bs, eb_dim, fea_groups, active_op='prelu', use_batch_norm=True):
        # Split the user input into windows according to fea_groups and
        # average-pool inside each window.
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print "fea_groups", fea_groups, "total_group_length", total_group_length, "eb_dim", eb_dim
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape, shape=(-1, total_group_length, eb_dim))

        layer_data = []
        # start att
        att_user_input = mx.sym.reshape(user_input, (bs, total_group_length, eb_dim))
        att_node_input = mx.sym.reshape(unit_id_emb, (bs, 1, eb_dim))
        att_node_input = mx.sym.broadcast_to(data=att_node_input, shape=(0, total_group_length, 0))
        # Attention input: user features, their product with the node
        # embedding, and the node embedding, concatenated on the last axis.
        att_din = mx.sym.concat(att_user_input, att_user_input * att_node_input, att_node_input, dim=2)
        att_active_op = 'prelu'
        att_layer_arr = []
        att_layer1 = FullyConnected3D(3*eb_dim, 36, active_op=att_active_op, version=1, batch_size=bs)
        att_layer_arr.append(att_layer1)
        att_layer2 = FullyConnected3D(36, 1, active_op=att_active_op, version=2, batch_size=bs)
        att_layer_arr.append(att_layer2)
        layer_data.append(att_din)
        for layer in att_layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        att_dout = layer_data[-1]
        att_dout = mx.sym.broadcast_to(data=att_dout, shape=(0, 0, eb_dim))
        user_input = mx.sym.reshape(user_input, shape=(bs, -1, eb_dim))
        # Weight the user features by the attention output.
        user_input = user_input * att_dout
        # end att

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input, axis=1, begin=idx, end=idx+group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input, block, dim=1)
            idx += group_length

        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)
        # Expand the user features according to indicator, then feed them
        # through the network.
        #grouped_user_input_after_take = mx.symbol.take(grouped_user_input, indicator)
        grouped_user_input_after_take = grouped_user_input
        din = mx.symbol.concat(*[grouped_user_input_after_take, unit_id_emb], dim=1)

        net_version = "d"
        layer_arr = []
        layer1 = mx_dnn_layer(11 * eb_dim, 128, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (1, net_version))
        layer_arr.append(layer1)
        layer2 = mx_dnn_layer(128, 64, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (2, net_version))
        layer_arr.append(layer2)
        layer3 = mx_dnn_layer(64, 32, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (3, net_version))
        layer_arr.append(layer3)
        layer4 = mx_dnn_layer(32, 2, active_op='', use_batch_norm=False, version="%d_%s" % (4, net_version))
        layer_arr.append(layer4)
        #layer_data = [din]
        layer_data.append(din)
        for layer in layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        dout = layer_data[-1]

        # For a normal sample the two label columns always sum to 1, while a
        # padded sample's label is 0; subtracting one therefore yields -1,
        # which is used as the ignore label.
        ph_label_sum = mx.sym.sum(label, axis=1)
        ph_label_ignore = ph_label_sum - 1
        ph_label_ignore = mx.sym.reshape(ph_label_ignore, shape=(-1, 1))
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = ph_label_click + ph_label_ignore
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(bs, ))
        prop = mx.symbol.SoftmaxOutput(data=dout, label=ph_label_click, grad_scale=1.0, use_ignore=True, normalization='valid')
        origin_loss = mx.sym.log(prop) * label
        ph_label_sum = mx.sym.reshape(ph_label_sum, shape=(bs, 1))
        origin_loss = mx.sym.broadcast_mul(origin_loss, ph_label_sum)
        loss = - mx.symbol.sum(origin_loss) / mx.sym.sum(ph_label_sum)
        return prop, loss

    re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, '20,20,10,10,2,2,2,1,1,1')
    prop = re[0]
    loss = re[1]

    if is_training:
        # NOTE(review): the learning rate is read through intconf -- confirm
        # that an integer conversion is intended.
        train_op = xdl.Adam(learning_rate=intconf('learning_rate'), lr_decay=False).optimize()
        #train_op = xdl.SGD(0.1).optimize()
        #fc_1_weight_grad = xdl.get_gradient("fc_w_1_d")
        #fc_1_bias_grad = xdl.get_gradient("fc_b_1_d")
    else:
        fin = data_io.set_prop(prop=prop)

    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(xdl.SyncRunHook(xdl.get_task_index(), xdl.get_task_num()))
        # Only task 0 writes checkpoints.
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    else:
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0
    while not sess.should_stop():
        print ">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS)])
                #result = sess.run([train_op, xdl.get_collection(xdl.UPDATE_OPS), unit_id_expand_emb])
            else:
                result = sess.run([loss, fin, global_step.value])
                #result = sess.run([loss, fin, ids, global_step.value])
            # A None result ends the inner loop.
            if result is None:
                print "result is None, finished success."
                break
            if not is_training:
                print "global_step =", result[-1]
                #print "batch['_ids'] =", result[-2]
            #else:
            #    print "unit_id_expand_emb = { mean =", result[-1].mean(), ", std =", result[-1].std(), "}"
        # NOTE(review): loop_num is assumed to count outer iterations (one per
        # block of up to 200 runs) -- confirm.
        loop_num += 1
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            #print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    if is_training:
        xdl.execute(xdl.ps_synchronize_leave_op(np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print 'start put item_emb'

            def _string_to_int8(src):
                # Encode a string as an int8 array of its character codes.
                return np.array([ord(ch) for ch in src], dtype=np.int8)

            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(checkpoint_dir=_string_to_int8(get_ckpt_dir()),
                                                 output_dir=_string_to_int8(output_dir),
                                                 variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            # Fetch the converted file, strip the first two characters of
            # each line with sed, and upload it again.
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print 'finish put item_emb'
def __init__(self, config=None, is_training=True, gstep=None, filter=_default_filter, scope=None): super(TraceHook, self).__init__() self._keys = [] self._values = [] self._summaries = [] # callbacks self._cb_keys = [] self._cbs = [] # once self._once_keys = [] self._once_values = [] # sparse assign self._sparse_assign_vars = [] self._sparse_assign_ids = [] self._sparse_assign_values = [] if is_training or gstep is None: self._gstep = get_global_step() self._use_gstep = True else: self._gstep = gstep self._use_gstep = False self._lstep = 0 if config is None or 'output_dir' not in config: print('WARNING: no output_dir found, trace will not work!!!') self._writer = None else: self._writer = TraceWriter(config, is_training) self._filter = filter scope = list(scope) if isinstance(scope, (list, tuple)) else [scope] for s in scope: info = get_values('tf_sparse_assign', s) vars = get_variables('tf_sparse_assign', s) self._sparse_assign_ids.extend( [info[i] for i in range(len(info)) if i % 2 == 0]) self._sparse_assign_values.extend( [info[i] for i in range(len(info)) if i % 2 == 1]) self._sparse_assign_vars.extend( [vars[i] for i in range(len(vars)) if i % 2 == 0]) for vtype in ['xdl', 'tf', 'mxnet']: self._keys.extend(get_names(vtype, s)) self._values.extend(get_values(vtype, s)) self._summaries.extend(get_functions(vtype, s)) self._cb_keys.extend(get_names('function', s)) self._cbs.extend(get_functions('function', s)) self._once_keys.extend(get_names('once', s)) self._once_values.extend(get_values('once', s)) assert len(self._sparse_assign_ids) == len( self._sparse_assign_values) assert len(self._sparse_assign_ids) == len( self._sparse_assign_vars) assert len(self._keys) == len(self._values) assert len(self._keys) == len(self._summaries) assert len(self._cb_keys) == len(self._cbs) assert len(self._once_keys) == len(self._once_values) print 'before_run_tensors:', self._values
def train(is_training=True):
    """Run the TDM vector-recall job in training or prediction mode.

    Configures the data reader, builds the shared "item_emb" embeddings and
    the MXNet user-vector network, then drives a TrainSession. In
    prediction mode the user vectors are scored against the pulled item
    embeddings and precision/recall statistics are printed. After training,
    task 0 converts the "item_emb" checkpoint variable and re-uploads it
    through shell commands.

    Args:
        is_training: True for training; False for prediction. In prediction
            mode every task other than task 0 returns immediately.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # text -- confirm loop/with extents, especially where flagged below.
    if is_training or xdl.get_task_index() == 0:
        init()
    else:
        return

    file_type = xdl.parsers.txt
    if is_training:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)
        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)
        data_io.batch_size(intconf('train_batch_size'))
        data_io.epochs(intconf('train_epochs'))
        data_io.threads(intconf('train_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        # Training files are matched by a regex suffix and sharded per task.
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(), size=xdl.get_task_num())
        print 'train: sharding.partition() =', paths
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                             namenode="hdfs://your/namenode/hdfs/path:9000", enable_state=False)
        feature_count = 69
        for i in xrange(1, feature_count + 1):
            data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
        data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)
        # Prediction additionally reads the "test_unit_id" feature.
        data_io.feature(name="test_unit_id", type=xdl.features.sparse, table=1)
        data_io.batch_size(intconf('predict_batch_size'))
        data_io.epochs(intconf('predict_epochs'))
        data_io.threads(intconf('predict_threads'))
        data_io.label_count(2)
        base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print 'predict: add_path =', data
        iop = xdl.GetIOP("TDMPREDICTOP")
        #data_io.finish_delay(True)
    assert iop is not None
    # Options handed to the IO op as string key/value pairs.
    key_value = {}
    key_value["key"] = "value"
    key_value["debug"] = conf('tdmop_debug')
    key_value["layer_counts"] = conf('tdmop_layer_counts')
    key_value["start_sample_layer"] = "22"
    key_value["pr_test_each_layer_retrieve_num"] = "400"
    key_value["pr_test_final_layer_retrieve_num"] = "200"
    if not is_training:
        key_value["expand_mode"] = "vector"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    data_io.startup()

    if not is_training:
        # Prediction restores the model from the configured checkpoint.
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    emb_combiner = 'mean'    # mean | sum
    if not is_training:
        # The last id/segment entries of the batch are used as ground truth.
        gt_ids = batch["_ids"][-1]
        gt_segments = batch["_segments"][-1]
    emb = []
    emb_dim = 24
    if is_training:
        feature_add_probability = 1.
    else:
        feature_add_probability = 0.
    import xdl.python.sparse_engine.embedding as embedding
    # Every feature below uses the same embedding name "item_emb".
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        eb = xdl.embedding(emb_name, batch["item_%s" % i], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)
        # NOTE(review): only take_op is assumed to be inside the device scope.
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    unit_id_expand_emb = xdl.embedding(
        emb_name, batch["unit_id_expand"], xdl.Normal(stddev=0.001), emb_dim, 50000, emb_combiner, vtype="hash", feature_add_probability=feature_add_probability)

    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input, indicator, unit_id_emb, label, bs, eb_dim, sample_num, fea_groups, active_op='prelu', use_batch_norm=True):
        # Split the user input into windows according to fea_groups and
        # average-pool inside each window.
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print "fea_groups", fea_groups, "total_group_length", total_group_length, "eb_dim", eb_dim
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape, shape=(-1, total_group_length, eb_dim))

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input, axis=1, begin=idx, end=idx + group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input, block, dim=1)
            idx += group_length

        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)
        grouped_user_input_after_take = grouped_user_input

        net_version = "e"
        layer_arr = []
        layer1 = mx_dnn_layer(10 * eb_dim, 128, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (1, net_version))
        layer_arr.append(layer1)
        layer2 = mx_dnn_layer(128, 64, active_op=active_op, use_batch_norm=use_batch_norm, version="%d_%s" % (2, net_version))
        layer_arr.append(layer2)
        layer3 = mx_dnn_layer(64, 24, active_op='', use_batch_norm=False, version="%d_%s" % (3, net_version))
        layer_arr.append(layer3)
        layer_data = [grouped_user_input_after_take]
        for layer in layer_arr:
            layer_data.append(layer.call(layer_data[-1]))
        dout = layer_data[-1]

        # Score = inner product of the network output and the node embedding.
        inner_product = mx.sym.sum(dout * unit_id_emb, axis=1)
        # NOTE(review): `bs / sample_num` relies on Python 2 integer division
        # when both operands are ints -- confirm.
        softmax_input = mx.sym.Reshape(inner_product, shape=(bs / sample_num, sample_num))

        # Use the positive sample's label minus 1 as the softmax label.
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = mx.sym.reshape(
            ph_label_click, shape=(bs / sample_num, sample_num)) - 1
        ph_label_click = mx.sym.slice_axis(ph_label_click, axis=1, begin=0, end=1)
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(bs / sample_num, ))
        prop = mx.symbol.SoftmaxOutput(data=softmax_input, label=ph_label_click, normalization='valid', use_ignore=True)
        positive_prop = mx.sym.slice_axis(prop, axis=1, begin=0, end=1)
        positive_prop = mx.sym.reshape(positive_prop, shape=(bs / sample_num, ))
        # The actual number of valid samples is (bs/sample_num) minus the
        # number of labels that must be ignored.
        loss = -mx.sym.sum(mx.symbol.log(positive_prop)) / (
            bs / sample_num + mx.sym.sum(ph_label_click))

        # Keep the first row of every sample group as the user vector.
        user_vector = mx.sym.reshape(dout, shape=(bs / sample_num, sample_num, eb_dim))
        user_vector = mx.sym.slice_axis(user_vector, axis=1, begin=0, end=1)
        user_vector = mx.sym.reshape(user_vector, shape=(bs / sample_num, eb_dim))
        return prop, loss, mx.sym.BlockGrad(user_vector)

    # The two modes differ only in sample_num (600 vs 1).
    if is_training:
        re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, 600, '20,20,10,10,2,2,2,1,1,1')
    else:
        re = dnn_model_define(emb, batch["indicators"][0], unit_id_expand_emb, batch["label"], data_io._batch_size, emb_dim, 1, '20,20,10,10,2,2,2,1,1,1')
    prop = re[0]
    loss = re[1]

    if is_training:
        # NOTE(review): the learning rate is read through intconf -- confirm
        # that an integer conversion is intended.
        train_op = xdl.Adam(learning_rate=intconf('learning_rate')).optimize()
    else:
        user_vector = re[2]

    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(
                xdl.SyncRunHook(xdl.get_task_index(), xdl.get_task_num()))
        # Only task 0 writes checkpoints.
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    else:
        log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0

    if not is_training:
        # Ask the IO op for the ids at the start sample layer and pull their
        # embeddings from "item_emb".
        urun_re = iop.urun({"get_level_ids": key_value["start_sample_layer"]})
        item_num = len(urun_re)
        item_ids = np.array([int(iid) for iid in urun_re.keys()], dtype=np.int64).reshape((item_num, 1))
        print 'item_ids shape: '
        print item_ids.shape
        # Hash ids are two int64 columns: a zero column followed by the id.
        zeros = np.zeros((item_num, 1), dtype=np.int64)
        hash_ids = np.concatenate((zeros, item_ids), axis=1)
        item_embeddings = xdl.execute(
            xdl.ps_sparse_pull_op(hash_ids, var_name="item_emb", var_type="hash", save_ratio=1.0, otype=xdl.DataType.float))
        # Transposed so user vectors can be right-multiplied by it.
        item_embeddings = item_embeddings.transpose()
        print 'item_embeddings shape: '
        print item_embeddings.shape

    # Per-user evaluation accumulators (prediction mode).
    hit_num_list = []
    precision_list = []
    recall_list = []
    gt_num_list = []
    user_idx = 1
    while not sess.should_stop():
        print ">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run(
                    [train_op, xdl.get_collection(xdl.UPDATE_OPS)])
            else:
                result = sess.run(
                    [user_vector, global_step.value, gt_ids, gt_segments])
            # A None result ends the inner loop.
            if result is None:
                print "result is None, finished success."
                break
            if not is_training:
                print "global_step =", result[1]
                batch_uv = result[0]
                batch_gt = result[2]
                batch_seg = result[3]
                # Keep one user vector per ground-truth segment.
                batch_uv = batch_uv[0:len(batch_seg)]
                batch_scores = np.matmul(batch_uv, item_embeddings)
                # Highest scores first, truncated to the final retrieve count.
                sorted_idx = np.argsort(-batch_scores, axis=1)
                sorted_idx = sorted_idx[:, :int(
                    key_value["pr_test_final_layer_retrieve_num"])]
                gt_id_start_idx = 0
                for i in xrange(len(batch_seg)):
                    pred_set = set(item_ids[sorted_idx[i, :], 0])
                    # Count occurrences of each ground-truth id in
                    # rows [gt_id_start_idx, batch_seg[i]).
                    gt_dict = {}
                    for gt in batch_gt[gt_id_start_idx:batch_seg[i], 1]:
                        if gt in gt_dict:
                            gt_dict[gt] += 1
                        else:
                            gt_dict[gt] = 1
                    test_gt_list = batch_gt[gt_id_start_idx:batch_seg[i], 1].tolist()
                    test_gt_str = ','.join(
                        [str(gtid) for gtid in test_gt_list])
                    test_pred_list = item_ids[sorted_idx[i, :], 0].tolist()
                    test_pred_str = ','.join(
                        [str(gtid) for gtid in test_pred_list])
                    user_idx += 1
                    gt_set = set(batch_gt[gt_id_start_idx:batch_seg[i], 1])
                    comm_set = gt_set.intersection(pred_set)
                    # Hits are weighted by how often the id occurs in the
                    # ground truth.
                    hit_num = sum([
                        float(gt_dict[item]) if item in gt_dict else 0.0
                        for item in comm_set
                    ])
                    hit_num_list.append(hit_num)
                    if len(pred_set) > 0:
                        precision = hit_num / len(pred_set)
                    else:
                        precision = 0.0
                    if len(gt_dict) > 0:
                        recall = hit_num / (batch_seg[i] - gt_id_start_idx)
                    else:
                        recall = 0.0
                    precision_list.append(precision)
                    recall_list.append(recall)
                    gt_num_list.append(float(batch_seg[i] - gt_id_start_idx))
                    gt_id_start_idx = batch_seg[i]
                # Running totals after each prediction batch.
                print "=================================================="
                print 'predicted user num is: %d' % len(hit_num_list)
                print 'gt num is: %f' % sum(gt_num_list)
                print 'precision: %f' % (sum(precision_list) / len(hit_num_list))
                print 'recall: %f' % (sum(recall_list) / len(hit_num_list))
                print 'global recall: %f' % (sum(hit_num_list) / sum(gt_num_list))
                print "=================================================="
        # NOTE(review): loop_num is assumed to count outer iterations (one per
        # block of up to 200 runs) -- confirm.
        loop_num += 1
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            #print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    if not is_training:
        # Final totals once the session has stopped.
        print "=================================================="
        print 'predicted user num is: %d' % len(hit_num_list)
        print 'gt num is: %f' % sum(gt_num_list)
        print 'precision: %f' % (sum(precision_list) / len(hit_num_list))
        print 'recall: %f' % (sum(recall_list) / len(hit_num_list))
        print 'global recall: %f' % (sum(hit_num_list) / sum(gt_num_list))
        print "=================================================="

    if is_training:
        xdl.execute(
            xdl.ps_synchronize_leave_op(
                np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print 'start put item_emb'

            def _string_to_int8(src):
                # Encode a string as an int8 array of its character codes.
                return np.array([ord(ch) for ch in src], dtype=np.int8)

            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(
                checkpoint_dir=_string_to_int8(get_ckpt_dir()),
                output_dir=_string_to_int8(output_dir),
                variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            # Fetch the converted file, strip the first two characters of
            # each line with sed, and upload it again.
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print 'finish put item_emb'
def before_run(self, v):
    """Return the value of the global step; ``v`` is not used."""
    step = get_global_step()
    return step.value
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    if not denominator:
        return 0.0
    return numerator / denominator


def _print_eval_summary(hit_num_list, precision_list, recall_list, gt_num_list):
    """Print the accumulated precision / recall figures.

    Ratios are reported as 0.0 instead of raising ZeroDivisionError when no
    user (or no ground-truth item) has been accumulated yet.
    """
    user_num = len(hit_num_list)
    print("==================================================")
    print('predicted user num is: %d' % user_num)
    print('gt num is: %f' % sum(gt_num_list))
    print('precision: %f' % _safe_ratio(sum(precision_list), user_num))
    print('recall: %f' % _safe_ratio(sum(recall_list), user_num))
    print('global recall: %f' % _safe_ratio(sum(hit_num_list), sum(gt_num_list)))
    print("==================================================")


def _accumulate_batch_metrics(batch_uv, batch_gt, batch_seg, item_embeddings,
                              item_ids, retrieve_num, hit_num_list,
                              precision_list, recall_list, gt_num_list):
    """Score one prediction batch and append per-user metrics to the lists.

    ``batch_seg[i]`` is used as the exclusive end row of user ``i``'s
    ground-truth rows in ``batch_gt``; column 1 of ``batch_gt`` is read as
    the ground-truth item id.  The four ``*_list`` arguments are
    accumulators that are extended in place.
    """
    # Only the first len(batch_seg) user vectors are scored.
    batch_uv = batch_uv[0:len(batch_seg)]
    batch_scores = np.matmul(batch_uv, item_embeddings)
    # Highest score first, truncated to the retrieval budget.
    sorted_idx = np.argsort(-batch_scores, axis=1)[:, :retrieve_num]

    gt_id_start_idx = 0
    for i, seg_end in enumerate(batch_seg):
        user_gt = batch_gt[gt_id_start_idx:seg_end, 1]
        gt_num = seg_end - gt_id_start_idx
        pred_set = set(item_ids[sorted_idx[i, :], 0])

        # Multiplicity of every ground-truth id: a repeated id counts as
        # several hits when it is retrieved.
        gt_dict = {}
        for gt in user_gt:
            gt_dict[gt] = gt_dict.get(gt, 0) + 1

        comm_set = set(gt_dict).intersection(pred_set)
        hit_num = sum([float(gt_dict[item]) for item in comm_set])
        hit_num_list.append(hit_num)

        if len(pred_set) > 0:
            precision = hit_num / len(pred_set)
        else:
            precision = 0.0
        if len(gt_dict) > 0:
            recall = hit_num / gt_num
        else:
            recall = 0.0
        precision_list.append(precision)
        recall_list.append(recall)
        gt_num_list.append(float(gt_num))
        gt_id_start_idx = seg_end


def train(is_training=True):
    """Train the TDM model, or evaluate it when ``is_training`` is False.

    Training mode: reads the sharded training samples, optimizes with Adam,
    and once the session stops, task 0 converts the ``item_emb`` variable of
    the checkpoint and re-uploads it through ``hadoop fs``.

    Prediction mode: only task 0 does any work (other tasks return
    immediately).  It restores the checkpoint named by ``saver_ckpt``,
    scores each user vector against the pulled item embeddings and prints
    precision / recall figures.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation had been lost; the spots marked NOTE(review) should be
    checked against the upstream script.
    """
    if not is_training and xdl.get_task_index() != 0:
        return
    init()

    # ------------------------------------------------------------------
    # Input pipeline
    # ------------------------------------------------------------------
    file_type = xdl.parsers.txt
    data_io = xdl.DataIO("tdm", file_type=file_type, fs_type=xdl.fs.hdfs,
                         namenode="hdfs://your/namenode/hdfs/path:9000",
                         enable_state=False)

    feature_count = 69
    for i in xrange(1, feature_count + 1):
        data_io.feature(name=("item_%s" % i), type=xdl.features.sparse, table=1)
    data_io.feature(name="unit_id_expand", type=xdl.features.sparse, table=0)
    if not is_training:
        data_io.feature(name="test_unit_id", type=xdl.features.sparse, table=1)

    # Config keys are '<mode>_batch_size', '<mode>_epochs', '<mode>_threads'
    # with <mode> being 'train' or 'predict'.
    mode = 'train' if is_training else 'predict'
    data_io.batch_size(intconf(mode + '_batch_size'))
    data_io.epochs(intconf(mode + '_epochs'))
    data_io.threads(intconf(mode + '_threads'))
    data_io.label_count(2)

    base_path = '%s/%s/' % (conf('upload_url'), conf('data_dir'))
    if is_training:
        # The training path is a pattern ('<train_sample>_[\d]+') that is
        # partitioned across tasks.
        data = base_path + conf('train_sample') + '_' + r'[\d]+'
        sharding = xdl.DataSharding(data_io.fs())
        sharding.add_path(data)
        paths = sharding.partition(rank=xdl.get_task_index(),
                                   size=xdl.get_task_num())
        print('train: sharding.partition() = %s' % (paths,))
        data_io.add_path(paths)
        iop = xdl.GetIOP("TDMOP")
    else:
        data = base_path + conf('test_sample')
        data_io.add_path(data)
        print('predict: add_path = %s' % (data,))
        iop = xdl.GetIOP("TDMPREDICTOP")
    # data_io.finish_delay(True)
    assert iop is not None

    key_value = {
        "key": "value",
        "debug": conf('tdmop_debug'),
        "layer_counts": conf('tdmop_layer_counts'),
        "start_sample_layer": "22",
        "pr_test_each_layer_retrieve_num": "400",
        "pr_test_final_layer_retrieve_num": "200",
    }
    if not is_training:
        key_value["expand_mode"] = "vector"
    iop.init(key_value)
    data_io.add_op(iop)
    data_io.split_group(False)
    data_io.startup()

    if not is_training:
        if xdl.get_task_index() == 0:
            saver = xdl.Saver()
            saver.restore(conf('saver_ckpt'))

    batch = data_io.read()

    # ------------------------------------------------------------------
    # Embeddings
    # ------------------------------------------------------------------
    emb_combiner = 'mean'  # mean | sum
    if not is_training:
        gt_ids = batch["_ids"][-1]
        gt_segments = batch["_segments"][-1]

    emb = []
    emb_dim = 24
    feature_add_probability = 1. if is_training else 0.

    # Unused name, kept from the original in case the import has side
    # effects -- TODO confirm and drop.
    import xdl.python.sparse_engine.embedding as embedding
    emb_name = "item_emb"
    for i in xrange(1, feature_count + 1):
        eb = xdl.embedding(emb_name, batch["item_%s" % i],
                           xdl.Normal(stddev=0.001), emb_dim, 50000,
                           emb_combiner, vtype="hash",
                           feature_add_probability=feature_add_probability)
        # NOTE(review): assumed that only take_op sits inside the device
        # scope -- confirm against upstream.
        with xdl.device('GPU'):
            eb_take = xdl.take_op(eb, batch["indicators"][0])
        eb_take.set_shape(eb.shape)
        emb.append(eb_take)
    unit_id_expand_emb = xdl.embedding(emb_name, batch["unit_id_expand"],
                                       xdl.Normal(stddev=0.001), emb_dim, 50000,
                                       emb_combiner, vtype="hash",
                                       feature_add_probability=feature_add_probability)

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    @xdl.mxnet_wrapper(is_training=is_training, device_type='gpu')
    def dnn_model_define(user_input, indicator, unit_id_emb, label, bs, eb_dim,
                         sample_num, fea_groups, active_op='prelu',
                         use_batch_norm=True):
        """Build the MXNet symbols for the user tower and the softmax loss.

        Returns ``(prop, loss, user_vector)`` where ``user_vector`` is
        wrapped in BlockGrad.
        """
        # Split the user input into windows according to fea_groups and
        # average-pool inside every window.
        fea_groups = [int(s) for s in fea_groups.split(',')]
        total_group_length = np.sum(np.array(fea_groups))
        print("fea_groups %s total_group_length %s eb_dim %s"
              % (fea_groups, total_group_length, eb_dim))
        user_input_before_reshape = mx.sym.concat(*user_input)
        user_input = mx.sym.reshape(user_input_before_reshape,
                                    shape=(-1, total_group_length, eb_dim))

        idx = 0
        for group_length in fea_groups:
            block_before_sum = mx.sym.slice_axis(user_input, axis=1, begin=idx,
                                                 end=idx + group_length)
            block = mx.sym.sum_axis(block_before_sum, axis=1) / group_length
            if idx == 0:
                grouped_user_input = block
            else:
                grouped_user_input = mx.sym.concat(grouped_user_input, block,
                                                   dim=1)
            idx += group_length

        indicator = mx.symbol.BlockGrad(indicator)
        label = mx.symbol.BlockGrad(label)

        net_version = "e"
        # One pooled block of width eb_dim per feature group is concatenated
        # above, so the first layer is sized from the number of groups
        # (10 for the group spec used by train()).
        layer_arr = [
            mx_dnn_layer(len(fea_groups) * eb_dim, 128, active_op=active_op,
                         use_batch_norm=use_batch_norm,
                         version="%d_%s" % (1, net_version)),
            mx_dnn_layer(128, 64, active_op=active_op,
                         use_batch_norm=use_batch_norm,
                         version="%d_%s" % (2, net_version)),
            mx_dnn_layer(64, 24, active_op='', use_batch_norm=False,
                         version="%d_%s" % (3, net_version)),
        ]
        dout = grouped_user_input
        for layer in layer_arr:
            dout = layer.call(dout)

        # Rows are regrouped as (group_num, sample_num) below.
        group_num = bs // sample_num

        inner_product = mx.sym.sum(dout * unit_id_emb, axis=1)
        softmax_input = mx.sym.Reshape(inner_product,
                                       shape=(group_num, sample_num))

        # Use the positive sample's label minus 1 as the softmax label.
        ph_label_click = mx.sym.slice_axis(label, axis=1, begin=1, end=2)
        ph_label_click = mx.sym.reshape(ph_label_click,
                                        shape=(group_num, sample_num)) - 1
        ph_label_click = mx.sym.slice_axis(ph_label_click, axis=1, begin=0,
                                           end=1)
        ph_label_click = mx.sym.reshape(ph_label_click, shape=(group_num, ))

        prop = mx.symbol.SoftmaxOutput(data=softmax_input,
                                       label=ph_label_click,
                                       normalization='valid', use_ignore=True)
        positive_prop = mx.sym.slice_axis(prop, axis=1, begin=0, end=1)
        positive_prop = mx.sym.reshape(positive_prop, shape=(group_num, ))

        # The effective number of samples is (bs / sample_num) minus the
        # number of labels that have to be ignored.
        loss = -mx.sym.sum(mx.symbol.log(positive_prop)) / (
            group_num + mx.sym.sum(ph_label_click))

        # Keep the first row of every group as the user vector.
        user_vector = mx.sym.reshape(dout,
                                     shape=(group_num, sample_num, eb_dim))
        user_vector = mx.sym.slice_axis(user_vector, axis=1, begin=0, end=1)
        user_vector = mx.sym.reshape(user_vector, shape=(group_num, eb_dim))

        return prop, loss, mx.sym.BlockGrad(user_vector)

    # 600 rows per group while training, a single row per group at
    # prediction time.
    sample_num = 600 if is_training else 1
    model_outputs = dnn_model_define(emb, batch["indicators"][0],
                                     unit_id_expand_emb, batch["label"],
                                     data_io._batch_size, emb_dim, sample_num,
                                     '20,20,10,10,2,2,2,1,1,1')
    loss = model_outputs[1]

    if is_training:
        # NOTE(review): the learning rate is read with intconf(); if that
        # helper parses integers a fractional rate would be truncated --
        # confirm.
        train_op = xdl.Adam(learning_rate=intconf('learning_rate')).optimize()
    else:
        user_vector = model_outputs[2]

    # ------------------------------------------------------------------
    # Hooks and session
    # ------------------------------------------------------------------
    hooks = []
    if is_training:
        if conf("train_mode") == "sync":
            hooks.append(xdl.SyncRunHook(xdl.get_task_index(),
                                         xdl.get_task_num()))
        if xdl.get_task_index() == 0:
            ckpt_hook = xdl.CheckpointHook(intconf('save_checkpoint_interval'))
            hooks.append(ckpt_hook)
    log_hook = xdl.LoggerHook([loss], "#### loss:{0}")
    hooks.append(log_hook)

    from xdl.python.training.training_utils import get_global_step
    global_step = get_global_step()

    sess = xdl.TrainSession(hooks)

    elapsed_time = 0.
    statis_begin_loop = 200
    loop_num = 0

    if not is_training:
        # Pull the embeddings of every id the op reports for the start layer.
        urun_re = iop.urun({"get_level_ids": key_value["start_sample_layer"]})
        item_num = len(urun_re)
        item_ids = np.array([int(iid) for iid in urun_re.keys()],
                            dtype=np.int64).reshape((item_num, 1))
        print('item_ids shape: ')
        print(item_ids.shape)
        zeros = np.zeros((item_num, 1), dtype=np.int64)
        hash_ids = np.concatenate((zeros, item_ids), axis=1)
        item_embeddings = xdl.execute(
            xdl.ps_sparse_pull_op(hash_ids, var_name="item_emb",
                                  var_type="hash", save_ratio=1.0,
                                  otype=xdl.DataType.float))
        # Transposed so that user_vectors x item_embeddings yields one
        # score column per item.
        item_embeddings = item_embeddings.transpose()
        print('item_embeddings shape: ')
        print(item_embeddings.shape)

    retrieve_num = int(key_value["pr_test_final_layer_retrieve_num"])
    hit_num_list = []
    precision_list = []
    recall_list = []
    gt_num_list = []

    # ------------------------------------------------------------------
    # Main loop
    # ------------------------------------------------------------------
    while not sess.should_stop():
        print(">>>>>>>>>>>> %d >>>>>>>>>>>" % loop_num)
        begin_time = time.time()
        for itr in xrange(200):
            if is_training:
                result = sess.run([train_op,
                                   xdl.get_collection(xdl.UPDATE_OPS)])
            else:
                result = sess.run([user_vector, global_step.value, gt_ids,
                                   gt_segments])
            if result is None:
                print("result is None, finished success.")
                break
            if not is_training:
                print("global_step = %s" % (result[1],))
                _accumulate_batch_metrics(result[0], result[2], result[3],
                                          item_embeddings, item_ids,
                                          retrieve_num, hit_num_list,
                                          precision_list, recall_list,
                                          gt_num_list)
                # Running totals after every batch.
                _print_eval_summary(hit_num_list, precision_list, recall_list,
                                    gt_num_list)
        # NOTE(review): assumed to be counted once per outer iteration (not
        # once per batch) -- confirm against upstream.
        loop_num += 1
        if loop_num > statis_begin_loop:
            elapsed_time += time.time() - begin_time
            # QPS reporting is disabled:
            # print 'batch_size = %d, qps = %f batch/s' % (data_io._batch_size, (loop_num - statis_begin_loop) / elapsed_time)

    # ------------------------------------------------------------------
    # Wrap-up
    # ------------------------------------------------------------------
    if not is_training:
        _print_eval_summary(hit_num_list, precision_list, recall_list,
                            gt_num_list)
    else:
        xdl.execute(xdl.ps_synchronize_leave_op(
            np.array(xdl.get_task_index(), dtype=np.int32)))
        if xdl.get_task_index() == 0:
            print('start put item_emb')

            def _string_to_int8(src):
                # The op takes its string arguments as int8 arrays of
                # character codes.
                return np.array([ord(ch) for ch in src], dtype=np.int8)

            from xdl.python.utils.config import get_ckpt_dir
            output_dir = conf('model_url')
            op = xdl.ps_convert_ckpt_variable_op(
                checkpoint_dir=_string_to_int8(get_ckpt_dir()),
                output_dir=_string_to_int8(output_dir),
                variables=_string_to_int8("item_emb"))
            xdl.execute(op)
            # Fetch the converted file, drop the first two characters of
            # every line, and upload it again.
            shell_cmd("rm -f data/item_emb")
            shell_cmd("hadoop fs -get %s/item_emb data/item_emb" % output_dir)
            shell_cmd("sed -i 's/..//' data/item_emb")
            shell_cmd("hadoop fs -put -f data/item_emb %s" % output_dir)
            print('finish put item_emb')