Example #1
def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
  # only the zero_debias=False case is handled here
  assert(not zero_debias)
  var_mapping = get_collection(VAR_MAPPING, cur_model_scope())
  for x in var_mapping:
    if x[1] == variable:
      add_to_collection(BATCHNORM_TENSORS, (x[0], value, decay), cur_model_scope())
  return real_assgin_moving_average(variable, value, decay, zero_debias, name)
Example #2
def get_gradient(var_name, scope=None):
    scope = scope if scope is not None else cur_model_scope()
    global _VAR_AND_GRADS
    if scope not in _VAR_AND_GRADS or \
          var_name not in _VAR_AND_GRADS[scope]:
        return None
    return _VAR_AND_GRADS[scope][var_name]
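The pattern in Example #2 — a module-level dict keyed first by model scope, then by variable name — recurs throughout these examples. A minimal standalone sketch of the same idea in plain Python (no XDL imports; set_gradient is a hypothetical setter added purely for illustration):

# Sketch of a scope-keyed gradient registry (illustrative, not XDL API).
_VAR_AND_GRADS = {}

def set_gradient(var_name, grad, scope='default'):
    # Lazily create the per-scope dict, then record the gradient.
    _VAR_AND_GRADS.setdefault(scope, {})[var_name] = grad

def get_gradient(var_name, scope='default'):
    # A missing scope or a missing variable both yield None.
    return _VAR_AND_GRADS.get(scope, {}).get(var_name)

set_gradient('dense_w', [0.1, -0.2], scope='main')
print(get_gradient('dense_w', scope='main'))  # [0.1, -0.2]
print(get_gradient('missing', scope='main'))  # None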
Example #3
def add_variable_inputs(inputs, input_op_names):
    """deal inputs for variables defined in tf-graph"""
    var_mapping = get_collection(VAR_MAPPING, cur_model_scope())
    if var_mapping is None:
        return
    inputs.extend([x[0].value for x in var_mapping])
    input_op_names.extend([x[1].name for x in var_mapping])
Example #4
def recursive_make_placeholder(x, xdl_inputs, tf_inputs):
    global _PLACEHOLDER_INFOS
    if isinstance(x, dict):
        ret = {}
        for key in x.keys():
            ret[key] = recursive_make_placeholder(x[key], xdl_inputs,
                                                  tf_inputs)
        return ret
    if isinstance(x, (tuple, list)):
        return [
            recursive_make_placeholder(item, xdl_inputs, tf_inputs)
            for item in x
        ]
    elif isinstance(x, Tensor):
        placeholder = make_placeholder(x)
        model_scope = cur_model_scope()
        if model_scope not in _PLACEHOLDER_INFOS:
            _PLACEHOLDER_INFOS[model_scope] = []
        _PLACEHOLDER_INFOS[model_scope].append(
            PlaceHolderInfo(placeholder, x.shape, x.dtype, x))
        xdl_inputs.append(x)
        tf_inputs.append(placeholder)
        return placeholder
    else:
        return x
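A stripped-down illustration of the same recursive traversal, with the XDL/TF specifics removed: make_placeholder below is a stand-in that just tags leaf values, and plain numbers play the role of Tensor leaves (both are assumptions made to keep the sketch runnable):

# Walk nested dicts/lists/tuples, replace leaves with placeholders, and
# record the (original input, placeholder) pairs in two parallel lists.
def make_placeholder(x):
    return 'ph(%r)' % x  # stand-in for a real placeholder object

def recursive_make_placeholder(x, xdl_inputs, tf_inputs):
    if isinstance(x, dict):
        return {k: recursive_make_placeholder(v, xdl_inputs, tf_inputs)
                for k, v in x.items()}
    if isinstance(x, (tuple, list)):
        return [recursive_make_placeholder(i, xdl_inputs, tf_inputs) for i in x]
    if isinstance(x, (int, float)):  # leaf: plays the role of a Tensor
        placeholder = make_placeholder(x)
        xdl_inputs.append(x)
        tf_inputs.append(placeholder)
        return placeholder
    return x

xdl_inputs, tf_inputs = [], []
print(recursive_make_placeholder({'a': [1, 2], 'b': 3}, xdl_inputs, tf_inputs))
print(xdl_inputs, tf_inputs)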
Example #5
def get_variable(self, name, shape=None, dtype=DataType.float,
                 initializer=None, regularizer=None, reuse=None,
                 trainable=True, collections=None, caching_device=None,
                 partitioner=None, validate_shape=True, use_resource=None,
                 custom_getter=None, constraint=None, **kwargs):
  global _TF_VAR_DICT
  scope = cur_model_scope()
  if scope not in _TF_VAR_DICT:
    _TF_VAR_DICT[scope] = {}
  tf_var_dict = _TF_VAR_DICT[scope]
  if name in tf_var_dict:
    if tf.get_variable_scope().reuse in [True, tf.AUTO_REUSE]:
      return tf_var_dict[name]
    else:
      raise Exception("must set reuse flag to enable reuse")

  def _custom_getter(getter, *args, **kwargs):
    tf_var = getter(*args, **kwargs)
    xdl_var = xdl_variable(
      name=name,
      shape=TF2XDL.convert_shape(shape),
      dtype=TF2XDL.convert_type(dtype),
      scope=scope,
      trainable=True,
      initializer=TF2XDL.convert_initializer(initializer))
    add_to_collection(VAR_MAPPING, (xdl_var, tf_var), scope)
    add_to_collection(BACKPROP_VARS, (name, tf_var), scope)
    tf_var_dict[name] = tf_var
    return tf_var

  return real_get_variable(self, name, shape, dtype, initializer, 
                           regularizer, reuse, trainable,
                           collections, caching_device, partitioner, 
                           validate_shape, use_resource, _custom_getter, 
                           constraint, **kwargs)
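The per-scope cache at the top of Example #5 is what makes variable reuse work: a second request for the same name either returns the cached variable (when reuse is enabled) or fails loudly. A plain-Python sketch of just that cache-and-reuse check (no TensorFlow involved; create_fn and the plain reuse flag are stand-ins):

_TF_VAR_DICT = {}

def get_variable(name, create_fn, scope='default', reuse=False):
    cache = _TF_VAR_DICT.setdefault(scope, {})
    if name in cache:
        if reuse:
            return cache[name]
        raise Exception('must set reuse flag to enable reuse')
    cache[name] = create_fn(name)
    return cache[name]

v1 = get_variable('w', lambda n: {'name': n}, scope='main')
v2 = get_variable('w', lambda n: {'name': n}, scope='main', reuse=True)
print(v1 is v2)  # True: the second call reused the cached variable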
Example #6
def add_var_mapping(name, var, scope=None):
    global _VAR_MAPPING
    scope = scope if scope is not None else cur_model_scope()
    if scope not in _VAR_MAPPING:
        _VAR_MAPPING[scope] = {}
    if name in _VAR_MAPPING[scope]:
        raise Exception('duplicate key: %s' % name)
    _VAR_MAPPING[scope][name] = var
Example #7
def get_var_mapping(key, scope=None):
    global _VAR_MAPPING
    scope = scope if scope is not None else cur_model_scope()
    if scope not in _VAR_MAPPING:
        return None
    var_mapping = _VAR_MAPPING[scope]
    if key in var_mapping:
        return var_mapping[key]
    return None
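Examples #6 and #7 form a set/get pair over the same _VAR_MAPPING registry: duplicates are rejected on insert, unknown keys come back as None on lookup. A self-contained usage sketch (scope defaulting is simplified to a plain keyword argument):

_VAR_MAPPING = {}

def add_var_mapping(name, var, scope='default'):
    mapping = _VAR_MAPPING.setdefault(scope, {})
    if name in mapping:
        raise Exception('duplicate key: %s' % name)
    mapping[name] = var

def get_var_mapping(key, scope='default'):
    return _VAR_MAPPING.get(scope, {}).get(key)

add_var_mapping('emb_0', 'emb_0_grad_name', scope='main')
print(get_var_mapping('emb_0', scope='main'))    # 'emb_0_grad_name'
print(get_var_mapping('unknown', scope='main'))  # None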
Example #8
def __init__(self, hooks=None):
    current_env().sess_start()
    self._hooks = [] if hooks is None else hooks
    reader_hooks = get_collection(READER_HOOKS)
    if reader_hooks is not None:
        self._hooks.extend(reader_hooks)
    self._cur_scope = cur_model_scope()
    self._session = SimpleSession(self._hooks)
    self._finish = False
Example #9
def get_scopes(scope):
    scopes = []
    if scope is None:
        scopes.append(cur_model_scope())
    elif isinstance(scope, (list, tuple)):
        scopes = list(scope)
    else:
        scopes = [scope]
    return list(set(scopes))
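Whatever the caller passes in — None, a single scope, or a list — get_scopes hands back a deduplicated list, so downstream code can iterate uniformly. A quick standalone check of that behavior (cur_model_scope is stubbed out, since the real one lives in XDL):

def cur_model_scope():  # stub for the sketch
    return 'main'

def get_scopes(scope):
    if scope is None:
        scopes = [cur_model_scope()]
    elif isinstance(scope, (list, tuple)):
        scopes = list(scope)
    else:
        scopes = [scope]
    return list(set(scopes))

print(get_scopes(None))                     # ['main']
print(get_scopes('a'))                      # ['a']
print(sorted(get_scopes(['a', 'b', 'a'])))  # ['a', 'b'] -- duplicates dropped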
Example #10
    def optimize(self, var_list=None, update_global_step=True):
        if var_list is None:
            var_list = trainable_variables()
        sparse_var_grad = []
        update_ops = []
        shared_vars = set([])
        for var in var_list:
            grad_name = get_var_mapping(var)
            grad_name = grad_name if grad_name is not None else var.name
            grad = get_gradient(grad_name, cur_model_scope())
            if grad is None:
                print("[WARNING]: no gradient found for var:%s under scope:%s, "
                      "maybe not used?" % (var.name, cur_model_scope()))
                continue

            if isinstance(grad, list):
                raise Exception('duplicate grad for var: %s' % var)
            if not is_embedding_var(var):
                update_ops.append(self.dense_update(var, grad))
            else:
                sparse_var_grad.append([var, grad])

        sparse_grads = self.compute_sparse_grad(sparse_var_grad)
        if len(sparse_grads) != len(sparse_var_grad):
            raise Exception("calc grad failed!")
        merged_sparse_grads = self.merge_sparse_grad(
            zip([x[0] for x in sparse_var_grad], sparse_grads))

        if get_collection("sparse_grad") is None:
            add_to_collection("sparse_grad", {})
        sparse_grad_dict = get_collection("sparse_grad")[0]
        for i in range(len(merged_sparse_grads)):
            if not isinstance(merged_sparse_grads[i][1], SparseGrad):
                raise Exception("embedding var must hava sparse grads")
            sparse_grad_dict[merged_sparse_grads[i]
                             [0].name] = merged_sparse_grads[i][1]
            update_ops.append(
                self.sparse_update(merged_sparse_grads[i][0],
                                   merged_sparse_grads[i][1].grad,
                                   merged_sparse_grads[i][1].indices))
        if update_global_step:
            update_ops.append(self.update_global_step_op())
        return update_ops
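The heart of optimize is a dense/sparse split: variables with no recorded gradient are skipped with a warning, embedding variables are routed to the sparse path, and everything else gets a dense update. A simplified sketch of just that partition step (all names below are stand-ins, not XDL API):

def partition_vars(var_list, gradients, is_embedding):
    # gradients: dict mapping variable name -> gradient
    dense, sparse = [], []
    for var in var_list:
        grad = gradients.get(var)
        if grad is None:
            print('[WARNING]: no gradient found for var:%s, maybe not used?' % var)
            continue
        (sparse if is_embedding(var) else dense).append((var, grad))
    return dense, sparse

grads = {'w': [0.1], 'emb_item': [0.2]}
dense, sparse = partition_vars(['w', 'emb_item', 'unused'], grads,
                               is_embedding=lambda v: v.startswith('emb'))
print(dense)   # [('w', [0.1])]
print(sparse)  # [('emb_item', [0.2])]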
Example #11
def set_tf_output(output_tensors):
  res = get_collection(BATCHNORM_TENSORS, cur_model_scope())
  if res is None:
    return
  assert(len(res) == len(output_tensors))
  update_ops = []  
  for i in range(len(res)):
    update_op = xdl.ps_apply_moving_average_op(
      var_name = res[i][0].name, value = output_tensors[i], moment = res[i][2])
    update_ops.append(update_op)
  add_to_collection(UPDATE_OPS, update_ops)
Example #12
        def _wrapper(*inputs, **kwargs):
            add_to_collection(BACKEND_DEVICE_TYPE, device_type.lower())
            sym_input_dict = {}
            placeholders = []
            for x in inputs:
                placeholder = recursive_make_placeholder(x, sym_input_dict)
                placeholders.append(placeholder)

            gear_input_num = 0
            if 'gear_inputs' in kwargs:
                gear_inputs = kwargs['gear_inputs']
                gear_placeholder = recursive_make_placeholder(
                    gear_inputs, sym_input_dict, True)
                kwargs['gear_inputs'] = gear_placeholder
                gear_input_num = len(flatten(gear_inputs))

            model_outputs = model_func(*placeholders, **kwargs)
            if len(model_outputs) == 0:
                raise Exception('model_func must return loss')
            symbol_list = list(model_outputs)
            bn_statistic = get_collection(MXNET_BN_STATISTIC)
            bn_var_names = []
            bn_syms = []
            moments = []
            if bn_statistic is not None and len(bn_statistic) > 0:
                bn_var_names.extend([x[0] for x in bn_statistic])
                bn_syms.extend([x[1] for x in bn_statistic])
                moments.extend([x[2] for x in bn_statistic])

            symbol_list.extend([mx.sym.BlockGrad(x) for x in bn_syms])
            symbol = mx.sym.Group(symbol_list)
            executor = symbol.simple_bind(ctx=mx.cpu())
            add_variable_inputs(symbol,
                                sym_input_dict,
                                is_training=is_training)
            sym_names = symbol.list_arguments()
            xdl_inputs = []
            for sym in sym_names:
                xdl_inputs.append(sym_input_dict[sym])

            for aux in symbol.list_auxiliary_states():
                if aux in sym_input_dict:
                    xdl_inputs.append(sym_input_dict[aux])
                    sym_names.append(aux)

            target_size = len(executor.outputs)
            gradient_size = len(executor.grad_arrays)
            if device_type.lower() == 'cpu':
                outputs, gradients = xdl.mxnet_backend_op(
                    inputs=xdl_inputs,
                    var_name_str=','.join(sym_names),
                    device_type=device_type.lower(),
                    graph_def=serialize_graph(symbol),
                    target_size=target_size,
                    gradient_size=gradient_size if is_training else 0,
                    is_training=is_training,
                    init_grad=init_grad if init_grad is not None else np.array(
                        [], dtype=np.float32),
                    has_init_grad=True if init_grad is not None else False)
            else:
                with xdl.device('GPU'):
                    outputs, gradients = xdl.mxnet_backend_op(
                        inputs=xdl_inputs,
                        var_name_str=','.join(sym_names),
                        device_type=device_type.lower(),
                        graph_def=serialize_graph(symbol),
                        target_size=target_size,
                        gradient_size=gradient_size if is_training else 0,
                        is_training=is_training,
                        init_grad=init_grad if init_grad is not None else
                        np.array([], dtype=np.float32),
                        has_init_grad=True if init_grad is not None else False)

            bn_var_num = len(bn_var_names)
            if bn_var_num > 0:
                bn_outputs = outputs[len(outputs) - bn_var_num:]
                outputs = outputs[0:len(outputs) - bn_var_num]
                bn_update_infos = list(zip(bn_var_names, bn_outputs, moments))
                add_to_collection(BN_STATISTIC, bn_update_infos)
                update_ops = []
                for n, v, m in bn_update_infos:
                    update_op = xdl.ps_apply_moving_average_op(var_name=n,
                                                               value=v,
                                                               moment=m)
                    update_ops.append(update_op)
                add_to_collection(UPDATE_OPS, update_ops)

            if is_training:
                sym_names_ = []
                gradients_ = []
                if gear_input_num > 0:
                    global _GEAR_INPUTS
                    gear_grads = [None] * gear_input_num
                    for i in range(len(sym_names)):
                        if sym_names[i] not in _GEAR_INPUTS:
                            gradients_.append(gradients[i])
                            sym_names_.append(sym_names[i])
                        else:
                            index = _GEAR_INPUTS.index(sym_names[i])
                            gear_grads[index] = gradients[i]
                    for i in range(len(gear_inputs)):
                        set_gear_gradient(gear_inputs[i], gear_grads[i])
                    add_to_collection(GEAR_GRAD, gear_grads, cur_model_scope())
                    set_gradients(sym_names_, gradients_, cur_model_scope())
                else:
                    set_gradients(sym_names, gradients, cur_model_scope())
            return outputs
Example #13
def get_batchnorm_tensors():
  res = get_collection(BATCHNORM_TENSORS, cur_model_scope())
  if res is None:
    return []
  return [v[1] for v in res]
Example #14
def assign_moving_average(self, variable, value, momentum):
  var_mapping = get_collection(VAR_MAPPING, cur_model_scope())
  for x in var_mapping:
    if x[1] == variable:
      add_to_collection(BATCHNORM_TENSORS, (x[0], value, momentum), cur_model_scope())
  return keras_layers_assgin_moving_average(self, variable, value, momentum)
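Examples #1 and #14 only record (variable, value, momentum) tuples; the actual update is issued later by ps_apply_moving_average_op as in Example #11. The update itself is a standard exponential moving average; a short numeric sketch of the conventional formula (assumed here, not read out of the XDL op):

# Conventional moving-average update: move the stored value toward the new
# observation by (1 - momentum) of the gap.
def apply_moving_average(stored, value, momentum):
    return stored - (1.0 - momentum) * (stored - value)

print(apply_moving_average(0.0, 1.0, momentum=0.99))  # ~0.01
print(apply_moving_average(0.5, 1.0, momentum=0.9))   # ~0.55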
Example #15
def trainable_variables(scopes=None):
    if scopes is None:
        return trainable_variables_with_scope(['', cur_model_scope()])
    else:
        return trainable_variables_with_scope(scopes)
Example #16
def global_variables(scopes=None):
    if scopes is None:
        return global_variables_with_scope(['', cur_model_scope()])
    else:
        return global_variables_with_scope(scopes)
Example #17
def global_initializers(scopes=None):
    if scopes is None:
        return global_initializers_with_scope(['', cur_model_scope()])
    else:
        return global_initializers_with_scope(scopes)
Example #18
def variable_registers(scopes=None):
    if scopes is None:
        return variable_registers_with_scope(['', cur_model_scope()])
    else:
        return variable_registers_with_scope(scopes)
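Examples #15 through #18 are the same two-line delegation: when no scopes are given, fall back to the root scope plus the current model scope. A generic form of that default-scope pattern (illustrative only; the *_with_scope callable and the cur_model_scope stub are assumptions):

def cur_model_scope():  # stub for the sketch
    return 'main'

def with_default_scopes(fn_with_scope):
    # Wrap a *_with_scope function so it defaults to ['', cur_model_scope()].
    def wrapper(scopes=None):
        if scopes is None:
            scopes = ['', cur_model_scope()]
        return fn_with_scope(scopes)
    return wrapper

trainable_variables = with_default_scopes(lambda scopes: ('trainable', scopes))
print(trainable_variables())       # ('trainable', ['', 'main'])
print(trainable_variables(['a']))  # ('trainable', ['a'])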
Example #19
        def _wrapper(*inputs, **kwargs):
            add_to_collection(BACKEND_DEVICE_TYPE, device_type.lower())
            model_fn_inputs = []
            xdl_inputs = []
            placeholders = []

            for x in inputs:
                input = recursive_make_placeholder(x, xdl_inputs, placeholders)
                model_fn_inputs.append(input)

            gear_placeholders = []
            if 'gear_inputs' in kwargs:
                gear_inputs = kwargs['gear_inputs']
                input = recursive_make_placeholder(gear_inputs, xdl_inputs,
                                                   placeholders)
                gear_placeholders = flatten(placeholders[-len(gear_inputs):])
                kwargs['gear_inputs'] = input

            init_grad_placeholder = None
            if init_grad is not None:
                init_grad_placeholder = recursive_make_placeholder(
                    init_grad, xdl_inputs, placeholders)

            targets = model_func(*model_fn_inputs, **kwargs)
            local_init_op_names = [
                x.initializer.name for x in tf.local_variables()
            ]
            if isinstance(targets, tuple):
                targets = list(targets)
            else:
                targets = [targets]
            var_names = []
            gradient_op_names = []
            if is_training:
                loss = targets[0]
                if isinstance(loss, (list, tuple, dict)):
                    raise Exception('model function must return loss as first output')
                for gear_placeholder in gear_placeholders:
                    add_to_collection(BACKPROP_VARS,
                                      ("gear_grad", gear_placeholder))
                var_names, gradient_op_names = add_backprop_ops(
                    loss, get_collection(BACKPROP_VARS,
                                         ['', cur_model_scope()]),
                    init_grad_placeholder)
            input_op_names = get_op_names(placeholders)
            target_op_names = get_op_names(targets)
            op_inputs = xdl_inputs
            add_variable_inputs(op_inputs, input_op_names)
            outputs, gradients = xdl.tfbackend_op(
                inputs=list(op_inputs),
                input_op_names=','.join(input_op_names),
                target_op_names=','.join(target_op_names),
                gradient_op_names=','.join(gradient_op_names),
                local_init_op_names=','.join(local_init_op_names),
                graph_def=serialize_graph(),
                target_size=len(target_op_names),
                gradient_size=len(gradient_op_names),
                gpu_memory_fraction=gpu_memory_fraction)

            gradients_size = len(gradients)
            gear_size = len(gear_placeholders)
            gear_grads = gradients[gradients_size - gear_size:]
            gradients = gradients[0:gradients_size - gear_size]
            var_names = var_names[0:gradients_size - gear_size]
            if len(gear_grads) > 0:
                add_to_collection(GEAR_GRAD, gear_grads, cur_model_scope())
                for i in range(len(gear_inputs)):
                    set_gear_gradient(gear_inputs[i], gear_grads[i])
            if is_training:
                set_gradients(var_names, gradients, cur_model_scope())
            return outputs
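Because the gear placeholders were appended to BACKPROP_VARS last, their gradients sit at the tail of the list returned by the backend op; the slicing above peels them off before set_gradients is called. A tiny standalone check of that slicing (names are illustrative):

gradients = ['g_w1', 'g_w2', 'g_gear1', 'g_gear2']
var_names = ['w1', 'w2', 'gear_grad', 'gear_grad']

gradients_size = len(gradients)
gear_size = 2
gear_grads = gradients[gradients_size - gear_size:]
gradients = gradients[0:gradients_size - gear_size]
var_names = var_names[0:gradients_size - gear_size]

print(gear_grads)  # ['g_gear1', 'g_gear2']
print(gradients)   # ['g_w1', 'g_w2']
print(var_names)   # ['w1', 'w2']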
Example #20
        def _wrapper(*inputs, **kwargs):
            global _TF_GRAPH_DICT
            model_scope = cur_model_scope()
            if model_scope not in _TF_GRAPH_DICT:
                _TF_GRAPH_DICT[model_scope] = tf.Graph()
            cur_graph = _TF_GRAPH_DICT[model_scope]
            with cur_graph.as_default():
                set_backend_type('tensorflow')
                add_to_collection(BACKEND_DEVICE_TYPE, device_type.lower())
                model_fn_inputs = []
                xdl_inputs = []
                placeholders = []

                for x in inputs:
                    input = recursive_make_placeholder(x, xdl_inputs,
                                                       placeholders)
                    model_fn_inputs.append(input)

                gear_placeholders = []
                if 'gear_inputs' in kwargs:
                    gear_inputs = kwargs['gear_inputs']
                    input = recursive_make_placeholder(gear_inputs, xdl_inputs,
                                                       placeholders)
                    gear_placeholders = flatten(
                        placeholders[-len(gear_inputs):])
                    #gear_placeholders = flatten(placeholders[-1:])
                    kwargs['gear_inputs'] = input

                init_grad_placeholder = None
                if init_grad is not None:
                    init_grad_placeholder = recursive_make_placeholder(
                        init_grad, xdl_inputs, placeholders)

                targets = model_func(*model_fn_inputs, **kwargs)
                local_init_op_names = [
                    x.initializer.name for x in tf.local_variables()
                ]
                if isinstance(targets, tuple):
                    targets = list(targets)
                else:
                    targets = [targets]
                # add batch_normalization
                batchnorm_begin = len(targets)
                batchnorm_tensors = tf_batchnorm_hook.get_batchnorm_tensors()
                batchnorm_size = len(batchnorm_tensors)
                targets.extend(batchnorm_tensors)
                # add trace tensors
                trace_tensors = trace.get_tensors(['tf', 'tf_sparse_assign'])
                trace_size = len(trace_tensors)
                targets.extend(trace_tensors)

                var_names = []
                gradient_op_names = []
                if is_training:
                    loss = targets[0]
                    if isinstance(loss, (list, tuple, dict)):
                        raise Exception('model function must return loss as first output')
                    for gear_placeholder in gear_placeholders:
                        add_to_collection(BACKPROP_VARS,
                                          ("gear_grad", gear_placeholder))
                    var_names, gradient_op_names = add_backprop_ops(
                        loss, get_collection(BACKPROP_VARS, ['', model_scope]),
                        init_grad_placeholder)
                input_op_names = get_op_names(placeholders)
                target_op_names = get_op_names(targets)
                target_op_types = get_op_types(targets)
                op_inputs = xdl_inputs
                add_variable_inputs(op_inputs, input_op_names)
                outputs, gradients = xdl.tfbackend_op(
                    inputs=list(op_inputs),
                    output_type=target_op_types,
                    input_op_names=','.join(input_op_names),
                    target_op_names=','.join(target_op_names),
                    gradient_op_names=','.join(gradient_op_names),
                    local_init_op_names=','.join(local_init_op_names),
                    graph_def=serialize_graph(model_scope=model_scope),
                    gradient_size=len(gradient_op_names),
                    gpu_memory_fraction=gpu_memory_fraction)

                gradients_size = len(gradients)
                gear_size = len(gear_placeholders)
                gear_grads = gradients[gradients_size - gear_size:]
                gradients = gradients[0:gradients_size - gear_size]
                var_names = var_names[0:gradients_size - gear_size]
                if len(gear_grads) > 0:
                    add_to_collection(GEAR_GRAD, gear_grads, cur_model_scope())
                    for i in range(len(gear_inputs)):
                        set_gear_gradient(gear_inputs[i], gear_grads[i])
                if is_training:
                    set_gradients(var_names, gradients, cur_model_scope())
                # set trace output
                trace_output = [] if trace_size == 0 else outputs[-trace_size:]
                trace.set_values(['tf', 'tf_sparse_assign'], trace_output)
                if batchnorm_size == 0:
                    batchnorm_output = []
                elif trace_size != 0:
                    batchnorm_output = outputs[-(trace_size +
                                                 batchnorm_size):-trace_size]
                else:
                    batchnorm_output = outputs[-(trace_size + batchnorm_size):]
                tf_batchnorm_hook.set_tf_output(batchnorm_output)
                return outputs if (
                    trace_size == 0 and batchnorm_size
                    == 0) else outputs[:-(trace_size + batchnorm_size)]
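The extra batch-norm and trace targets were appended after the model outputs in Example #20, so they come back at the tail of outputs and are sliced off again before returning. A small standalone walkthrough of that tail slicing (the tensor names are made up for illustration):

outputs = ['loss', 'prediction', 'bn_mean', 'bn_var', 'trace_t']
batchnorm_size, trace_size = 2, 1

trace_output = [] if trace_size == 0 else outputs[-trace_size:]
if batchnorm_size == 0:
    batchnorm_output = []
elif trace_size != 0:
    batchnorm_output = outputs[-(trace_size + batchnorm_size):-trace_size]
else:
    batchnorm_output = outputs[-(trace_size + batchnorm_size):]
model_outputs = outputs if (trace_size == 0 and batchnorm_size == 0) \
    else outputs[:-(trace_size + batchnorm_size)]

print(trace_output)      # ['trace_t']
print(batchnorm_output)  # ['bn_mean', 'bn_var']
print(model_outputs)     # ['loss', 'prediction']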