Example #1
    def check_module_ctx_group(ctxs, group2ctxs, grad_ctxs=None):
        with mx.AttrScope(ctx_group='dev1'):
            a = mx.symbol.Variable('a')
            a = a * 2
        with mx.AttrScope(ctx_group='dev2'):
            b = mx.symbol.Variable('b')
            c = a + b
        shape = (2, 5)
        mod1 = mx.mod.Module(c, context=ctxs, data_names=['a', 'b'], label_names=None,
                             group2ctxs=group2ctxs)
        mod1.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
        mod1.init_params()
        mod1.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]), is_train=True)
        mod1.backward([mx.nd.ones(shape)])
        mod1_input_grads = mod1.get_input_grads()

        mod2 = mx.mod.Module(c, context=ctxs, data_names=['a', 'b'], label_names=None)
        mod2.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
        mod2.init_params()
        mod2.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]), is_train=True)
        mod2.backward([mx.nd.ones(shape)])
        mod2_input_grads = mod2.get_input_grads()

        if grad_ctxs is not None:
            assert(mod1_input_grads[0].context == grad_ctxs[0])
            assert(mod1_input_grads[1].context == grad_ctxs[1])
        assert(np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy()))
        assert(np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy()))
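For reference, the helper above takes a set of data-parallel contexts, a group-to-context mapping, and optionally the contexts the input gradients are expected to land on. A minimal sketch of how it might be invoked, under assumed CPU-only contexts (the real test suite may pass different arguments):

    # Illustrative calls only; contexts are assumptions for the sketch.
    # Single data-parallel context, one ctx-group mapping:
    check_module_ctx_group([mx.cpu(0)],
                           {'dev1': mx.cpu(1), 'dev2': mx.cpu(2)},
                           grad_ctxs=[mx.cpu(1), mx.cpu(2)])
    # group2ctxs can also be a list of dicts, one per data-parallel context:
    check_module_ctx_group([mx.cpu(0), mx.cpu(1)],
                           [{'dev1': mx.cpu(2), 'dev2': mx.cpu(3)},
                            {'dev1': mx.cpu(4), 'dev2': mx.cpu(5)}])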
Example #2
def test_module_ctx_group():
    with mx.AttrScope(ctx_group='dev1'):
        a = mx.symbol.Variable('a')
        a = a * 2
    with mx.AttrScope(ctx_group='dev2'):
        b = mx.symbol.Variable('b')
        c = a + b
    shape = (2, 5)
    mod1 = mx.mod.Module(c, context=[mx.cpu(0)], data_names=['a', 'b'], label_names=None,
                         group2ctxs=[{'dev1':mx.cpu(1),'dev2':mx.cpu(2)}])
    mod1.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod1.init_params()
    mod1.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]), is_train=True)
    mod1.backward([mx.nd.ones(shape)])
    mod1_input_grads = mod1.get_input_grads()

    mod2 = mx.mod.Module(c, data_names=['a', 'b'], label_names=None)
    mod2.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod2.init_params()
    mod2.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]), is_train=True)
    mod2.backward([mx.nd.ones(shape)])
    mod2_input_grads = mod2.get_input_grads()

    assert np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy())
    assert np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy())
Example #3
    def sym_gen(seq_len):
        with mx.AttrScope(ctx_group='dev1'):
            data = mx.symbol.Variable('data')
            weight = mx.symbol.Variable('dev1_weight')
            bias = mx.symbol.Variable('dev1_bias')
            fc = data
            for i in range(seq_len):
                fc = mx.symbol.FullyConnected(data=fc,
                                              weight=weight,
                                              bias=bias,
                                              name='dev1_fc_%d' % i,
                                              num_hidden=num_hidden)
        with mx.AttrScope(ctx_group='dev2'):
            label = mx.symbol.Variable('label')
            weight = mx.symbol.Variable('dev2_weight')
            bias = mx.symbol.Variable('dev2_bias')
            for i in range(seq_len):
                fc = mx.symbol.FullyConnected(data=fc,
                                              weight=weight,
                                              bias=bias,
                                              name='dev2_fc_%d' % i,
                                              num_hidden=num_hidden)
            sym = mx.symbol.SoftmaxOutput(fc, label, name='softmax')

        return sym, ('data', ), ('label', )
Example #4
def test_load_000800():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data', lr_mult=0.2)
        weight = mx.sym.Variable(name='fc1_weight', lr_mult=1.2)
        fc1  = mx.symbol.FullyConnected(data = data, weight=weight, name='fc1', num_hidden=128, wd_mult=0.3)
        act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2  = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64, lr_mult=0.01)
        act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
        fc3  = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3, name='batchnorm0')
        sym1  = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    sym2 = mx.sym.load(os.path.join(curr_path, 'save_000800.json'))

    attr1 = sym1.attr_dict()
    attr2 = sym2.attr_dict()
    for k, v1 in attr1.items():
        assert k in attr2, k
        v2 = attr2[k]
        for kk, vv1 in v1.items():
            if kk.startswith('__') and kk.endswith('__'):
                assert kk in v2 and v2[kk] == vv1, k + str(v1) + str(v2)

    check_symbol_consistency(sym1, sym2,
        {'ctx': mx.cpu(0), 'group2ctx': {'stage1' : mx.cpu(1), 'stage2' : mx.cpu(2)}, 'data': (1,200)})
Example #5
def test_ctx_group():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data')
        fc1  = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
        act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2  = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
        act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
        fc3  = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3)
        mlp  = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')

    set_stage2 = set(mlp.list_arguments()) - set_stage1

    group2ctx = {
        'stage1' : mx.cpu(1),
        'stage2' : mx.cpu(2)
    }

    texec = mlp.simple_bind(mx.cpu(0),
                            group2ctx=group2ctx,
                            data=(1,200))

    for arr, name in zip(texec.arg_arrays, mlp.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
Example #6
def test_operator():
    data = mx.symbol.Variable('data')
    with mx.AttrScope(group='4', data='great'):
        fc1 = mx.symbol.Activation(data, act_type='relu')
        with mx.AttrScope(init_bias='0.0'):
            fc2 = mx.symbol.FullyConnected(fc1, num_hidden=10, name='fc2')
    assert fc1.attr('data') == 'great'
    fc2copy = pkl.loads(pkl.dumps(fc2))
    assert fc2copy.tojson() == fc2.tojson()
    fc2weight = fc2.get_internals()['fc2_weight']
Example #7
def test_chain():
    ctx1 = mx.cpu(0)
    ctx2 = mx.cpu(1)
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    data3 = mx.sym.Variable('data3')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3

    with mx.AttrScope(ctx_group='dev2'):
        net = net + data3

    arr = []
    arr_grad = []
    shape = (4, 5)
    with mx.Context(ctx1):
        for i in range(n):
            arr.append(mx.nd.empty(shape))
            arr_grad.append(mx.nd.empty(shape))
    with mx.Context(ctx2):
        arr.append(mx.nd.empty(shape))
        arr_grad.append(mx.nd.empty(shape))

    exec1 = net.bind(ctx1,
                     args=arr,
                     args_grad=arr_grad,
                     group2ctx={
                         'dev1': ctx1,
                         'dev2': ctx2
                     })
    arr[0][:] = 1.0
    arr[1][:] = 2.0
    arr[2][:] = 3.0
    arr2 = [a.copyto(ctx1) for a in arr]
    arr_grad2 = [a.copyto(ctx1) for a in arr_grad]
    exec2 = net.bind(ctx1, args=arr2, args_grad=arr_grad2)

    # Show the execution plan, which involves cross-device copy nodes
    print(exec1.debug_str())
    exec1.forward(is_train=True)
    exec2.forward(is_train=True)
    assert reldiff(exec1.outputs[0].asnumpy(),
                   exec2.outputs[0].asnumpy()) < 1e-6
    out_grad = mx.nd.empty(shape, ctx1)
    out_grad[:] = 1.0
    exec1.backward([out_grad])
    exec2.backward([out_grad.copyto(ctx1)])
    for a, b in zip(arr_grad, arr_grad2):
        assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6
Example #8
def _attr_scope_lr(attr_type):
    # 'alex': weight gets (lr_mult=1, wd_mult=1) and bias gets (2, 0),
    # so this scope should be applied to bias variables only
    if attr_type == 'alex':
        return mx.AttrScope(lr_mult='2.', wd_mult='0.')
    # 'fixed': both weight and bias get (0, 0), so apply it to both
    if attr_type == 'fixed':
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    # default ('torch'-style): both get (1, 1), so an empty scope suffices
    else:
        return mx.AttrScope()
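As a quick illustration of how such a scope is typically consumed (the bn helper in Example #11 below follows the same pattern), variables declared inside it inherit the lr_mult/wd_mult attributes. A minimal sketch; the variable name is made up for the example:

with _attr_scope_lr('alex'):
    bias = mx.sym.Variable('conv1_bias')   # hypothetical bias variable
assert bias.attr('lr_mult') == '2.'        # inherited from the scope
assert bias.attr('wd_mult') == '0.'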
Example #9
def matrix_fact_model_parallel_net(factor_size, num_hidden, max_user,
                                   max_item):
    # Set the ctx_group attribute to 'dev1' for the symbols created in this scope;
    # these symbols will be bound to the context that 'dev1' maps to in group2ctxs.
    with mx.AttrScope(ctx_group='dev1'):
        # input
        user = mx.symbol.Variable('user')
        item = mx.symbol.Variable('item')
        # user feature lookup
        user_weight = mx.symbol.Variable('user_weight')
        user = mx.symbol.Embedding(data=user,
                                   weight=user_weight,
                                   input_dim=max_user,
                                   output_dim=factor_size)
        # item feature lookup
        item_weight = mx.symbol.Variable('item_weight')
        item = mx.symbol.Embedding(data=item,
                                   weight=item_weight,
                                   input_dim=max_item,
                                   output_dim=factor_size)

    # Set the ctx_group attribute to 'dev2' for the symbols created in this scope;
    # these symbols will be bound to the context that 'dev2' maps to in group2ctxs.
    with mx.AttrScope(ctx_group='dev2'):
        # non-linear transformation of user features
        user = mx.symbol.Activation(data=user, act_type='relu')
        fc_user_weight = mx.symbol.Variable('fc_user_weight')
        fc_user_bias = mx.symbol.Variable('fc_user_bias')
        user = mx.symbol.FullyConnected(data=user,
                                        weight=fc_user_weight,
                                        bias=fc_user_bias,
                                        num_hidden=num_hidden)
        # non-linear transformation of item features
        item = mx.symbol.Activation(data=item, act_type='relu')
        fc_item_weight = mx.symbol.Variable('fc_item_weight')
        fc_item_bias = mx.symbol.Variable('fc_item_bias')
        item = mx.symbol.FullyConnected(data=item,
                                        weight=fc_item_weight,
                                        bias=fc_item_bias,
                                        num_hidden=num_hidden)
        # predict by the inner product, which is element-wise product and then sum
        pred = user * item
        pred = mx.symbol.sum(data=pred, axis=1)
        pred = mx.symbol.Flatten(data=pred)
        # label
        score = mx.symbol.Variable('score')
        # loss layer
        pred = mx.symbol.LinearRegressionOutput(data=pred, label=score)
    return pred
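To make the group-to-context resolution described in the comments concrete, here is a minimal CPU-only binding sketch in the style of Examples #1 and #2. The sizes, shapes, and context assignments are assumptions for illustration, not values from the original project:

net = matrix_fact_model_parallel_net(factor_size=64, num_hidden=64,
                                     max_user=1000, max_item=1000)
mod = mx.mod.Module(symbol=net, context=[mx.cpu(0)],
                    data_names=['user', 'item'], label_names=['score'],
                    group2ctxs=[{'dev1': mx.cpu(1), 'dev2': mx.cpu(2)}])
mod.bind(data_shapes=[('user', (64,)), ('item', (64,))],
         label_shapes=[('score', (64,))])
mod.init_params()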
Example #10
def get_symbol(args, arg_params, aux_params):
  data_shape = (args.image_channel,args.image_h,args.image_w)
  image_shape = ",".join([str(x) for x in data_shape])
  margin_symbols = []

  print('init %s, num_layers: %d' % (network, num_layers))
  with mx.AttrScope(ctx_group='dev0'):
    embedding = eval(network).get_symbol(args.emb_size, num_layers, shake_drop=args.shake_drop,
        version_se=args.version_se, version_input=args.version_input, 
        version_output=args.version_output, version_unit=args.version_unit,
        version_act=args.version_act, width_mult = args.width_mult, version_bn=args.version_bn, 
        bn_mom = args.bn_mom)

  gt_label = mx.symbol.Variable('softmax_label')
  nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')
  anchor = mx.symbol.slice_axis(nembedding, axis=0, begin=0, end=args.per_batch_size//3)
  positive = mx.symbol.slice_axis(nembedding, axis=0, begin=args.per_batch_size//3, end=2*args.per_batch_size//3)
  negative = mx.symbol.slice_axis(nembedding, axis=0, begin=2*args.per_batch_size//3, end=args.per_batch_size)
  ap = anchor - positive
  an = anchor - negative
  ap = ap*ap
  an = an*an
  ap = mx.symbol.sum(ap, axis=1, keepdims=1) #(T,1)
  an = mx.symbol.sum(an, axis=1, keepdims=1) #(T,1)
  triplet_loss = mx.symbol.Activation(data = (ap-an+args.triplet_alpha), act_type='relu')
  triplet_loss = mx.symbol.mean(triplet_loss)
  #triplet_loss = mx.symbol.sum(triplet_loss)/(args.per_batch_size//3)
  triplet_loss = mx.symbol.MakeLoss(triplet_loss)
  out_list = [mx.symbol.BlockGrad(embedding)]
  out_list.append(mx.sym.BlockGrad(gt_label))
  out_list.append(triplet_loss)
  out = mx.symbol.Group(out_list)
  return (out, arg_params, aux_params)
Example #11
def bn(data, name, eps=1e-5, fix_gamma=False, use_global_stats=None):
    if use_global_stats is None:
        use_global_stats = cfg.get('use_global_stats', False)
    if fix_gamma:
        with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data,
                                gamma=gamma,
                                beta=beta,
                                name=name,
                                eps=eps,
                                fix_gamma=True,
                                use_global_stats=use_global_stats)
    else:
        lr_type = cfg.get('lr_type', 'torch')
        assert lr_type in ('alex', 'torch')
        with _attr_scope_lr(lr_type):
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data,
                                beta=beta,
                                name=name,
                                eps=eps,
                                fix_gamma=False,
                                use_global_stats=use_global_stats)
Example #12
def test_attr_basic():
    with mx.AttrScope(group='4', data='great'):
        data = mx.symbol.Variable('data', attr={'dtype': 'data', 'group': '1'})
        gdata = mx.symbol.Variable('data2')
    assert gdata.attr('group') == '4'
    assert data.attr('group') == '1'
    data2 = pkl.loads(pkl.dumps(data))
    assert data.attr('dtype') == data2.attr('dtype')
Example #13
def _attr_scope_lr(lr_type, lr_owner):
    assert lr_type in ('alex', 'alex10', 'torch', 'psp', 'mxnet')
    # weight (lr_mult, wd_mult); bias;
    # 1, 1; 2, 0;
    if lr_type == 'alex':
        if lr_owner == 'weight':
            return mx.AttrScope()
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='2.', wd_mult='0.')
        else:
            assert False
    # 10, 1; 20, 0;
    if lr_type == 'alex10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='20.', wd_mult='0.')
        else:
            assert False
    # 0, 0; 0, 0;
    # so apply this to both
    if lr_type == 'fixed':
        assert lr_owner in ('weight', 'bias')
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    # 1, 0; 1, 0;
    if lr_type == 'psp':
        assert lr_owner in ('weight', 'bias')
        return mx.AttrScope(wd_mult='0.')

    # 1, 1; 1, 1;
    # 'torch' and 'mxnet' keep the default multipliers
    if lr_type in ('torch', 'mxnet'):
        return mx.AttrScope()
Example #14
def Softmax(embedding, gt_label, name, args, cvd=None):
    if cvd is None:
        _weight = mx.symbol.Variable(name + "_weight",
                                     shape=(args.ctx_num_classes,
                                            args.emb_size),
                                     lr_mult=args.fc7_lr_mult,
                                     wd_mult=args.fc7_wd_mult,
                                     init=mx.init.Normal(0.01))
        if args.fc7_no_bias:
            fc7 = mx.sym.FullyConnected(data=embedding,
                                        weight=_weight,
                                        no_bias=True,
                                        num_hidden=args.ctx_num_classes,
                                        name=name)
        else:
            _bias = mx.symbol.Variable(name + '_bias',
                                       lr_mult=2.0,
                                       wd_mult=0.0)
            fc7 = mx.sym.FullyConnected(data=embedding,
                                        weight=_weight,
                                        bias=_bias,
                                        num_hidden=args.ctx_num_classes,
                                        name=name)
        return fc7
    else:
        fc7_subs = []
        for ctx_id in range(len(cvd)):
            with mx.AttrScope(ctx_group='dev%d' % (ctx_id + 1)):
                _weight = mx.symbol.Variable(name % ctx_id + "_weight",
                                             shape=(args.ctx_num_classes,
                                                    args.emb_size),
                                             lr_mult=args.fc7_lr_mult,
                                             wd_mult=args.fc7_wd_mult,
                                             init=mx.init.Normal(0.01))
                if args.fc7_no_bias:
                    fc7_sub = mx.sym.FullyConnected(
                        data=embedding,
                        weight=_weight,
                        no_bias=True,
                        num_hidden=args.ctx_num_classes,
                        name=name % ctx_id)
                else:
                    _bias = mx.symbol.Variable(name % ctx_id + '_bias',
                                               lr_mult=2.0,
                                               wd_mult=0.0)
                    fc7_sub = mx.sym.FullyConnected(
                        data=embedding,
                        weight=_weight,
                        bias=_bias,
                        num_hidden=args.ctx_num_classes,
                        name=name % ctx_id)
                fc7_subs.append(fc7_sub)
        fc7 = mx.sym.concat(*fc7_subs, dim=1, name=name + '_concat')
        return fc7
Example #15
def test_chain():
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3

    with mx.AttrScope(ctx_group='dev2'):
        net = net + data1

    with mx.Context(mx.cpu(0)):
        shape = (4, 5)
        arr = [mx.nd.empty(shape) for i in range(n)]
        arr_grad = [mx.nd.empty(shape) for i in range(n)]

    exec1 = net.bind(mx.cpu(),
                     args=arr,
                     args_grad=arr_grad,
                     group2ctx={
                         'dev1': mx.cpu(0),
                         'dev2': mx.cpu(1)
                     })
    arr[0][:] = 1.0
    arr[1][:] = 2.0
    arr2 = [a.copyto(mx.cpu()) for a in arr]
    arr_grad2 = [a.copyto(mx.cpu()) for a in arr_grad]
    exec2 = net.bind(mx.cpu(), args=arr2, args_grad=arr_grad2)

    # Show the execution plan, which involves cross-device copy nodes
    print(exec1.debug_str())
    exec1.forward()
    exec2.forward()
    assert reldiff(exec1.outputs[0].asnumpy(),
                   exec2.outputs[0].asnumpy()) < 1e-6
    out_grad = mx.nd.empty(shape, mx.cpu(1))
    out_grad[:] = 1.0
    exec1.backward([out_grad])
    exec2.backward([out_grad.copyto(mx.cpu())])
    for a, b in zip(arr_grad, arr_grad2):
        assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6
Example #16
    def encode(self, data: mx.sym.Symbol, data_length: mx.sym.Symbol,
               seq_len: int) -> mx.sym.Symbol:
        """
        Encodes data given sequence lengths of individual examples (data_length) and maximum sequence length (seq_len).

        :param data: Input data.
        :param data_length: Vector with sequence lengths.
        :param seq_len: Maximum sequence length.
        :return: Encoded input data.
        """
        with mx.AttrScope(__layout__=C.TIME_MAJOR):
            return mx.sym.swapaxes(data=data, dim1=0, dim2=1)
Example #17
    def encode(self, data: mx.sym.Symbol, data_length: Optional[mx.sym.Symbol],
               seq_len: int) -> Tuple[mx.sym.Symbol, mx.sym.Symbol, int]:
        """
        Encodes data given sequence lengths of individual examples and maximum sequence length.

        :param data: Input data.
        :param data_length: Vector with sequence lengths.
        :param seq_len: Maximum sequence length.
        :return: Encoded versions of input data (data, data_length, seq_len).
        """
        with mx.AttrScope(__layout__=self.target_layout):
            return mx.sym.swapaxes(data=data, dim1=0,
                                   dim2=1), data_length, seq_len
Example #18
def check_ctx_group_sparse(lhs_stype, rhs_stype):
    with mx.AttrScope(ctx_group='stage1'):
        lhs = mx.symbol.Variable('lhs', storage_type=lhs_stype)
        rhs = mx.symbol.Variable('rhs', storage_type=rhs_stype)
        plus = mx.symbol.elemwise_add(lhs, rhs, name='plus')

    set_stage1 = set(plus.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        softmax = mx.symbol.SoftmaxOutput(data=plus, name='softmax')

    set_stage2 = set(softmax.list_arguments()) - set_stage1

    group2ctx = {'stage1': mx.cpu(1), 'stage2': mx.cpu(2)}
    texec = softmax.simple_bind(mx.cpu(0),
                                group2ctx=group2ctx,
                                lhs=(1, 200),
                                rhs=(1, 200))

    for arr, name in zip(texec.arg_arrays, softmax.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
Example #19
def test_ctx_group_sparse():
    with mx.AttrScope(ctx_group='stage1'):
        lhs = mx.symbol.Variable('lhs', stype='csr')
        rhs = mx.symbol.Variable('rhs', stype='row_sparse')
        dot  = mx.symbol.dot(lhs, rhs, name='dot')

    set_stage1 = set(dot.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        softmax  = mx.symbol.SoftmaxOutput(data = dot, name = 'softmax')

    set_stage2 = set(softmax.list_arguments()) - set_stage1

    group2ctx = {
        'stage1' : mx.cpu(1),
        'stage2' : mx.cpu(2)
    }
    texec = softmax.simple_bind(mx.cpu(0), group2ctx=group2ctx,
                                lhs=(32,200), rhs=(200, 5))

    for arr, name in zip(texec.arg_arrays, softmax.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
Example #20
def test_attr_basic():
    with mx.AttrScope(group='4', data='great'):
        data = mx.symbol.Variable('data',
                                  attr={
                                      'dtype': 'data',
                                      'group': '1',
                                      'force_mirroring': 'True'
                                  },
                                  lr_mult=1)
        gdata = mx.symbol.Variable('data2')
    assert gdata.attr('group') == '4'
    assert data.attr('group') == '1'
    assert data.attr('lr_mult') == '1'
    assert data.attr('__lr_mult__') == '1'
    assert data.attr('force_mirroring') == 'True'
    assert data.attr('__force_mirroring__') == 'True'
    data2 = pkl.loads(pkl.dumps(data))
    assert data.attr('dtype') == data2.attr('dtype')
Example #21
    def bn(self,
           data,
           name,
           eps=1.001e-5,
           fix_gamma=False,
           use_global_stats=None,
           cudnn_off=False,
           relu=False):
        """
            batch normalization wrapper
        """
        if use_global_stats is None:
            use_global_stats = cfg.get('bn_use_global_stats', False)

        if fix_gamma:
            with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
                gamma = mx.sym.Variable('{}_gamma'.format(name))
                beta = mx.sym.Variable('{}_beta'.format(name))
            output = mx.sym.BatchNorm(data=data,
                                      gamma=gamma,
                                      beta=beta,
                                      name=name,
                                      eps=eps,
                                      fix_gamma=True,
                                      use_global_stats=use_global_stats,
                                      cudnn_off=cudnn_off)
        else:
            lr_type = cfg.get('lr_type', 'torch')
            with _attr_scope_lr(lr_type, 'weight'):
                gamma = mx.sym.Variable('{}_gamma'.format(name))
            with _attr_scope_lr(lr_type, 'bias'):
                beta = mx.sym.Variable('{}_beta'.format(name))
            output = mx.sym.BatchNorm(data=data,
                                      gamma=gamma,
                                      beta=beta,
                                      name=name,
                                      eps=eps,
                                      fix_gamma=False,
                                      use_global_stats=use_global_stats)

        if relu:
            output = mx.sym.relu(output)

        return output
Example #22
    def __init__(self,
                 input_order =  3,
                 input_size  =100,
                 core_number =  4,
                 core_size   = 25):
        """
        input_order (int): Order of input-CNN filter 
        input_size  (int): Size of input vector
        core_number (int): Number of switching core
        core_size   (int): Size of switching core
        """

        self._io = input_order
        self._is = input_size
        self._cn = core_number
        self._cs = core_size

        with mx.AttrScope(group='Swth', data='Command'):
            self.initials()
Example #23
def _attr_scope_lr(lr_type, lr_owner):
    assert lr_owner in ('weight', 'bias')
    if lr_type == 'alex':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='1.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='2.', wd_mult='0.')
    elif lr_type == 'alex10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='20.', wd_mult='0.')
    elif lr_type == 'torch10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='10.', wd_mult='0.')
    elif lr_type == 'zeros':
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    else:
        return mx.AttrScope()
Example #24
def get_module(args, data_shapes):
  network, num_layers = args.network.split(',')
  num_layers = int(num_layers)
  data_shape = (3, 112, 112)
  image_shape = ",".join([str(x) for x in data_shape])

  with mx.AttrScope(ctx_group='dev0'):
    embedding = eval(network).get_symbol(args.emb_size, num_layers, shake_drop=args.shake_drop,
        version_se=args.version_se, version_input=args.version_input, 
        version_output=args.version_output, version_unit=args.version_unit,
        version_act=args.version_act, width_mult = args.width_mult, version_bn=args.version_bn, 
        bn_mom = args.bn_mom)

  #out_list = [mx.symbol.BlockGrad(embedding)]
  #sym = mx.symbol.Group(out_list)

  ctx = [mx.gpu(0)]
  #all_layers = sym.get_internals()
  all_layers = embedding.get_internals()
  out_list = [all_layers[layer] for layer in ['bn1_output', 'fc1_output']]
  sym = mx.symbol.Group(out_list)
  
  model = mx.mod.Module(
      context       = ctx,
      symbol        = sym,
      data_names    = ['data'],
      label_names   = None
  )

  model.bind(for_training=False, data_shapes=data_shapes)

  if args.pretrained != '':
    print(args.pretrained)
    vec = args.pretrained.split(',')
    _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
    model.set_params(arg_params, aux_params, allow_extra=True, allow_missing=True) 

  return model
Example #25
File: RSU.py  Project: wythe51/RSU
    def __init__(self,
                 input_order=3,
                 input_size=100,
                 core_number=4,
                 core_size=25):
        """
        input_order (int): Order of input-CNN filter 
        input_size  (int): Size of input vector
        core_number (int): Number of switching core
        core_size   (int): Size of switching core
        """

        self._io = input_order
        self._is = input_size
        self._cn = core_number
        self._cs = core_size

        self.ctrl = _CNNControl(self._io, self._is, self._cn, self._cs)

        self.swch = _SoftmaxSwitch(self._io, self._is, self._cn, self._cs)

        with mx.AttrScope(group='RSU'):
            self.initials()
Example #26
def get_symbol(network, num_layers, args, arg_params, aux_params):
  data_shape = (args.image_channel,args.image_h,args.image_w)
  image_shape = ",".join([str(x) for x in data_shape])
  margin_symbols = []

  print('init %s, num_layers: %d' % (network, num_layers))
  with mx.AttrScope(ctx_group='dev0'):
    embedding = eval(network).get_symbol(args.emb_size, num_layers, shake_drop=args.shake_drop,
        version_se=args.version_se, version_input=args.version_input, 
        version_output=args.version_output, version_unit=args.version_unit,
        version_act=args.version_act, width_mult = args.width_mult, version_bn=args.version_bn, 
        bn_mom = args.bn_mom, use_global_stats=True)

  if network=='fspherenet':
    data_shape_dict = {'data' : (args.per_batch_size,)+data_shape}
    fspherenet.init_weights(sym, data_shape_dict, int(num_layers))

  gt_label = mx.symbol.Variable('softmax_label')
  label = mx.sym.slice_axis(gt_label, axis=0, begin=0, end=args.per_batch_size // 2)
  
  nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')
  nembedding1 = mx.symbol.slice_axis(nembedding, axis=0, begin=0, end=args.per_batch_size//2)
  nembedding2 = mx.symbol.slice_axis(nembedding, axis=0, begin=args.per_batch_size//2, end=2*args.per_batch_size//2)

  cos_simi = mx.sym.sum(nembedding1 * nembedding2, axis=1, keepdims=1)
  target = mx.sym.where(label, 0.64 - cos_simi, cos_simi - 0.36)
  pairwise_loss = mx.symbol.clip(target, 0, 1)
  pairwise_loss = mx.symbol.mean(pairwise_loss)
  pairwise_loss = mx.symbol.MakeLoss(pairwise_loss)

  out_list = [mx.symbol.BlockGrad(embedding)]
  out_list.append(mx.sym.BlockGrad(gt_label))
  out_list.append(mx.sym.BlockGrad(cos_simi))
  out_list.append(mx.sym.BlockGrad(target))
  out_list.append(pairwise_loss)
  out = mx.symbol.Group(out_list)
  return (out, arg_params, aux_params)
Example #27
def get_fc7(embedding, name, args, cvd=None):
    nembedding = mx.symbol.L2Normalization(embedding,
                                           mode='instance',
                                           name=name + '_norm')
    if cvd is None:
        _weight = mx.symbol.Variable(name + "_weight",
                                     shape=(args.ctx_num_classes,
                                            args.emb_size),
                                     attr={
                                         'lr_mult': str(args.fc7_lr_mult),
                                         'wd_mult': str(args.fc7_wd_mult)
                                     },
                                     init=mx.init.Normal(0.01))
        fc7 = mx.sym.FullyConnected(data=nembedding,
                                    weight=_weight,
                                    num_hidden=args.ctx_num_classes,
                                    no_bias=True,
                                    normalize=True,
                                    name=name)
    else:
        fc7_subs = []
        for ctx_id in range(len(cvd)):
            with mx.AttrScope(ctx_group='dev%d' % (ctx_id + 1)):
                _weight = mx.symbol.Variable(name % ctx_id + '_weight',
                                             shape=(args.ctx_num_classes,
                                                    args.emb_size))
                fc7_sub = mx.sym.FullyConnected(
                    data=nembedding,
                    weight=_weight,
                    num_hidden=args.ctx_num_classes,
                    no_bias=True,
                    normalize=True,
                    name=name % ctx_id)
                fc7_subs.append(fc7_sub)
        fc7 = mx.sym.concat(*fc7_subs, dim=1, name=name + '_concat')
    return fc7
Example #28
def lstm_unroll(num_lstm_layer, seq_len, input_size,
                num_hidden, num_embed, num_label, dropout=0.,
                concat_decode=True, use_loss=False):
    """unrolled lstm network"""
    # initialize the parameter symbols
    with mx.AttrScope(ctx_group='embed'):
        embed_weight=mx.sym.Variable("embed_weight")

    with mx.AttrScope(ctx_group='decode'):
        cls_weight = mx.sym.Variable("cls_weight")
        cls_bias = mx.sym.Variable("cls_bias")

    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        with mx.AttrScope(ctx_group='layer%d' % i):
            param_cells.append(LSTMParam(i2h_weight = mx.sym.Variable("l%d_i2h_weight" % i),
                                         i2h_bias = mx.sym.Variable("l%d_i2h_bias" % i),
                                         h2h_weight = mx.sym.Variable("l%d_h2h_weight" % i),
                                         h2h_bias = mx.sym.Variable("l%d_h2h_bias" % i)))
            state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                              h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)
    assert(len(last_states) == num_lstm_layer)

    last_hidden = []
    for seqidx in range(seq_len):
        # embedding layer
        with mx.AttrScope(ctx_group='embed'):
            data = mx.sym.Variable("t%d_data" % seqidx)
            hidden = mx.sym.Embedding(data=data, weight=embed_weight,
                                      input_dim=input_size,
                                      output_dim=num_embed,
                                      name="t%d_embed" % seqidx)
        # stack LSTM
        for i in range(num_lstm_layer):
            if i==0:
                dp=0.
            else:
                dp = dropout
            with mx.AttrScope(ctx_group='layer%d' % i):
                next_state = lstm(num_hidden, indata=hidden,
                                  prev_state=last_states[i],
                                  param=param_cells[i],
                                  seqidx=seqidx, layeridx=i, dropout=dp)
                hidden = next_state.h
                last_states[i] = next_state

        # decoder
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        last_hidden.append(hidden)

    out_prob = []
    if not concat_decode:
        for seqidx in range(seq_len):
            with mx.AttrScope(ctx_group='decode'):
                fc = mx.sym.FullyConnected(data=last_hidden[seqidx],
                                           weight=cls_weight,
                                           bias=cls_bias,
                                           num_hidden=num_label,
                                           name="t%d_cls" % seqidx)
                label = mx.sym.Variable("t%d_label" % seqidx)
                if use_loss:
                    sm = mx.sym.softmax_cross_entropy(fc, label, name="t%d_sm" % seqidx)
                else:
                    sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="t%d_sm" % seqidx)
                out_prob.append(sm)
    else:
        with mx.AttrScope(ctx_group='decode'):
            concat = mx.sym.Concat(*last_hidden, dim = 0)
            fc = mx.sym.FullyConnected(data=concat,
                                       weight=cls_weight,
                                       bias=cls_bias,
                                       num_hidden=num_label)
            label = mx.sym.Variable("label")
            if use_loss:
                sm = mx.sym.softmax_cross_entropy(fc, label, name="sm")
            else:
                sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="sm")
            out_prob = [sm]

    for i in range(num_lstm_layer):
        state = last_states[i]
        state = LSTMState(c=mx.sym.BlockGrad(state.c, name="l%d_last_c" % i),
                          h=mx.sym.BlockGrad(state.h, name="l%d_last_h" % i))
        last_states[i] = state

    unpack_c = [state.c for state in last_states]
    unpack_h = [state.h for state in last_states]
    list_all = out_prob + unpack_c + unpack_h
    return mx.sym.Group(list_all)
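The symbol built above places its parameters in the 'embed', 'decode', and per-layer 'layer%d' groups, so a model-parallel binding needs a group2ctx mapping that covers each group. A purely illustrative, CPU-only mapping (the original script presumably spreads these over GPUs; the layer count below is an assumed value):

num_lstm_layer = 2  # assumed for the sketch
group2ctx = {'embed': mx.cpu(0), 'decode': mx.cpu(1)}
group2ctx.update({'layer%d' % i: mx.cpu(2 + i) for i in range(num_lstm_layer)})
# The mapping is then passed to bind/simple_bind via group2ctx=group2ctx,
# as in test_ctx_group above.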
Example #29
    def g():
        with mx.AttrScope(x="hello"):
            event.wait()
            if "hello" in AttrScope.current._attr.values():
                status[0] = True
Example #30
    def g():
        with mx.AttrScope(x="hello"):
            e2.set()
            e1.wait()
            if "hello" in mx.attribute.current()._attr.values():
                status[0] = True