def check_module_ctx_group(ctxs, group2ctxs, grad_ctxs=None):
    with mx.AttrScope(ctx_group='dev1'):
        a = mx.symbol.Variable('a')
        a = a * 2
    with mx.AttrScope(ctx_group='dev2'):
        b = mx.symbol.Variable('b')
        c = a + b
    shape = (2, 5)
    mod1 = mx.mod.Module(c, context=ctxs, data_names=['a', 'b'], label_names=None,
                         group2ctxs=group2ctxs)
    mod1.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod1.init_params()
    mod1.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]),
                 is_train=True)
    mod1.backward([mx.nd.ones(shape)])
    mod1_input_grads = mod1.get_input_grads()

    mod2 = mx.mod.Module(c, context=ctxs, data_names=['a', 'b'], label_names=None)
    mod2.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod2.init_params()
    mod2.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]),
                 is_train=True)
    mod2.backward([mx.nd.ones(shape)])
    mod2_input_grads = mod2.get_input_grads()

    if grad_ctxs is not None:
        assert mod1_input_grads[0].context == grad_ctxs[0]
        assert mod1_input_grads[1].context == grad_ctxs[1]
    assert np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy())
    assert np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy())
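
# Example invocations of the checker above, kept CPU-only so they run anywhere;
# the particular ctx/group2ctxs combinations are assumptions modeled on how the
# MXNet test suite drives this helper.
def example_module_ctx_group():
    check_module_ctx_group([mx.cpu(0)],
                           {'dev1': mx.cpu(1), 'dev2': mx.cpu(2)},
                           grad_ctxs=[mx.cpu(1), mx.cpu(2)])
    # group2ctxs may also be a list of dicts, one per data-parallel context
    check_module_ctx_group([mx.cpu(0), mx.cpu(1)],
                           [{'dev1': mx.cpu(2), 'dev2': mx.cpu(3)},
                            {'dev1': mx.cpu(4), 'dev2': mx.cpu(5)}])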
def test_module_ctx_group():
    with mx.AttrScope(ctx_group='dev1'):
        a = mx.symbol.Variable('a')
        a = a * 2
    with mx.AttrScope(ctx_group='dev2'):
        b = mx.symbol.Variable('b')
        c = a + b
    shape = (2, 5)
    mod1 = mx.mod.Module(c, context=[mx.cpu(0)], data_names=['a', 'b'], label_names=None,
                         group2ctxs=[{'dev1': mx.cpu(1), 'dev2': mx.cpu(2)}])
    mod1.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod1.init_params()
    mod1.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]),
                 is_train=True)
    mod1.backward([mx.nd.ones(shape)])
    mod1_input_grads = mod1.get_input_grads()

    mod2 = mx.mod.Module(c, data_names=['a', 'b'], label_names=None)
    mod2.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod2.init_params()
    mod2.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]),
                 is_train=True)
    mod2.backward([mx.nd.ones(shape)])
    mod2_input_grads = mod2.get_input_grads()

    assert np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy())
    assert np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy())
def sym_gen(seq_len):
    # NOTE: `num_hidden` comes from the enclosing scope in the original code.
    with mx.AttrScope(ctx_group='dev1'):
        data = mx.symbol.Variable('data')
        weight = mx.symbol.Variable('dev1_weight')
        bias = mx.symbol.Variable('dev1_bias')
        fc = data
        for i in range(seq_len):
            fc = mx.symbol.FullyConnected(data=fc, weight=weight, bias=bias,
                                          name='dev1_fc_%d' % i, num_hidden=num_hidden)
    with mx.AttrScope(ctx_group='dev2'):
        label = mx.symbol.Variable('label')
        weight = mx.symbol.Variable('dev2_weight')
        bias = mx.symbol.Variable('dev2_bias')
        for i in range(seq_len):
            fc = mx.symbol.FullyConnected(data=fc, weight=weight, bias=bias,
                                          name='dev2_fc_%d' % i, num_hidden=num_hidden)
    sym = mx.symbol.SoftmaxOutput(fc, label, name='softmax')
    return sym, ('data',), ('label',)
def test_load_000800():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data', lr_mult=0.2)
        weight = mx.sym.Variable(name='fc1_weight', lr_mult=1.2)
        fc1 = mx.symbol.FullyConnected(data=data, weight=weight, name='fc1',
                                       num_hidden=128, wd_mult=0.3)
        act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64, lr_mult=0.01)
        act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
        fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3, name='batchnorm0')
        sym1 = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    sym2 = mx.sym.load(os.path.join(curr_path, 'save_000800.json'))

    attr1 = sym1.attr_dict()
    attr2 = sym2.attr_dict()
    for k, v1 in attr1.items():
        assert k in attr2, k
        v2 = attr2[k]
        for kk, vv1 in v1.items():
            if kk.startswith('__') and kk.endswith('__'):
                assert kk in v2 and v2[kk] == vv1, k + str(v1) + str(v2)

    check_symbol_consistency(sym1, sym2,
                             {'ctx': mx.cpu(0),
                              'group2ctx': {'stage1': mx.cpu(1), 'stage2': mx.cpu(2)},
                              'data': (1, 200)})
def test_ctx_group():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data')
        fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
        act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")

    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
        act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
        fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3)
        mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')

    set_stage2 = set(mlp.list_arguments()) - set_stage1

    group2ctx = {
        'stage1': mx.cpu(1),
        'stage2': mx.cpu(2)
    }

    texec = mlp.simple_bind(mx.cpu(0), group2ctx=group2ctx, data=(1, 200))

    for arr, name in zip(texec.arg_arrays, mlp.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
def test_operator():
    data = mx.symbol.Variable('data')
    with mx.AttrScope(group='4', data='great'):
        fc1 = mx.symbol.Activation(data, act_type='relu')
        with mx.AttrScope(init_bias='0.0'):
            fc2 = mx.symbol.FullyConnected(fc1, num_hidden=10, name='fc2')
    assert fc1.attr('data') == 'great'
    fc2copy = pkl.loads(pkl.dumps(fc2))
    assert fc2copy.tojson() == fc2.tojson()
    fc2weight = fc2.get_internals()['fc2_weight']
def test_chain():
    ctx1 = mx.cpu(0)
    ctx2 = mx.cpu(1)
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    data3 = mx.sym.Variable('data3')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3

    with mx.AttrScope(ctx_group='dev2'):
        net = net + data3

    arr = []
    arr_grad = []
    shape = (4, 5)
    with mx.Context(ctx1):
        for i in range(n):
            arr.append(mx.nd.empty(shape))
            arr_grad.append(mx.nd.empty(shape))
    with mx.Context(ctx2):
        arr.append(mx.nd.empty(shape))
        arr_grad.append(mx.nd.empty(shape))

    exec1 = net.bind(ctx1,
                     args=arr,
                     args_grad=arr_grad,
                     group2ctx={'dev1': ctx1, 'dev2': ctx2})
    arr[0][:] = 1.0
    arr[1][:] = 2.0
    arr[2][:] = 3.0
    arr2 = [a.copyto(ctx1) for a in arr]
    arr_grad2 = [a.copyto(ctx1) for a in arr_grad]
    exec2 = net.bind(ctx1, args=arr2, args_grad=arr_grad2)

    # Show the execution plan that involves copynode
    print(exec1.debug_str())
    exec1.forward(is_train=True)
    exec2.forward(is_train=True)
    assert reldiff(exec1.outputs[0].asnumpy(), exec2.outputs[0].asnumpy()) < 1e-6

    out_grad = mx.nd.empty(shape, ctx1)
    out_grad[:] = 1.0
    exec1.backward([out_grad])
    exec2.backward([out_grad.copyto(ctx1)])
    for a, b in zip(arr_grad, arr_grad2):
        assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6
def _attr_scope_lr(attr_type):
    # weight (lr_mult, wd_mult); bias;
    # 1, 1; 2, 0;
    # so apply this to bias only
    if attr_type == 'alex':
        return mx.AttrScope(lr_mult='2.', wd_mult='0.')
    # 0, 0; 0, 0;
    # so apply this to both
    if attr_type == 'fixed':
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    # 1, 1; 1, 1;
    # so it is ok to do nothing
    return mx.AttrScope()
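
# Hypothetical usage of `_attr_scope_lr` above: wrap only the bias Variable so
# it picks up the 'alex' bias multipliers, leaving the weight at the defaults.
# The variable names are illustrative.
with _attr_scope_lr('alex'):
    conv1_bias = mx.sym.Variable('conv1_bias')
conv1_weight = mx.sym.Variable('conv1_weight')
assert conv1_bias.attr('lr_mult') == '2.'
assert conv1_weight.attr('lr_mult') is None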
def matrix_fact_model_parallel_net(factor_size, num_hidden, max_user, max_item):
    # set ctx_group attribute to 'dev1' for the symbols created in this scope;
    # the symbols will be bound to the context that 'dev1' maps to in group2ctxs
    with mx.AttrScope(ctx_group='dev1'):
        # input
        user = mx.symbol.Variable('user')
        item = mx.symbol.Variable('item')
        # user feature lookup
        user_weight = mx.symbol.Variable('user_weight')
        user = mx.symbol.Embedding(data=user, weight=user_weight,
                                   input_dim=max_user, output_dim=factor_size)
        # item feature lookup
        item_weight = mx.symbol.Variable('item_weight')
        item = mx.symbol.Embedding(data=item, weight=item_weight,
                                   input_dim=max_item, output_dim=factor_size)
    # set ctx_group attribute to 'dev2' for the symbols created in this scope;
    # the symbols will be bound to the context that 'dev2' maps to in group2ctxs
    with mx.AttrScope(ctx_group='dev2'):
        # non-linear transformation of user features
        user = mx.symbol.Activation(data=user, act_type='relu')
        fc_user_weight = mx.symbol.Variable('fc_user_weight')
        fc_user_bias = mx.symbol.Variable('fc_user_bias')
        user = mx.symbol.FullyConnected(data=user, weight=fc_user_weight,
                                        bias=fc_user_bias, num_hidden=num_hidden)
        # non-linear transformation of item features
        item = mx.symbol.Activation(data=item, act_type='relu')
        fc_item_weight = mx.symbol.Variable('fc_item_weight')
        fc_item_bias = mx.symbol.Variable('fc_item_bias')
        item = mx.symbol.FullyConnected(data=item, weight=fc_item_weight,
                                        bias=fc_item_bias, num_hidden=num_hidden)
        # predict by the inner product, which is element-wise product and then sum
        pred = user * item
        pred = mx.symbol.sum(data=pred, axis=1)
        pred = mx.symbol.Flatten(data=pred)
        # label
        score = mx.symbol.Variable('score')
        # loss layer
        pred = mx.symbol.LinearRegressionOutput(data=pred, label=score)
    return pred
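
# A sketch (assumed sizes and device mapping, not from the source) of binding
# the model-parallel net above: group 'dev1' (the embeddings) stays on CPU
# while 'dev2' (the dense layers) is mapped to GPUs through group2ctxs.
def example_bind_model_parallel(num_gpus=1, factor_size=128, num_hidden=64,
                                max_user=1000, max_item=1000):
    net = matrix_fact_model_parallel_net(factor_size, num_hidden, max_user, max_item)
    group2ctxs = {'dev1': [mx.cpu()] * num_gpus,
                  'dev2': [mx.gpu(i) for i in range(num_gpus)]}
    return mx.mod.Module(symbol=net, context=[mx.cpu()] * num_gpus,
                         data_names=['user', 'item'], label_names=['score'],
                         group2ctxs=group2ctxs)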
def get_symbol(args, arg_params, aux_params):
    # NOTE: `network` and `num_layers` come from the enclosing module scope
    # in the original code.
    data_shape = (args.image_channel, args.image_h, args.image_w)
    image_shape = ",".join([str(x) for x in data_shape])
    margin_symbols = []
    print('init %s, num_layers: %d' % (network, num_layers))
    with mx.AttrScope(ctx_group='dev0'):
        embedding = eval(network).get_symbol(args.emb_size, num_layers,
                                             shake_drop=args.shake_drop,
                                             version_se=args.version_se,
                                             version_input=args.version_input,
                                             version_output=args.version_output,
                                             version_unit=args.version_unit,
                                             version_act=args.version_act,
                                             width_mult=args.width_mult,
                                             version_bn=args.version_bn,
                                             bn_mom=args.bn_mom)
    gt_label = mx.symbol.Variable('softmax_label')
    nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')
    anchor = mx.symbol.slice_axis(nembedding, axis=0, begin=0,
                                  end=args.per_batch_size // 3)
    positive = mx.symbol.slice_axis(nembedding, axis=0, begin=args.per_batch_size // 3,
                                    end=2 * args.per_batch_size // 3)
    negative = mx.symbol.slice_axis(nembedding, axis=0, begin=2 * args.per_batch_size // 3,
                                    end=args.per_batch_size)
    ap = anchor - positive
    an = anchor - negative
    ap = ap * ap
    an = an * an
    ap = mx.symbol.sum(ap, axis=1, keepdims=1)  # (T,1)
    an = mx.symbol.sum(an, axis=1, keepdims=1)  # (T,1)
    triplet_loss = mx.symbol.Activation(data=(ap - an + args.triplet_alpha),
                                        act_type='relu')
    triplet_loss = mx.symbol.mean(triplet_loss)
    #triplet_loss = mx.symbol.sum(triplet_loss)/(args.per_batch_size//3)
    triplet_loss = mx.symbol.MakeLoss(triplet_loss)
    out_list = [mx.symbol.BlockGrad(embedding)]
    out_list.append(mx.sym.BlockGrad(gt_label))
    out_list.append(triplet_loss)
    out = mx.symbol.Group(out_list)
    return (out, arg_params, aux_params)
def bn(data, name, eps=1e-5, fix_gamma=False, use_global_stats=None):
    if use_global_stats is None:
        use_global_stats = cfg.get('use_global_stats', False)
    if fix_gamma:
        with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                eps=eps, fix_gamma=True,
                                use_global_stats=use_global_stats)
    else:
        lr_type = cfg.get('lr_type', 'torch')
        assert lr_type in ('alex', 'torch')
        with _attr_scope_lr(lr_type):
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data, beta=beta, name=name, eps=eps,
                                fix_gamma=False, use_global_stats=use_global_stats)
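
# Hypothetical usage of the bn wrapper above inside a small conv block;
# `data`, `num_filter`, and the layer names are illustrative.
def conv_bn(data, num_filter, name):
    conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=(3, 3),
                              pad=(1, 1), no_bias=True, name=name + '_conv')
    return bn(conv, name=name + '_bn')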
def test_attr_basic():
    with mx.AttrScope(group='4', data='great'):
        data = mx.symbol.Variable('data', attr={'dtype': 'data', 'group': '1'})
        gdata = mx.symbol.Variable('data2')
    assert gdata.attr('group') == '4'
    assert data.attr('group') == '1'

    data2 = pkl.loads(pkl.dumps(data))
    assert data.attr('dtype') == data2.attr('dtype')
def _attr_scope_lr(lr_type, lr_owner):
    # 'fixed' added to the allowed types: the body below handles it, but the
    # original assert rejected it.
    assert lr_type in ('alex', 'alex10', 'torch', 'fixed', 'psp', 'mxnet')
    # weight (lr_mult, wd_mult); bias;
    # 1, 1; 2, 0;
    if lr_type == 'alex':
        if lr_owner == 'weight':
            return mx.AttrScope()
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='2.', wd_mult='0.')
        else:
            assert False
    # 10, 1; 20, 0;
    if lr_type == 'alex10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='20.', wd_mult='0.')
        else:
            assert False
    # 0, 0; 0, 0;
    # so apply this to both
    if lr_type == 'fixed':
        assert lr_owner in ('weight', 'bias')
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    # 1, 0; 1, 0;
    if lr_type == 'psp':
        assert lr_owner in ('weight', 'bias')
        return mx.AttrScope(wd_mult='0.')
    # 1, 1; 1, 1; default scope for 'torch' and 'mxnet' (the original fell
    # through and returned None for 'torch', which would fail as a context
    # manager)
    return mx.AttrScope()
def Softmax(embedding, gt_label, name, args, cvd=None):
    if cvd is None:
        _weight = mx.symbol.Variable(name + "_weight",
                                     shape=(args.ctx_num_classes, args.emb_size),
                                     lr_mult=args.fc7_lr_mult, wd_mult=args.fc7_wd_mult,
                                     init=mx.init.Normal(0.01))
        if args.fc7_no_bias:
            fc7 = mx.sym.FullyConnected(data=embedding, weight=_weight, no_bias=True,
                                        num_hidden=args.ctx_num_classes, name=name)
        else:
            _bias = mx.symbol.Variable(name + '_bias', lr_mult=2.0, wd_mult=0.0)
            fc7 = mx.sym.FullyConnected(data=embedding, weight=_weight, bias=_bias,
                                        num_hidden=args.ctx_num_classes, name=name)
        return fc7
    else:
        # model parallelism: one classifier shard per device group; in this
        # branch `name` is expected to be a format string (e.g. 'fc7_%d')
        fc7_subs = []
        for ctx_id in range(len(cvd)):
            with mx.AttrScope(ctx_group='dev%d' % (ctx_id + 1)):
                _weight = mx.symbol.Variable(name % ctx_id + "_weight",
                                             shape=(args.ctx_num_classes, args.emb_size),
                                             lr_mult=args.fc7_lr_mult,
                                             wd_mult=args.fc7_wd_mult,
                                             init=mx.init.Normal(0.01))
                if args.fc7_no_bias:
                    fc7_sub = mx.sym.FullyConnected(data=embedding, weight=_weight,
                                                    no_bias=True,
                                                    num_hidden=args.ctx_num_classes,
                                                    name=name % ctx_id)
                else:
                    _bias = mx.symbol.Variable(name % ctx_id + '_bias',
                                               lr_mult=2.0, wd_mult=0.0)
                    fc7_sub = mx.sym.FullyConnected(data=embedding, weight=_weight,
                                                    bias=_bias,
                                                    num_hidden=args.ctx_num_classes,
                                                    name=name % ctx_id)
                fc7_subs.append(fc7_sub)
        fc7 = mx.sym.concat(*fc7_subs, dim=1, name=name + '_concat')
        return fc7
def test_chain():
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3

    with mx.AttrScope(ctx_group='dev2'):
        net = net + data1

    with mx.Context(mx.cpu(0)):
        shape = (4, 5)
        arr = [mx.nd.empty(shape) for i in range(n)]
        arr_grad = [mx.nd.empty(shape) for i in range(n)]

    exec1 = net.bind(mx.cpu(),
                     args=arr,
                     args_grad=arr_grad,
                     group2ctx={'dev1': mx.cpu(0), 'dev2': mx.cpu(1)})
    arr[0][:] = 1.0
    arr[1][:] = 2.0
    arr2 = [a.copyto(mx.cpu()) for a in arr]
    arr_grad2 = [a.copyto(mx.cpu()) for a in arr_grad]
    exec2 = net.bind(mx.cpu(), args=arr2, args_grad=arr_grad2)

    # Show the execution plan that involves copynode
    print(exec1.debug_str())
    exec1.forward()
    exec2.forward()
    assert reldiff(exec1.outputs[0].asnumpy(), exec2.outputs[0].asnumpy()) < 1e-6

    out_grad = mx.nd.empty(shape, mx.cpu(1))
    out_grad[:] = 1.0
    exec1.backward([out_grad])
    exec2.backward([out_grad.copyto(mx.cpu())])
    for a, b in zip(arr_grad, arr_grad2):
        assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6
def encode(self,
           data: mx.sym.Symbol,
           data_length: mx.sym.Symbol,
           seq_len: int) -> mx.sym.Symbol:
    """
    Encodes data given sequence lengths of individual examples (data_length)
    and maximum sequence length (seq_len).

    :param data: Input data.
    :param data_length: Vector with sequence lengths.
    :param seq_len: Maximum sequence length.
    :return: Encoded input data.
    """
    with mx.AttrScope(__layout__=C.TIME_MAJOR):
        return mx.sym.swapaxes(data=data, dim1=0, dim2=1)
def encode(self,
           data: mx.sym.Symbol,
           data_length: Optional[mx.sym.Symbol],
           seq_len: int) -> Tuple[mx.sym.Symbol, mx.sym.Symbol, int]:
    """
    Encodes data given sequence lengths of individual examples and maximum sequence length.

    :param data: Input data.
    :param data_length: Vector with sequence lengths.
    :param seq_len: Maximum sequence length.
    :return: Encoded versions of input data (data, data_length, seq_len).
    """
    with mx.AttrScope(__layout__=self.target_layout):
        return mx.sym.swapaxes(data=data, dim1=0, dim2=1), data_length, seq_len
def check_ctx_group_sparse(lhs_stype, rhs_stype):
    with mx.AttrScope(ctx_group='stage1'):
        lhs = mx.symbol.Variable('lhs', storage_type=lhs_stype)
        rhs = mx.symbol.Variable('rhs', storage_type=rhs_stype)
        plus = mx.symbol.elemwise_add(lhs, rhs, name='plus')

    set_stage1 = set(plus.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        softmax = mx.symbol.SoftmaxOutput(data=plus, name='softmax')

    set_stage2 = set(softmax.list_arguments()) - set_stage1

    group2ctx = {'stage1': mx.cpu(1), 'stage2': mx.cpu(2)}
    texec = softmax.simple_bind(mx.cpu(0), group2ctx=group2ctx,
                                lhs=(1, 200), rhs=(1, 200))

    for arr, name in zip(texec.arg_arrays, softmax.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
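
# Possible invocations of the checker above; the storage-type combinations are
# assumptions chosen to cover dense and sparse operands.
def example_ctx_group_sparse():
    check_ctx_group_sparse('default', 'default')
    check_ctx_group_sparse('default', 'row_sparse')
    check_ctx_group_sparse('row_sparse', 'row_sparse')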
def test_ctx_group_sparse():
    with mx.AttrScope(ctx_group='stage1'):
        lhs = mx.symbol.Variable('lhs', stype='csr')
        rhs = mx.symbol.Variable('rhs', stype='row_sparse')
        dot = mx.symbol.dot(lhs, rhs, name='dot')

    set_stage1 = set(dot.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        softmax = mx.symbol.SoftmaxOutput(data=dot, name='softmax')

    set_stage2 = set(softmax.list_arguments()) - set_stage1

    group2ctx = {
        'stage1': mx.cpu(1),
        'stage2': mx.cpu(2)
    }

    texec = softmax.simple_bind(mx.cpu(0), group2ctx=group2ctx,
                                lhs=(32, 200), rhs=(200, 5))

    for arr, name in zip(texec.arg_arrays, softmax.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
def test_attr_basic():
    with mx.AttrScope(group='4', data='great'):
        data = mx.symbol.Variable('data',
                                  attr={'dtype': 'data',
                                        'group': '1',
                                        'force_mirroring': 'True'},
                                  lr_mult=1)
        gdata = mx.symbol.Variable('data2')
    assert gdata.attr('group') == '4'
    assert data.attr('group') == '1'
    assert data.attr('lr_mult') == '1'
    assert data.attr('__lr_mult__') == '1'
    assert data.attr('force_mirroring') == 'True'
    assert data.attr('__force_mirroring__') == 'True'
    data2 = pkl.loads(pkl.dumps(data))
    assert data.attr('dtype') == data2.attr('dtype')
def bn(self, data, name, eps=1.001e-5, fix_gamma=False, use_global_stats=None,
       cudnn_off=False, relu=False):
    """ batch normalization wrapper """
    if use_global_stats is None:
        use_global_stats = cfg.get('bn_use_global_stats', False)
    if fix_gamma:
        with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
            beta = mx.sym.Variable('{}_beta'.format(name))
        output = mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                  eps=eps, fix_gamma=True,
                                  use_global_stats=use_global_stats,
                                  cudnn_off=cudnn_off)
    else:
        lr_type = cfg.get('lr_type', 'torch')
        with _attr_scope_lr(lr_type, 'weight'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
        with _attr_scope_lr(lr_type, 'bias'):
            beta = mx.sym.Variable('{}_beta'.format(name))
        output = mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                  eps=eps, fix_gamma=False,
                                  use_global_stats=use_global_stats)
    if relu:
        output = mx.sym.relu(output)
    return output
def __init__(self, input_order=3, input_size=100, core_number=4, core_size=25):
    """
    input_order (int): Order of input-CNN filter
    input_size (int): Size of input vector
    core_number (int): Number of switching cores
    core_size (int): Size of switching core
    """
    self._io = input_order
    self._is = input_size
    self._cn = core_number
    self._cs = core_size
    with mx.AttrScope(group='Swth', data='Command'):
        self.initials()
def _attr_scope_lr(lr_type, lr_owner):
    assert lr_owner in ('weight', 'bias')
    if lr_type == 'alex':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='1.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='2.', wd_mult='0.')
    elif lr_type == 'alex10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='20.', wd_mult='0.')
    elif lr_type == 'torch10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='10.', wd_mult='0.')
    elif lr_type == 'zeros':
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    else:
        return mx.AttrScope()
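
# Usage of the two-argument scope above: give one layer's weight and bias
# different lr_mult/wd_mult attributes under the 'alex10' scheme. The variable
# names are illustrative.
with _attr_scope_lr('alex10', 'weight'):
    fc_weight = mx.sym.Variable('fc_weight')
with _attr_scope_lr('alex10', 'bias'):
    fc_bias = mx.sym.Variable('fc_bias')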
def get_module(args, data_shapes):
    network, num_layers = args.network.split(',')
    num_layers = int(num_layers)
    data_shape = (3, 112, 112)
    image_shape = ",".join([str(x) for x in data_shape])
    with mx.AttrScope(ctx_group='dev0'):
        embedding = eval(network).get_symbol(args.emb_size, num_layers,
                                             shake_drop=args.shake_drop,
                                             version_se=args.version_se,
                                             version_input=args.version_input,
                                             version_output=args.version_output,
                                             version_unit=args.version_unit,
                                             version_act=args.version_act,
                                             width_mult=args.width_mult,
                                             version_bn=args.version_bn,
                                             bn_mom=args.bn_mom)
    #out_list = [mx.symbol.BlockGrad(embedding)]
    #sym = mx.symbol.Group(out_list)
    ctx = [mx.gpu(0)]
    #all_layers = sym.get_internals()
    all_layers = embedding.get_internals()
    out_list = [all_layers[layer] for layer in ['bn1_output', 'fc1_output']]
    sym = mx.symbol.Group(out_list)
    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
        data_names=['data'],
        label_names=None
    )
    model.bind(for_training=False, data_shapes=data_shapes)
    if args.pretrained != '':
        print(args.pretrained)
        vec = args.pretrained.split(',')
        _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
        model.set_params(arg_params, aux_params, allow_extra=True, allow_missing=True)
    return model
def __init__(self, input_order=3, input_size=100, core_number=4, core_size=25):
    """
    input_order (int): Order of input-CNN filter
    input_size (int): Size of input vector
    core_number (int): Number of switching cores
    core_size (int): Size of switching core
    """
    self._io = input_order
    self._is = input_size
    self._cn = core_number
    self._cs = core_size
    self.ctrl = _CNNControl(self._io, self._is, self._cn, self._cs)
    self.swch = _SoftmaxSwitch(self._io, self._is, self._cn, self._cs)
    with mx.AttrScope(group='RSU'):
        self.initials()
def get_symbol(network, num_layers, args, arg_params, aux_params):
    data_shape = (args.image_channel, args.image_h, args.image_w)
    image_shape = ",".join([str(x) for x in data_shape])
    margin_symbols = []
    print('init %s, num_layers: %d' % (network, num_layers))
    with mx.AttrScope(ctx_group='dev0'):
        embedding = eval(network).get_symbol(args.emb_size, num_layers,
                                             shake_drop=args.shake_drop,
                                             version_se=args.version_se,
                                             version_input=args.version_input,
                                             version_output=args.version_output,
                                             version_unit=args.version_unit,
                                             version_act=args.version_act,
                                             width_mult=args.width_mult,
                                             version_bn=args.version_bn,
                                             bn_mom=args.bn_mom,
                                             use_global_stats=True)
    if network == 'fspherenet':
        data_shape_dict = {'data': (args.per_batch_size,) + data_shape}
        # the original referenced an undefined `sym` here; `embedding` is the
        # symbol actually built above
        fspherenet.init_weights(embedding, data_shape_dict, int(num_layers))
    gt_label = mx.symbol.Variable('softmax_label')
    label = mx.sym.slice_axis(gt_label, axis=0, begin=0, end=args.per_batch_size // 2)
    nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')
    nembedding1 = mx.symbol.slice_axis(nembedding, axis=0, begin=0,
                                       end=args.per_batch_size // 2)
    nembedding2 = mx.symbol.slice_axis(nembedding, axis=0,
                                       begin=args.per_batch_size // 2,
                                       end=2 * args.per_batch_size // 2)
    cos_simi = mx.sym.sum(nembedding1 * nembedding2, axis=1, keepdims=1)
    target = mx.sym.where(label, 0.64 - cos_simi, cos_simi - 0.36)
    pairwise_loss = mx.symbol.clip(target, 0, 1)
    pairwise_loss = mx.symbol.mean(pairwise_loss)
    pairwise_loss = mx.symbol.MakeLoss(pairwise_loss)
    out_list = [mx.symbol.BlockGrad(embedding)]
    out_list.append(mx.sym.BlockGrad(gt_label))
    out_list.append(mx.sym.BlockGrad(cos_simi))
    out_list.append(mx.sym.BlockGrad(target))
    out_list.append(pairwise_loss)
    out = mx.symbol.Group(out_list)
    return (out, arg_params, aux_params)
def get_fc7(embedding, name, args, cvd=None):
    nembedding = mx.symbol.L2Normalization(embedding, mode='instance',
                                           name=name + '_norm')
    if cvd is None:
        _weight = mx.symbol.Variable(name + "_weight",
                                     shape=(args.ctx_num_classes, args.emb_size),
                                     attr={'lr_mult': str(args.fc7_lr_mult),
                                           'wd_mult': str(args.fc7_wd_mult)},
                                     init=mx.init.Normal(0.01))
        fc7 = mx.sym.FullyConnected(data=nembedding, weight=_weight,
                                    num_hidden=args.ctx_num_classes,
                                    no_bias=True, normalize=True, name=name)
    else:
        # shard the classifier across device groups; in this branch `name` is
        # expected to be a format string (e.g. 'fc7_%d')
        fc7_subs = []
        for ctx_id in range(len(cvd)):
            with mx.AttrScope(ctx_group='dev%d' % (ctx_id + 1)):
                _weight = mx.symbol.Variable(name % ctx_id + '_weight',
                                             shape=(args.ctx_num_classes, args.emb_size))
                fc7_sub = mx.sym.FullyConnected(data=nembedding, weight=_weight,
                                                num_hidden=args.ctx_num_classes,
                                                no_bias=True, normalize=True,
                                                name=name % ctx_id)
                fc7_subs.append(fc7_sub)
        fc7 = mx.sym.concat(*fc7_subs, dim=1, name=name + '_concat')
    return fc7
def lstm_unroll(num_lstm_layer, seq_len, input_size,
                num_hidden, num_embed, num_label,
                dropout=0., concat_decode=True, use_loss=False):
    """unrolled lstm network"""
    # initialize the parameter symbols
    with mx.AttrScope(ctx_group='embed'):
        embed_weight = mx.sym.Variable("embed_weight")

    with mx.AttrScope(ctx_group='decode'):
        cls_weight = mx.sym.Variable("cls_weight")
        cls_bias = mx.sym.Variable("cls_bias")

    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        with mx.AttrScope(ctx_group='layer%d' % i):
            param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                         i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                         h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                         h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
            state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                              h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)
    assert len(last_states) == num_lstm_layer

    last_hidden = []
    for seqidx in range(seq_len):
        # embedding layer
        with mx.AttrScope(ctx_group='embed'):
            data = mx.sym.Variable("t%d_data" % seqidx)
            hidden = mx.sym.Embedding(data=data, weight=embed_weight,
                                      input_dim=input_size,
                                      output_dim=num_embed,
                                      name="t%d_embed" % seqidx)
        # stack LSTM
        for i in range(num_lstm_layer):
            dp = 0. if i == 0 else dropout
            with mx.AttrScope(ctx_group='layer%d' % i):
                next_state = lstm(num_hidden, indata=hidden,
                                  prev_state=last_states[i],
                                  param=param_cells[i],
                                  seqidx=seqidx, layeridx=i, dropout=dp)
                hidden = next_state.h
                last_states[i] = next_state
        # decoder
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        last_hidden.append(hidden)

    out_prob = []
    if not concat_decode:
        for seqidx in range(seq_len):
            with mx.AttrScope(ctx_group='decode'):
                fc = mx.sym.FullyConnected(data=last_hidden[seqidx],
                                           weight=cls_weight,
                                           bias=cls_bias,
                                           num_hidden=num_label,
                                           name="t%d_cls" % seqidx)
                label = mx.sym.Variable("t%d_label" % seqidx)
                if use_loss:
                    sm = mx.sym.softmax_cross_entropy(fc, label, name="t%d_sm" % seqidx)
                else:
                    sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="t%d_sm" % seqidx)
                out_prob.append(sm)
    else:
        with mx.AttrScope(ctx_group='decode'):
            concat = mx.sym.Concat(*last_hidden, dim=0)
            fc = mx.sym.FullyConnected(data=concat,
                                       weight=cls_weight,
                                       bias=cls_bias,
                                       num_hidden=num_label)
            label = mx.sym.Variable("label")
            if use_loss:
                sm = mx.sym.softmax_cross_entropy(fc, label, name="sm")
            else:
                sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="sm")
            out_prob = [sm]

    for i in range(num_lstm_layer):
        state = last_states[i]
        state = LSTMState(c=mx.sym.BlockGrad(state.c, name="l%d_last_c" % i),
                          h=mx.sym.BlockGrad(state.h, name="l%d_last_h" % i))
        last_states[i] = state

    unpack_c = [state.c for state in last_states]
    unpack_h = [state.h for state in last_states]
    list_all = out_prob + unpack_c + unpack_h
    return mx.sym.Group(list_all)
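
# A sketch (not from the source) of mapping the ctx_group names used by
# lstm_unroll above ('embed', 'layer%d', 'decode') to devices for bind();
# the device-assignment policy here is an assumption.
def example_group2ctx(num_lstm_layer, ngpu):
    group2ctx = {'embed': mx.gpu(0), 'decode': mx.gpu(ngpu - 1)}
    for i in range(num_lstm_layer):
        # spread the stacked LSTM layers evenly across the available GPUs
        group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)
    return group2ctx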
def g(): with mx.AttrScope(x="hello"): event.wait() if "hello" in AttrScope.current._attr.values(): status[0] = True
def g(): with mx.AttrScope(x="hello"): e2.set() e1.wait() if "hello" in mx.attribute.current()._attr.values(): status[0] = True