def run(self):
    data_validate = mx.io.CSVIter(data_csv="../validate-64x64-data.csv",
                                  data_shape=(30, 64, 64), batch_size=1)
    network = get_lenet()
    batch_size = 32
    devs = [mx.cpu(0), mx.cpu(0), mx.cpu(0), mx.cpu(0)]  # distribute to multiple cores
    data_train = mx.io.CSVIter(data_csv=self.input()['data'].path,
                               data_shape=(30, 64, 64),
                               label_csv=self.input()['label'].path,
                               label_shape=(600,),
                               batch_size=batch_size)
    print("\n%d epochs\n" % self.tune_epoch())
    model = mx.model.FeedForward(ctx=devs, symbol=network,
                                 num_epoch=self.tune_epoch(),
                                 learning_rate=0.001, wd=0.00001, momentum=0.9)
    model.fit(X=data_train, eval_metric=mx.metric.np(CRPS))
    prob = model.predict(data_validate)
    prob_fname = "%s_prob" % self.name
    try:
        np.save(prob_fname, prob)
    except Exception:
        pickle.dump(prob, open(prob_fname + '.p', 'wb'))
    pickle.dump(model, open(self.output().path, 'wb'))
def test_convolution_with_type():
    np.random.seed(1234)
    sym1 = mx.sym.Convolution(num_filter=3, kernel=(3, 3), name='conv')

    data = mx.sym.Variable('conv_data')
    w = mx.sym.Variable('conv_weight')
    b = mx.sym.Variable('conv_bias')
    w = mx.sym.transpose(w, axes=(0, 2, 3, 1))
    sym2 = mx.sym.transpose(data, axes=(0, 2, 3, 1))
    sym2 = mx.sym.Convolution(sym2, w, b, layout='NHWC', num_filter=3, kernel=(3, 3))
    sym2 = mx.sym.transpose(sym2, axes=(0, 3, 1, 2), name='conv')

    sym = [sym1, sym1, sym1, sym1, sym1, sym2, sym2]
    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float16}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                # NHWC
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'conv_weight': (3, 2, 3, 3),
                 'type_dict': {'conv_data': np.float32, 'conv_weight': np.float32}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'conv_weight': (3, 2, 3, 3),
                 'type_dict': {'conv_data': np.float16, 'conv_weight': np.float16}}]
    # wider tolerance needed for true-fp16 NCHW test above
    tol = {np.dtype(np.float16): 0.5,
           np.dtype(np.float32): 1e-3,
           np.dtype(np.float64): 1e-5,
           np.dtype(np.uint8): 0,
           np.dtype(np.int32): 0}
    check_consistency(sym, ctx_list, tol=tol)
    # test ability to turn off training on bias
    check_consistency(sym, ctx_list,
                      grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'},
                      tol=tol)
def test_elementwisesum_with_type():
    sym = mx.sym.ElementWiseSum(name="ews", num_args=2)
    ctx_list = [{"ctx": mx.gpu(0), "ews_arg1": (2, 10), "ews_arg0": (2, 10),
                 "type_dict": {"ews_arg0": np.float64, "ews_arg1": np.float64}},
                {"ctx": mx.gpu(0), "ews_arg1": (2, 10), "ews_arg0": (2, 10),
                 "type_dict": {"ews_arg0": np.float32, "ews_arg1": np.float32}},
                {"ctx": mx.gpu(0), "ews_arg1": (2, 10), "ews_arg0": (2, 10),
                 "type_dict": {"ews_arg0": np.float16, "ews_arg1": np.float16}},
                {"ctx": mx.cpu(0), "ews_arg1": (2, 10), "ews_arg0": (2, 10),
                 "type_dict": {"ews_arg0": np.float64, "ews_arg1": np.float64}},
                {"ctx": mx.cpu(0), "ews_arg1": (2, 10), "ews_arg0": (2, 10),
                 "type_dict": {"ews_arg0": np.float32, "ews_arg1": np.float32}}]
    check_consistency(sym, ctx_list)
def test_load_000800():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data', lr_mult=0.2)
        weight = mx.sym.Variable(name='fc1_weight', lr_mult=1.2)
        fc1 = mx.symbol.FullyConnected(data=data, weight=weight, name='fc1',
                                       num_hidden=128, wd_mult=0.3)
        act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64, lr_mult=0.01)
        act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
        fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3, name='batchnorm0')
        sym1 = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')

    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    sym2 = mx.sym.load(os.path.join(curr_path, 'save_000800.json'))

    attr1 = sym1.attr_dict()
    attr2 = sym2.attr_dict()
    for k, v1 in attr1.items():
        assert k in attr2, k
        v2 = attr2[k]
        for kk, vv1 in v1.items():
            if kk.startswith('__') and kk.endswith('__'):
                assert kk in v2 and v2[kk] == vv1, k + str(v1) + str(v2)

    check_symbol_consistency(sym1, sym2,
                             {'ctx': mx.cpu(0),
                              'group2ctx': {'stage1': mx.cpu(1), 'stage2': mx.cpu(2)},
                              'data': (1, 200)})
def test_activation_with_type():
    sym = mx.sym.Activation(name='act', act_type='sigmoid')
    ctx_list = [{'ctx': mx.gpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float64}},
                {'ctx': mx.gpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float32}},
                {'ctx': mx.cpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float64}},
                {'ctx': mx.cpu(0), 'act_data': (2, 2, 10, 10), 'type_dict': {'act_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_convolution_with_type():
    sym = mx.sym.Convolution(num_filter=3, kernel=(3, 3), name='conv')
    ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.gpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float64}},
                {'ctx': mx.cpu(0), 'conv_data': (2, 2, 10, 10), 'type_dict': {'conv_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_bucket_module_ctx_group():
    num_hidden = 10
    batch_size = 5

    def sym_gen(seq_len):
        with mx.AttrScope(ctx_group='dev1'):
            data = mx.symbol.Variable('data')
            weight = mx.symbol.Variable('dev1_weight')
            bias = mx.symbol.Variable('dev1_bias')
            fc = data
            for i in range(seq_len):
                fc = mx.symbol.FullyConnected(data=fc, weight=weight, bias=bias,
                                              name='dev1_fc_%d' % i, num_hidden=num_hidden)
        with mx.AttrScope(ctx_group='dev2'):
            label = mx.symbol.Variable('label')
            weight = mx.symbol.Variable('dev2_weight')
            bias = mx.symbol.Variable('dev2_bias')
            for i in range(seq_len):
                fc = mx.symbol.FullyConnected(data=fc, weight=weight, bias=bias,
                                              name='dev2_fc_%d' % i, num_hidden=num_hidden)
            sym = mx.symbol.SoftmaxOutput(fc, label, name='softmax')
        return sym, ('data',), ('label',)

    mod = mx.mod.BucketingModule(sym_gen=sym_gen, default_bucket_key=10,
                                 context=[mx.cpu(0)],
                                 group2ctxs=[{'dev1': mx.cpu(1), 'dev2': mx.cpu(2)}])
    mod.bind(data_shapes=[['data', (batch_size, num_hidden)]],
             label_shapes=[['label', (batch_size,)]],
             for_training=True, inputs_need_grad=True)
    assert(mod.binded)
def test_save_load():
    net = mx.gluon.model_zoo.vision.get_resnet(1, 18, pretrained=True)
    net.save_parameters('test_save_load.params')

    net = mx.gluon.model_zoo.vision.get_resnet(1, 18)
    net.output = mx.gluon.nn.Dense(1000)
    net.load_parameters('test_save_load.params')

    class Network(gluon.Block):
        def __init__(self, **kwargs):
            super(Network, self).__init__(**kwargs)
            with self.name_scope():
                self.encoders = gluon.nn.Sequential()
                with self.encoders.name_scope():
                    for _ in range(2):
                        lstm = mx.gluon.rnn.LSTM(200, 1, bidirectional=True)
                        self.encoders.add(lstm)

        def forward(self, x):
            for i in range(2):
                x = self.encoders[i](x)
            return x

    net = Network()
    net.initialize(mx.init.Xavier(), ctx=mx.cpu())
    net.hybridize()
    x = np.random.rand(32, 10, 10)
    x = mx.nd.array(x).as_in_context(mx.cpu())
    net(x)
    net.save_parameters('tmp.params')
    net2 = Network()
    net2.load_parameters('tmp.params')
def test_module_states():
    stack = mx.rnn.SequentialRNNCell()
    for i in range(2):
        stack.add(mx.rnn.LSTMCell(num_hidden=20, prefix='lstm_l%d_' % i))
    begin_state = stack.begin_state(func=mx.sym.Variable)
    _, states = stack.unroll(10, begin_state=begin_state, inputs=mx.sym.Variable('data'))
    state_names = [i.name for i in begin_state]

    mod = mx.mod.Module(mx.sym.Group(states), context=[mx.cpu(0), mx.cpu(1)],
                        label_names=None, state_names=state_names)
    mod.bind(data_shapes=[('data', (5, 10))], label_shapes=None, for_training=False)
    mod.init_params()
    batch = mx.io.DataBatch(data=[mx.nd.zeros((5, 10))], label=[])

    mod.set_states(value=1)
    mod.forward(batch)
    out = mod.get_outputs(merge_multi_context=False)
    out1 = mod.get_outputs(merge_multi_context=True)

    mod.set_states(states=out)
    mod.forward(batch)
    out2 = mod.get_outputs(merge_multi_context=True)

    for x1, x2 in zip(out1, out2):
        assert not mx.test_utils.almost_equal(x1.asnumpy(), x2.asnumpy(), rtol=1e-3)
def test_module_reshape():
    data = mx.sym.Variable('data')
    sym = mx.sym.FullyConnected(data, num_hidden=20, name='fc')

    dshape = (7, 20)
    mod = mx.mod.Module(sym, ('data',), None, context=[mx.cpu(0), mx.cpu(1)])
    mod.bind(data_shapes=[('data', dshape)])
    mod.init_params()
    mod.init_optimizer(optimizer_params={'learning_rate': 1})

    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)], label=None))
    mod.backward([mx.nd.ones(dshape)])
    mod.update()
    assert mod.get_outputs()[0].shape == dshape
    assert (mod.get_params()[0]['fc_bias'].asnumpy() == -1).all()

    dshape = (14, 20)
    mod.reshape(data_shapes=[('data', dshape)])
    mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)], label=None))
    mod.backward([mx.nd.ones(dshape)])
    mod.update()
    assert mod.get_outputs()[0].shape == dshape
    assert (mod.get_params()[0]['fc_bias'].asnumpy() == -3).all()
def test_convolution_grouping():
    num_filter = 4
    num_group = 2
    kernel = (3, 3)
    shape = (1, 4, 9, 9)

    x = mx.sym.Variable('x')
    w = mx.sym.Variable('w')
    b = mx.sym.Variable('b')
    y1 = mx.sym.Convolution(data=x, weight=w, bias=b, num_filter=num_filter,
                            num_group=num_group, kernel=kernel)
    xslice = mx.sym.SliceChannel(data=x, num_outputs=num_group, axis=1)
    wslice = mx.sym.SliceChannel(data=w, num_outputs=num_group, axis=0)
    bslice = mx.sym.SliceChannel(data=b, num_outputs=num_group, axis=0)
    y2 = mx.sym.Concat(*[mx.sym.Convolution(data=xslice[i], weight=wslice[i], bias=bslice[i],
                                            num_filter=num_filter // num_group, kernel=kernel)
                         for i in range(num_group)])

    exe1 = y1.simple_bind(mx.cpu(), x=shape)
    exe2 = y2.simple_bind(mx.cpu(), x=shape,
                          w=(num_filter, shape[1] // num_group, kernel[0], kernel[1]),
                          b=(num_filter,))
    for arr1, arr2 in zip(exe1.arg_arrays, exe2.arg_arrays):
        arr1[:] = np.random.normal(size=arr1.shape)
        arr2[:] = arr1
    exe1.forward(is_train=True)
    exe1.backward(exe1.outputs[0])
    exe2.forward(is_train=True)
    exe2.backward(exe2.outputs[0])

    for arr1, arr2 in zip(exe1.outputs + exe1.grad_arrays, exe2.outputs + exe2.grad_arrays):
        np.testing.assert_allclose(arr1.asnumpy(), arr2.asnumpy(), rtol=1e-3)
def __init__(self, seq_len, input_size, num_hidden, num_embed, num_label,
             arg_params, ctx=mx.cpu(), dropout=0.):
    self.sym = bi_lstm_inference_symbol(input_size, seq_len, num_hidden,
                                        num_embed, num_label, dropout)
    batch_size = 1
    init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(2)]
    init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(2)]
    data_shape = [("data", (batch_size, seq_len,))]
    input_shapes = dict(init_c + init_h + data_shape)

    self.executor = self.sym.simple_bind(ctx=mx.cpu(), **input_shapes)

    for key in self.executor.arg_dict.keys():
        if key in arg_params:
            arg_params[key].copyto(self.executor.arg_dict[key])

    state_name = []
    for i in range(2):
        state_name.append("l%d_init_c" % i)
        state_name.append("l%d_init_h" % i)

    self.states_dict = dict(zip(state_name, self.executor.outputs[1:]))
    self.input_arr = mx.nd.zeros(data_shape[0][1])
def test_paramdict():
    params = gluon.ParameterDict('net_')
    params.get('weight', shape=(10, 10))
    assert list(params.keys()) == ['net_weight']
    params.initialize(ctx=mx.cpu())
    params.save('test.params')
    params.load('test.params', mx.cpu())
def validate(val_data, val_dataset, net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    val_metric.reset()

    from tqdm import tqdm
    for batch in tqdm(val_data):
        data, scale, center, score, imgid = val_batch_fn(batch, ctx)

        outputs = [net(X) for X in data]
        if opt.flip_test:
            data_flip = [nd.flip(X, axis=3) for X in data]
            outputs_flip = [net(X) for X in data_flip]
            outputs_flipback = [flip_heatmap(o, val_dataset.joint_pairs, shift=True)
                                for o in outputs_flip]
            outputs = [(o + o_flip) / 2 for o, o_flip in zip(outputs, outputs_flipback)]

        if len(outputs) > 1:
            outputs_stack = nd.concat(*[o.as_in_context(mx.cpu()) for o in outputs], dim=0)
        else:
            outputs_stack = outputs[0].as_in_context(mx.cpu())

        preds, maxvals = get_final_preds(outputs_stack, center.asnumpy(), scale.asnumpy())
        val_metric.update(preds, maxvals, score, imgid)

    res = val_metric.get()
    return
def run_synthetic_SGLD():
    theta1 = 0
    theta2 = 1
    sigma1 = numpy.sqrt(10)
    sigma2 = 1
    sigmax = numpy.sqrt(2)
    X = load_synthetic(theta1=theta1, theta2=theta2, sigmax=sigmax, num=100)
    minibatch_size = 1
    total_iter_num = 1000000
    lr_scheduler = SGLDScheduler(begin_rate=0.01, end_rate=0.0001,
                                 total_iter_num=total_iter_num, factor=0.55)
    optimizer = mx.optimizer.create('sgld', learning_rate=None, rescale_grad=1.0,
                                    lr_scheduler=lr_scheduler, wd=0)
    updater = mx.optimizer.get_updater(optimizer)
    theta = mx.random.normal(0, 1, (2,), mx.cpu())
    grad = nd.empty((2,), mx.cpu())
    samples = numpy.zeros((2, total_iter_num))
    start = time.time()
    for i in range(total_iter_num):
        if (i + 1) % 100000 == 0:
            end = time.time()
            print("Iter:%d, Time spent: %f" % (i + 1, end - start))
            start = time.time()
        ind = numpy.random.randint(0, X.shape[0])
        synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
                       rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
        updater('theta', grad, theta)
        samples[:, i] = theta.asnumpy()
    plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
    plt.colorbar()
    plt.show()
def test_parameter_sharing():
    class Net(gluon.Block):
        def __init__(self, in_units=0, **kwargs):
            super(Net, self).__init__(**kwargs)
            with self.name_scope():
                self.dense0 = nn.Dense(5, in_units=in_units)
                self.dense1 = nn.Dense(5, in_units=in_units)

        def forward(self, x):
            return self.dense1(self.dense0(x))

    net1 = Net(prefix='net1_', in_units=5)
    net2 = Net(prefix='net2_', params=net1.collect_params())
    net1.collect_params().initialize()
    net2(mx.nd.zeros((3, 5)))

    net1.save_parameters('net1.params')
    net3 = Net(prefix='net3_')
    net3.load_parameters('net1.params', mx.cpu())

    net4 = Net(prefix='net4_')
    net5 = Net(prefix='net5_', in_units=5, params=net4.collect_params())
    net4.collect_params().initialize()
    net5(mx.nd.zeros((3, 5)))

    net4.save_parameters('net4.params')
    net6 = Net(prefix='net6_')
    net6.load_parameters('net4.params', mx.cpu())
def check_trainer_reset_kv(kv):
    params = gluon.ParameterDict()
    x = params.get('x', shape=(10,), lr_mult=1.0)
    params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv)
    params.save('test_trainer_reset_kv.params')
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    assert trainer._kvstore.type == kv
    # load would reset kvstore
    mx.nd.waitall()
    params.load('test_trainer_reset_kv.params')
    if trainer._update_on_kvstore:
        # drop kvstore state if new parameters are loaded
        assert trainer._kvstore is None
        assert trainer._kv_initialized is False
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    # the updated parameter should be based on the loaded checkpoint
    assert (x.data(mx.cpu()) == -0.2).asnumpy().all()
def test_module_ctx_group():
    with mx.AttrScope(ctx_group='dev1'):
        a = mx.symbol.Variable('a')
        a = a * 2
    with mx.AttrScope(ctx_group='dev2'):
        b = mx.symbol.Variable('b')
        c = a + b
    shape = (2, 5)

    mod1 = mx.mod.Module(c, context=[mx.cpu(0)], data_names=['a', 'b'], label_names=None,
                         group2ctxs=[{'dev1': mx.cpu(1), 'dev2': mx.cpu(2)}])
    mod1.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod1.init_params()
    mod1.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]),
                 is_train=True)
    mod1.backward([mx.nd.ones(shape)])
    mod1_input_grads = mod1.get_input_grads()

    mod2 = mx.mod.Module(c, data_names=['a', 'b'], label_names=None)
    mod2.bind(data_shapes=[['a', shape], ['b', shape]], inputs_need_grad=True)
    mod2.init_params()
    mod2.forward(data_batch=mx.io.DataBatch(data=[mx.nd.ones(shape), mx.nd.ones(shape)]),
                 is_train=True)
    mod2.backward([mx.nd.ones(shape)])
    mod2_input_grads = mod2.get_input_grads()

    assert np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy())
    assert np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy())
def test_concat_with_type():
    sym = mx.sym.Concat(name="concat", num_args=2)
    ctx_list = [{"ctx": mx.gpu(0), "concat_arg1": (2, 10), "concat_arg0": (2, 10),
                 "type_dict": {"concat_arg0": np.float64, "concat_arg1": np.float64}},
                {"ctx": mx.gpu(0), "concat_arg1": (2, 10), "concat_arg0": (2, 10),
                 "type_dict": {"concat_arg0": np.float32, "concat_arg1": np.float32}},
                {"ctx": mx.gpu(0), "concat_arg1": (2, 10), "concat_arg0": (2, 10),
                 "type_dict": {"concat_arg0": np.float16, "concat_arg1": np.float16}},
                {"ctx": mx.cpu(0), "concat_arg1": (2, 10), "concat_arg0": (2, 10),
                 "type_dict": {"concat_arg0": np.float64, "concat_arg1": np.float64}},
                {"ctx": mx.cpu(0), "concat_arg1": (2, 10), "concat_arg0": (2, 10),
                 "type_dict": {"concat_arg0": np.float32, "concat_arg1": np.float32}}]
    check_consistency(sym, ctx_list)
def test_fullyconnected_with_type():
    sym = mx.sym.FullyConnected(num_hidden=3, name='inner')
    ctx_list = [{'ctx': mx.gpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float64}},
                {'ctx': mx.gpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float32}},
                {'ctx': mx.cpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float64}},
                {'ctx': mx.cpu(0), 'inner_data': (2, 10), 'type_dict': {'inner_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_ctx_group():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data')
        fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
        act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
    set_stage1 = set(act1.list_arguments())
    with mx.AttrScope(ctx_group='stage2'):
        fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
        act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
        fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3)
        mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
    set_stage2 = set(mlp.list_arguments()) - set_stage1

    group2ctx = {'stage1': mx.cpu(1), 'stage2': mx.cpu(2)}
    texec = mlp.simple_bind(mx.cpu(0), group2ctx=group2ctx, data=(1, 200))

    for arr, name in zip(texec.arg_arrays, mlp.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']
def test_parameter():
    p = gluon.Parameter('weight', shape=(10, 10))
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assert len(p.list_data()) == 2
    assert len(p.list_grad()) == 2
    assert p.data(mx.cpu(1)).context == mx.cpu(1)
    assert p.data(mx.cpu(0)).shape == (10, 10)
    assert p.var().name == 'weight'
def test_upsampling_with_type():
    sym = mx.sym.UpSampling(scale=2, num_filter=2, name='up', sample_type='nearest', num_args=1)
    ctx_list = [{'ctx': mx.gpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float64}},
                {'ctx': mx.gpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float32}},
                {'ctx': mx.gpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float16}},
                {'ctx': mx.cpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float64}},
                {'ctx': mx.cpu(0), 'up_arg0': (2, 2, 2, 10), 'type_dict': {'up_arg0': np.float32}}]
    check_consistency(sym, ctx_list)
def test_reshape_with_type():
    sym = mx.sym.Reshape(name='reshape', shape=(-1, 1, 1, 0))
    ctx_list = [{'ctx': mx.gpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float64}},
                {'ctx': mx.gpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float32}},
                {'ctx': mx.gpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float16}},
                {'ctx': mx.cpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float64}},
                {'ctx': mx.cpu(0), 'reshape_data': (2, 2, 2, 10), 'type_dict': {'reshape_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_blockgrad_with_type():
    sym = mx.sym.BlockGrad(name='bg')
    ctx_list = [{'ctx': mx.gpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float64}},
                {'ctx': mx.gpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float32}},
                {'ctx': mx.gpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float16}},
                {'ctx': mx.cpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float64}},
                {'ctx': mx.cpu(0), 'bg_data': (2, 2, 2, 10), 'type_dict': {'bg_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_swapaxis_with_type():
    sym = mx.sym.SwapAxis(name='swap', dim1=1)
    ctx_list = [{'ctx': mx.gpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float64}},
                {'ctx': mx.gpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float32}},
                {'ctx': mx.gpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float16}},
                {'ctx': mx.cpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float64}},
                {'ctx': mx.cpu(0), 'swap_data': (2, 2, 2, 10), 'type_dict': {'swap_data': np.float32}}]
    check_consistency(sym, ctx_list)
def test_wrapper(*args, **kwargs):
    try:
        a = mx.nd.zeros((1,), ctx=mx.cpu(cpu_id))
        ctx = mx.cpu(cpu_id)
    except Exception:
        ctx = mx.cpu(0)
    with ctx:
        orig_test(*args, **kwargs)
def get_extractor():
    model = mx.model.FeedForward.load('./resnet-50', 0, ctx=mx.cpu(), numpy_batch_size=1)
    fea_symbol = model.symbol.get_internals()["flatten0_output"]
    feature_extractor = mx.model.FeedForward(ctx=mx.cpu(), symbol=fea_symbol,
                                             numpy_batch_size=64,
                                             arg_params=model.arg_params,
                                             aux_params=model.aux_params,
                                             allow_extra_params=True)
    return feature_extractor
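# A hypothetical usage sketch for the extractor above. `frames` is an assumed,
# already preprocessed numpy batch whose shape matches the resnet-50 input
# (e.g. (N, 3, 224, 224)); FeedForward.predict accepts a numpy array directly
# and returns the flatten0_output features (roughly (N, 2048) for resnet-50).
extractor = get_extractor()
features = extractor.predict(frames)
print(features.shape)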
def test_deconvolution_with_type():
    sym = mx.sym.Deconvolution(num_filter=2, kernel=(3, 3), name='deconv')
    ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float64}},
                {'ctx': mx.gpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float32}},
                {'ctx': mx.gpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float16}},
                {'ctx': mx.cpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float64}},
                {'ctx': mx.cpu(0), 'deconv_data': (2, 2, 10, 10), 'type_dict': {'deconv_data': np.float32}}]
    check_consistency(sym, ctx_list)
    check_consistency(sym, ctx_list, grad_req="add")
def test_svmoutput_with_type():
    sym = mx.sym.SVMOutput(name='svmoutput', use_linear=True)
    ctx_list = [{'ctx': mx.gpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float64}},
                {'ctx': mx.gpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float32}},
                {'ctx': mx.gpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float16}},
                {'ctx': mx.cpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float64}},
                {'ctx': mx.cpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float32}},
                {'ctx': mx.cpu(0), 'svmoutput_data': (20, 10), 'type_dict': {'svmoutput_data': np.float16}}]
    check_consistency(sym, ctx_list)
def evaluate(data_eval, model, nsp_loss, mlm_loss, vocab_size, ctx, log_interval, dtype): """Evaluation function.""" logging.info('Running evaluation ... ') mlm_metric = MaskedAccuracy() nsp_metric = MaskedAccuracy() mlm_metric.reset() nsp_metric.reset() eval_begin_time = time.time() begin_time = time.time() step_num = 0 running_mlm_loss = running_nsp_loss = 0 total_mlm_loss = total_nsp_loss = 0 running_num_tks = 0 for _, dataloader in enumerate(data_eval): for _, data_batch in enumerate(dataloader): step_num += 1 data_list = split_and_load(data_batch, ctx) loss_list = [] ns_label_list, ns_pred_list = [], [] mask_label_list, mask_pred_list, mask_weight_list = [], [], [] for data in data_list: out = forward(data, model, mlm_loss, nsp_loss, vocab_size, dtype) (ls, next_sentence_label, classified, masked_id, decoded, masked_weight, ls1, ls2, valid_length) = out loss_list.append(ls) ns_label_list.append(next_sentence_label) ns_pred_list.append(classified) mask_label_list.append(masked_id) mask_pred_list.append(decoded) mask_weight_list.append(masked_weight) running_mlm_loss += ls1.as_in_context(mx.cpu()) running_nsp_loss += ls2.as_in_context(mx.cpu()) running_num_tks += valid_length.sum().as_in_context(mx.cpu()) nsp_metric.update(ns_label_list, ns_pred_list) mlm_metric.update(mask_label_list, mask_pred_list, mask_weight_list) # logging if (step_num + 1) % (log_interval) == 0: total_mlm_loss += running_mlm_loss total_nsp_loss += running_nsp_loss log(begin_time, running_num_tks, running_mlm_loss, running_nsp_loss, step_num, mlm_metric, nsp_metric, None, log_interval) begin_time = time.time() running_mlm_loss = running_nsp_loss = running_num_tks = 0 mlm_metric.reset_local() nsp_metric.reset_local() mx.nd.waitall() eval_end_time = time.time() # accumulate losses from last few batches, too if running_mlm_loss != 0: total_mlm_loss += running_mlm_loss total_nsp_loss += running_nsp_loss total_mlm_loss /= step_num total_nsp_loss /= step_num logging.info( 'Eval mlm_loss={:.3f}\tmlm_acc={:.1f}\tnsp_loss={:.3f}\tnsp_acc={:.1f}\t' .format(total_mlm_loss.asscalar(), mlm_metric.get_global()[1] * 100, total_nsp_loss.asscalar(), nsp_metric.get_global()[1] * 100)) logging.info('Eval cost={:.1f}s'.format(eval_end_time - eval_begin_time))
# We use CSV so that not all data needs to sit in memory.
# You can also use an in-memory numpy array if your machine is large enough.
encode_csv("./train-label.csv", "./train-stytole.csv", "./train-diastole.csv")

num_epoch = 35
learning_rate = 0.01
wd = 0.00001
momentum = 0.95

# # Training the stytole net

MXNET_CPU_WORKER_NTHREADS = 32

# In[4]:

network = get_lenet()
batch_size = 32
devs = [mx.cpu(8)]
data_train = mx.io.CSVIter(data_csv="./train-64x64-data.csv",
                           data_shape=(30, 64, 64),
                           label_csv="./train-stytole.csv",
                           label_shape=(600,),
                           batch_size=batch_size)

data_validate = mx.io.CSVIter(data_csv="./validate-64x64-data.csv",
                              data_shape=(30, 64, 64),
                              batch_size=1)

stytole_model = mx.model.FeedForward(ctx=devs,
                                     symbol=network,
                                     num_epoch=num_epoch,
                                     learning_rate=learning_rate,
                                     wd=wd,
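# The comment above mentions that an in-memory numpy array can replace the CSV
# iterators when the data fits in RAM. A minimal sketch, assuming hypothetical
# arrays `train_array` of shape (N, 30, 64, 64) and `label_array` of shape
# (N, 600) that the caller has already loaded:
data_train_inmem = mx.io.NDArrayIter(data=train_array, label=label_array,
                                     batch_size=32, shuffle=True)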
# define mlp
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# mlp = mx.symbol.Softmax(data=fc3, name='softmax')
mlp = mx.symbol.Custom(data=fc3, name='softmax', op_type='softmax')

# data
train, val = MNISTIterator(batch_size=100, input_shape=(784,))

# train
logging.basicConfig(level=logging.DEBUG)
# MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU
model = mx.model.FeedForward(ctx=mx.cpu(0), symbol=mlp, num_epoch=20,
                             learning_rate=0.1, momentum=0.9, wd=0.00001)
model.fit(X=train, eval_data=val,
          batch_end_callback=mx.callback.Speedometer(100, 100))
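# The comment above notes that MXNET_CPU_WORKER_NTHREADS must be greater than 1
# for the custom op to run on CPU. A minimal sketch of one way to enforce that:
# it is a process environment variable read at startup, so it has to be set
# before mxnet is imported.
import os
os.environ.setdefault('MXNET_CPU_WORKER_NTHREADS', '2')
import mxnet as mx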
import mxnet as mx
import mxnet.ndarray as nd
import mxnet.autograd as autograd
from matplotlib import pyplot as plt
from data_preprocessing import data_preprocessing
from tqdm import *
import urllib
import os


def artistic_Image(noise_image, image_size):
    image = noise_image.reshape((-1,) + image_size)
    r, g, b = nd.split(image, axis=0, num_outputs=3)
    # Denormalization by JG
    r = nd.multiply(r, 0.229) + 0.485
    g = nd.multiply(g, 0.224) + 0.456
    b = nd.multiply(b, 0.225) + 0.406
    image = nd.concat(r, g, b, dim=0)
    '''
    matplotlib supports float32 and uint8 data types.
    For grayscale, matplotlib supports only float32.
    If your array data does not meet one of these descriptions, you need to rescale it.
    '''
    image = nd.transpose(image, axes=(1, 2, 0))
    image = nd.clip(image, a_min=0, a_max=1)
    image = nd.multiply(image, 255)
    image = nd.clip(image, a_min=0, a_max=255).astype('uint8')
    plt.imshow(image.asnumpy())
    plt.savefig("Artistic Image.png", dpi=200)


def neuralstyle(epoch=1000, show_period=100,
def get_res2net(blocks, width, scale, model_name=None, pretrained=False, ctx=cpu(), root=os.path.join("~", ".mxnet", "models"), **kwargs): """ Create Res2Net model with specific parameters. Parameters: ---------- blocks : int Number of blocks. width : int Width of filters. scale : int Number of scale. model_name : str or None, default None Model name for loading pretrained model. pretrained : bool, default False Whether to load the pretrained weights for model. ctx : Context, default CPU The context in which to load the pretrained weights. root : str, default '~/.mxnet/models' Location for keeping the model parameters. """ bottleneck = True if blocks == 50: layers = [3, 4, 6, 3] elif blocks == 101: layers = [3, 4, 23, 3] elif blocks == 152: layers = [3, 8, 36, 3] else: raise ValueError( "Unsupported Res2Net with number of blocks: {}".format(blocks)) assert (sum(layers) * 3 + 2 == blocks) init_block_channels = 64 channels_per_layers = [64, 128, 256, 512] if bottleneck: bottleneck_factor = 4 channels_per_layers = [ ci * bottleneck_factor for ci in channels_per_layers ] channels = [[ci] * li for (ci, li) in zip(channels_per_layers, layers)] net = Res2Net(channels=channels, init_block_channels=init_block_channels, width=width, scale=scale, **kwargs) if pretrained: if (model_name is None) or (not model_name): raise ValueError( "Parameter `model_name` should be properly initialized for loading pretrained model." ) from .model_store import get_model_file net.load_parameters(filename=get_model_file( model_name=model_name, local_model_store_dir_path=root), ctx=ctx) return net
        self.critic_network.save_parameters('A2C_CartPole_critic_network.params')

    def load(self):
        self.actor_network.load_parameters('A2C_CartPole_actor_network.params')
        self.critic_network.load_parameters('A2C_CartPole_critic_network.params')


if __name__ == '__main__':
    seed = 77777777
    np.random.seed(seed)
    mx.random.seed(seed)
    env = gym.make('CartPole-v0').unwrapped
    env.seed(seed)
    ctx = mx.cpu()
    render = False
    agent = A2C(gamma=0.99,
                action_dim=env.action_space.n,
                observation_dim=env.observation_space.shape[0],
                ctx=ctx)
    episode_reward_list = []
    max_episodes = 400
    max_episode_steps = 500
    for episode in range(max_episodes):
        state = env.reset()
        episode_reward = 0
        for episode_step in range(max_episode_steps):
            if render:
# The first fully-connected layer and the corresponding activation function
fc1 = mx.sym.FullyConnected(data=data, num_hidden=128)
act1 = mx.sym.Activation(data=fc1, act_type="relu")

# The second fully-connected layer and the corresponding activation function
fc2 = mx.sym.FullyConnected(data=act1, num_hidden=64)
act2 = mx.sym.Activation(data=fc2, act_type="relu")

# MNIST has 10 classes
fc3 = mx.sym.FullyConnected(data=act2, num_hidden=10)
# Softmax with cross entropy loss
mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')

# create a trainable module on CPU
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())

# training & logging time
times = []
for _ in range(10):
    batch_size = 100
    train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'],
                                   batch_size, shuffle=True)
    val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
    ts = time.time()
    mlp_model.fit(train_iter,                                 # train data
                  eval_data=val_iter,                         # validation data
                  optimizer='sgd',                            # use SGD to train
                  optimizer_params={'learning_rate': 0.1},    # use fixed learning rate
                  eval_metric='acc',                          # report accuracy during training
                  batch_end_callback=mx.callback.Speedometer(batch_size, 100),  # output progress for each 100 data batches
                  num_epoch=10)                               # train for at most 10 dataset passes
def fit(args, network, data_loader, **kwargs):
    """
    train a model
    args : argparse returns
    network : the symbol definition of the neural network
    data_loader : function that returns the train and val data iterators
    """
    # kvstore
    kv = mx.kvstore.create(args.kv_store)

    # logging
    head = '%(asctime)-15s Node[' + str(kv.rank) + '] %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)
    logging.info('start with arguments %s', args)

    # data iterators
    (train, val) = data_loader(args, kv)
    if args.test_io:
        tic = time.time()
        for i, batch in enumerate(train):
            for j in batch.data:
                j.wait_to_read()
            if (i + 1) % args.disp_batches == 0:
                logging.info('Batch [%d]\tSpeed: %.2f samples/sec' % (
                    i, args.disp_batches * args.batch_size / (time.time() - tic)))
                tic = time.time()
        return

    # load model
    if 'arg_params' in kwargs and 'aux_params' in kwargs:
        arg_params = kwargs['arg_params']
        aux_params = kwargs['aux_params']
    else:
        sym, arg_params, aux_params = _load_model(args, kv.rank)
        if sym is not None:
            assert sym.tojson() == network.tojson()

    # save model
    checkpoint = _save_model(args, kv.rank)

    # devices for training
    devs = mx.cpu() if args.gpus is None or args.gpus == '' else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]

    # learning rate
    lr, lr_scheduler = _get_lr_scheduler(args, kv)

    # create model
    model = mx.mod.Module(context=devs, symbol=network)

    optimizer_params = {
        'learning_rate': lr,
        'momentum': args.mom,
        'wd': args.wd,
        'lr_scheduler': lr_scheduler}

    monitor = mx.mon.Monitor(args.monitor, pattern=".*") if args.monitor > 0 else None

    if args.network == 'alexnet':
        # AlexNet will not converge using Xavier
        initializer = mx.init.Normal()
    else:
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
    # initializer = mx.init.Xavier(factor_type="in", magnitude=2.34),

    # evaluation metrics
    eval_metrics = ['accuracy']
    if args.top_k > 0:
        eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=args.top_k))

    # callbacks that run after each batch
    batch_end_callbacks = [mx.callback.Speedometer(args.batch_size, args.disp_batches)]
    if 'batch_end_callback' in kwargs:
        cbs = kwargs['batch_end_callback']
        batch_end_callbacks += cbs if isinstance(cbs, list) else [cbs]

    # run
    model.fit(train,
              begin_epoch=args.load_epoch if args.load_epoch else 0,
              num_epoch=args.num_epochs,
              eval_data=val,
              eval_metric=eval_metrics,
              kvstore=kv,
              optimizer=args.optimizer,
              optimizer_params=optimizer_params,
              initializer=initializer,
              arg_params=arg_params,
              aux_params=aux_params,
              batch_end_callback=batch_end_callbacks,
              epoch_end_callback=checkpoint,
              allow_missing=True,
              monitor=monitor)
def fit(self, train_data, eval_data=None, eval_metric='acc', grad_req='write', epoch_end_callback=None, batch_end_callback=None, kvstore='local', logger=None): global outimgiter if logger is None: logger = logging logging.info('Start training with %s', str(self.ctx)) logging.info(str(self.kwargs)) batch_size = train_data.provide_data[0][1][0] arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape( \ data=tuple(train_data.provide_data[0][1]), label_det=(batch_size,200,6)) arg_names = self.symbol.list_arguments() out_names = self.symbol.list_outputs() aux_names = self.symbol.list_auxiliary_states() # pprint([(n,s) for n,s in zip(arg_names,arg_shapes)]) # pprint([(n,s) for n,s in zip(out_names,out_shapes)]) # pprint([(n,s) for n,s in zip(aux_names,aux_shapes)]) if grad_req != 'null': self.grad_params = {} for name, shape in zip(arg_names, arg_shapes): if not (name.endswith('data') or name.endswith('label')): self.grad_params[name] = mx.nd.zeros(shape, self.ctx) else: self.grad_params = None self.aux_params = { k: mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes) } data_name = train_data.provide_data[0][0] label_name_det = train_data.provide_label[0][0] label_name_seg = train_data.provide_label[1][0] input_names = [data_name, label_name_det, label_name_seg] print(train_data.provide_label) # print(os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"]) self.optimizer = opt.create(self.optimizer, rescale_grad=(1.0 / train_data.batch_size), **(self.kwargs)) self.updater = get_updater(self.optimizer) eval_metric = CustomAccuracyMetric() # metric.create(eval_metric) multibox_metric = MultiBoxMetric() eval_metrics = metric.CompositeEvalMetric() eval_metrics.add(multibox_metric) eval_metrics.add(eval_metric) # begin training for epoch in range(self.begin_epoch, self.num_epoch): nbatch = 0 train_data.reset() eval_metrics.reset() logger.info('learning rate: ' + str(self.optimizer.learning_rate)) for data, _ in train_data: if self.evaluation_only: break nbatch += 1 label_shape_det = data.label[0].shape label_shape_seg = data.label[1].shape self.arg_params[data_name] = mx.nd.array( data.data[0], self.ctx) self.arg_params[label_name_det] = mx.nd.array( data.label[0], self.ctx) self.arg_params[label_name_seg] = mx.nd.array( data.label[1], self.ctx) output_names = self.symbol.list_outputs() ###################### analyze shapes #################### # pprint([(k,v.shape) for k,v in self.arg_params.items()]) self.executor = self.symbol.bind(self.ctx, self.arg_params, args_grad=self.grad_params, grad_req=grad_req, aux_states=self.aux_params) assert len(self.symbol.list_arguments()) == len( self.executor.grad_arrays) update_dict = {name: nd for name, nd in zip(self.symbol.list_arguments(), \ self.executor.grad_arrays) if nd is not None} output_dict = {} output_buff = {} for key, arr in zip(self.symbol.list_outputs(), self.executor.outputs): output_dict[key] = arr output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu()) # output_buff[key] = mx.nd.empty(arr.shape, ctx=self.ctx) def stat_helper(name, array): """wrapper for executor callback""" import ctypes from mxnet.ndarray import NDArray from mxnet.base import NDArrayHandle, py_str array = ctypes.cast(array, NDArrayHandle) if 0: array = NDArray(array, writable=False).asnumpy() print(name, array.shape, np.mean(array), np.std(array), ('%.1fms' % (float(time.time() - stat_helper.start_time) * 1000))) else: array = NDArray(array, writable=False) array.wait_to_read() elapsed = float(time.time() - stat_helper.start_time) * 1000. 
if elapsed > 0: print(name, array.shape, ('%.1fms' % (elapsed, ))) stat_helper.start_time = time.time() stat_helper.start_time = float(time.time()) # self.executor.set_monitor_callback(stat_helper) tic = time.time() self.executor.forward(is_train=True) for key in output_dict: output_dict[key].copyto(output_buff[key]) # exit(0) # for debugging forward pass only self.executor.backward() for key, arr in update_dict.items(): if key != "bigscore_weight": self.updater(key, arr, self.arg_params[key]) for output in self.executor.outputs: output.wait_to_read() if TIMING: print("%.0fms" % ((time.time() - tic) * 1000., )) output_dict = dict(zip(output_names, self.executor.outputs)) pred_det_shape = output_dict["det_out_output"].shape pred_seg_shape = output_dict["seg_out_output"].shape label_det = mx.nd.array(data.label[0].reshape( (label_shape_det[0], label_shape_det[1] * label_shape_det[2]))) label_seg = mx.nd.array(data.label[1].reshape( (label_shape_seg[0], label_shape_seg[1] * label_shape_seg[2]))) pred_det = mx.nd.array(output_buff["det_out_output"].reshape( (pred_det_shape[0], pred_det_shape[1], pred_det_shape[2]))) pred_seg = mx.nd.array(output_buff["seg_out_output"].reshape( (pred_seg_shape[0], pred_seg_shape[1], pred_seg_shape[2] * pred_seg_shape[3]))) if DEBUG: print(data.label[0].asnumpy()[0, :2, :]) if TIMING: print("%.0fms" % ((time.time() - tic) * 1000., )) eval_metrics.get_metric(0).update([ mx.nd.zeros(output_buff["cls_prob_output"].shape), mx.nd.zeros(output_buff["loc_loss_output"].shape), label_det ], [ output_buff["cls_prob_output"], output_buff["loc_loss_output"], output_buff["cls_label_output"] ]) eval_metrics.get_metric(1).update( [label_seg.as_in_context(self.ctx)], [pred_seg.as_in_context(self.ctx)]) self.executor.outputs[0].wait_to_read() ##################### display results ############################## # out_img = output_dict["seg_out_output"].asnumpy() # out_det = output_dict["det_out_output"].asnumpy() # for imgidx in range(out_img.shape[0]): # res_img = np.squeeze(out_img[imgidx,:,:].argmax(axis=0).astype(np.uint8)) # label_img = data.label[1].asnumpy()[imgidx,:,:].astype(np.uint8) # img = np.squeeze(data.data[0].asnumpy()[imgidx,:,:,:]) # det = out_det[imgidx,:,:] # gt = label_det.asnumpy()[imgidx,:].reshape((-1,6)) # display_results(res_img,np.expand_dims(label_img,axis=0),img, det, gt, self.class_names) # [exit(0) if (cv2.waitKey()&0xff)==27 else None] # outimgiter += 1 batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch, eval_metric=eval_metrics) batch_end_callback(batch_end_params) if TIMING: print("%.0fms" % ((time.time() - tic) * 1000., )) # exit(0) # for debugging only ##### save snapshot if (not self.evaluation_only) and (epoch_end_callback is not None): epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params) names, values = eval_metrics.get() for name, value in zip(names, values): logger.info(" --->Epoch[%d] Train-%s=%f", epoch, name, value) # evaluation if eval_data: logger.info(" in eval process...") nbatch = 0 depth_metric = DistanceAccuracyMetric( class_names=self.class_names) eval_data.reset() eval_metrics.reset() self.valid_metric.reset() depth_metric.reset() timing_results = [] for data, fnames in eval_data: nbatch += 1 label_shape_det = data.label[0].shape label_shape_seg = data.label[1].shape self.arg_params[data_name] = mx.nd.array( data.data[0], self.ctx) self.arg_params[label_name_det] = mx.nd.array( data.label[0], self.ctx) self.arg_params[label_name_seg] = mx.nd.array( data.label[1], self.ctx) self.executor = 
self.symbol.bind( self.ctx, self.arg_params, args_grad=self.grad_params, grad_req=grad_req, aux_states=self.aux_params) output_names = self.symbol.list_outputs() output_dict = dict(zip(output_names, self.executor.outputs)) cpu_output_array = mx.nd.zeros( output_dict["seg_out_output"].shape) ############## monitor status # def stat_helper(name, array): # """wrapper for executor callback""" # import ctypes # from mxnet.ndarray import NDArray # from mxnet.base import NDArrayHandle, py_str # array = ctypes.cast(array, NDArrayHandle) # if 1: # array = NDArray(array, writable=False).asnumpy() # print (name, array.shape, np.mean(array), np.std(array), # ('%.1fms' % (float(time.time()-stat_helper.start_time)*1000))) # else: # array = NDArray(array, writable=False) # array.wait_to_read() # elapsed = float(time.time()-stat_helper.start_time)*1000. # if elapsed>5: # print (name, array.shape, ('%.1fms' % (elapsed,))) # stat_helper.start_time=time.time() # stat_helper.start_time=float(time.time()) # self.executor.set_monitor_callback(stat_helper) ############## forward tic = time.time() self.executor.forward(is_train=True) output_dict["seg_out_output"].wait_to_read() timing_results.append((time.time() - tic) * 1000.) output_dict["seg_out_output"].copyto(cpu_output_array) pred_shape = output_dict["seg_out_output"].shape label = mx.nd.array(data.label[1].reshape( (label_shape_seg[0], label_shape_seg[1] * label_shape_seg[2]))) output_dict["seg_out_output"].wait_to_read() seg_out_output = output_dict["seg_out_output"].asnumpy() pred_det_shape = output_dict["det_out_output"].shape pred_seg_shape = output_dict["seg_out_output"].shape label_det = mx.nd.array(data.label[0].reshape( (label_shape_det[0], label_shape_det[1] * label_shape_det[2]))) label_seg = mx.nd.array(data.label[1].reshape( (label_shape_seg[0], label_shape_seg[1] * label_shape_seg[2])), ctx=self.ctx) pred_det = mx.nd.array( output_dict["det_out_output"].reshape( (pred_det_shape[0], pred_det_shape[1], pred_det_shape[2]))) pred_seg = mx.nd.array( output_dict["seg_out_output"].reshape( (pred_seg_shape[0], pred_seg_shape[1], pred_seg_shape[2] * pred_seg_shape[3])), ctx=self.ctx) #### remove invalid boxes out_dets = output_dict["det_out_output"].asnumpy() assert len(out_dets.shape) == 3 pred_det = np.zeros((batch_size, 200, 7), np.float32) - 1. 
for idx, out_det in enumerate(out_dets): assert len(out_det.shape) == 2 out_det = np.expand_dims(out_det, axis=0) indices = np.where( out_det[:, :, 0] >= 0) # labeled as negative out_det = np.expand_dims(out_det[indices[0], indices[1], :], axis=0) indices = np.where( out_det[:, :, 1] > .25) # higher confidence out_det = np.expand_dims(out_det[indices[0], indices[1], :], axis=0) pred_det[idx, :out_det.shape[1], :] = out_det del out_det pred_det = mx.nd.array(pred_det) ##### display results if self.evaluation_only: out_img = output_dict["seg_out_output"] out_img = mx.nd.split(out_img, axis=0, num_outputs=out_img.shape[0], squeeze_axis=0) if not isinstance(out_img, list): out_img = [out_img] for imgidx in range(eval_data.batch_size): ### segmentation seg_prob = out_img[imgidx] seg_prob = mx.nd.array(np.squeeze( seg_prob.asnumpy(), axis=(0, )), ctx=self.ctx) res_img = np.squeeze(seg_prob.asnumpy().argmax( axis=0).astype(np.uint8)) # res_img = np.squeeze(out_img[imgidx,:,:].argmax(axis=0).astype(np.uint8)) label_img = data.label[1].asnumpy()[ imgidx, :, :].astype(np.uint8) img = np.squeeze( data.data[0].asnumpy()[imgidx, :, :, :]) det = pred_det.asnumpy()[imgidx, :, :] ### ground-truth gt = label_det.asnumpy()[imgidx, :].reshape( (-1, 6)) # save to results folder for evalutation res_fname = fnames[imgidx].replace( "SegmentationClass", "Results").replace("gtFine_labelTrainIds", "results") lut = np.zeros(256) lut[:19] = np.array([ 7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33 ]) # lut[:20]=np.array([7,8,11,12,13,17,19,20,21,22,23,24,25,26,27,28,31,32,33,34]) seg_resized = prob_upsampling(seg_prob, target_shape=(1024, 2048)) seg_resized2 = cv2.LUT(seg_resized, lut) if cv2.imwrite(res_fname, seg_resized2): print(res_fname, 'saved.') # display result display_img = display_results( res_img, np.expand_dims(label_img, axis=0), img, det, gt, self.class_names) res_fname = fnames[imgidx].replace( "SegmentationClass", "Results").replace("gtFine_labelTrainIds", "compare") if cv2.imwrite(res_fname, display_img): print(res_fname, 'saved.') # [exit(0) if (cv2.waitKey()&0xff)==27 else None] outimgiter += 1 if self.evaluation_only: continue eval_metrics.get_metric(0).update(None, [ output_dict["cls_prob_output"], output_dict["loc_loss_output"], output_dict["cls_label_output"] ]) eval_metrics.get_metric(1).update([label_seg], [pred_seg]) self.valid_metric.update([mx.nd.slice_axis(data.label[0],axis=2,begin=0,end=5)], \ [mx.nd.slice_axis(pred_det,axis=2,begin=0,end=6)]) disparities = [] for imgidx in range(batch_size): dispname = fnames[imgidx].replace( "SegmentationClass", "Disparity").replace("gtFine_labelTrainIds", "disparity") disparities.append(cv2.imread(dispname, -1)) assert disparities[ 0] is not None, dispname + " not found." 
depth_metric.update(mx.nd.array(disparities), [pred_det]) det_metric = self.valid_metric seg_metric = eval_metrics.get_metric(1) det_names, det_values = det_metric.get() seg_name, seg_value = seg_metric.get() depth_names, depth_values = depth_metric.get() print("\r %d/%d speed=%.1fms %.1f%% %s=%.1f %s=%.1f %s=%.1f" % \ (nbatch*eval_data.batch_size,eval_data.num_samples, math.fsum(timing_results)/float(nbatch), float(nbatch*eval_data.batch_size)*100./float(eval_data.num_samples), det_names[-1],det_values[-1]*100., seg_name,seg_value*100., depth_names[-1],depth_values[-1]*100.,),end='\r') names, values = eval_metrics.get() for name, value in zip(names, values): logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value) logger.info('----------------------------------------------') logger.info(' & '.join(names)) logger.info(' & '.join( map(lambda v: '%.1f' % (v * 100., ), values))) logger.info('----------------------------------------------') names, values = self.valid_metric.get() for name, value in zip(names, values): logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value) logger.info('----------------------------------------------') logger.info(' & '.join(names)) logger.info(' & '.join( map(lambda v: '%.1f' % (v * 100., ), values))) logger.info('----------------------------------------------') names, values = depth_metric.get() for name, value in zip(names, values): logger.info(' epoch[%d] Validation-%s=%f', epoch, name, value) logger.info('----------------------------------------------') logger.info(' & '.join(names)) logger.info(' & '.join( map(lambda v: '%.1f' % (v * 100., ), values))) logger.info('----------------------------------------------') if self.evaluation_only: exit(0) ## for debugging only
    exclude_blocks.extend([net.features[2][0].body[0], net.features[2][0].body[1]])

print('*' * 25 + ' Exclude blocks ' + '*' * 25)
for b in exclude_blocks:
    print(b.name)
print('*' * (25 * 2 + len(' Exclude blocks ')))
print()

convert.convert_model(net, exclude=exclude_blocks, convert_fn=convert_fn)

# initialize for quantization parameters and reset context
qparams_init(net)
ctx = gpu(opt.use_gpu) if opt.use_gpu != -1 else cpu()
net.collect_params().reset_ctx(ctx)

# construct transformer
if opt.dataset == 'imagenet':
    eval_transformer = T.Compose([
        T.Resize(256, keep_ratio=True),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
else:
    eval_transformer = T.Compose([
        T.ToTensor(),
        T.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])
        out = net(data)
        out = nd.SoftmaxActivation(out).mean(axis=0)  # softmax process
        out = out.asnumpy().tolist()  # array to list
        # judge and delete
        if (out[2] > threshold) or (out[3] > threshold):
            os.remove(os.path.join(input_dir, movie, _image))
        # you can just write the result into file without doing anything.
        out = [str(number) for number in out]
        string = '%s:%s' % (image_file, ','.join(out))
        writeResult(string + '\n')
        # you can also move these images to another directory
    print('Movie %s finished.' % movie)


if __name__ == "__main__":
    # parse command line arguments
    args = parseArgs()
    task = 'face_classification'
    model_name = args.model
    task_num_class = args.class_number
    task_param = '..data/%s_%s.params' % (model_name, task)
    use_gpu = args.use_gpu
    ctx = mx.gpu() if use_gpu else mx.cpu()
    num_workers = args.worker_number
    input_dir = args.input_dir
    threshold = args.threshold

    net = loadModel(model_name, task_num_class, task_param, ctx)
    predict(net, ctx, input_dir, threshold)
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size==0: args.per_batch_size = 128 args.batch_size = args.per_batch_size*args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 os.environ['BETA'] = str(args.beta) data_dir_list = args.data_dir.split(',') assert len(data_dir_list)==1 data_dir = data_dir_list[0] path_imgrec = None path_imglist = None prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert(args.num_classes>0) print('num_classes', args.num_classes) path_imgrec = os.path.join(data_dir, "train.rec") if args.loss_type==1 and args.num_classes>20000: args.beta_freeze = 5000 args.gamma = 0.06 print('Called with argument:', args) data_shape = (args.image_channel,image_size[0],image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained)==0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1])) #if 'fc7_weight' in arg_params.keys(): # del arg_params['fc7_weight'] #if 'fc7_bias' in arg_params.keys(): # del arg_params['fc7_bias'] sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) #if args.network[0]=='s': # data_shape_dict = {'data' : (args.per_batch_size,)+data_shape} # spherenet.init_weights(sym, data_shape_dict, args.num_layers) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context = ctx, symbol = sym, work_load_list = None, ) val_dataiter = None train_dataiter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrec = path_imgrec, shuffle = True, rand_mirror = args.rand_mirror, mean = mean, cutoff = args.cutoff, ) if args.loss_type<10: _metric = AccMetric() else: _metric = LossValueMetric() eval_metrics = [mx.metric.create(AccMetric()),mx.metric.create(LossValue())] if args.network[0]=='r' or args.network[0]=='y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0]=='i' or args.network[0]=='x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0/args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 20 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir, name + ".bin") #path = os.path.join("/ssd/MegaFace/MF2_aligned_pic9/",name+".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = 
verification.test(ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps)==0: lr_steps = [16000, 24000] if args.loss_type>=1 and args.loss_type<=7: #lr_steps = [16000, 24000, 28000] lr_steps = [32000, 48000, 60000] #lr_steps = [100000, 140000, 160000] p = 512.0/args.batch_size for l in xrange(len(lr_steps)): lr_steps[l] = int(lr_steps[l]*p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0]+=1 mbatch = global_step[0] for _lr in lr_steps: if mbatch==args.beta_freeze+_lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch%1000==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) if mbatch>=0 and mbatch%args.verbose==0: arg, aux = model.get_params() mx.model.save_checkpoint(prefix, 0, model.symbol, arg, aux) acc_list = ver_test(mbatch) save_step[0]+=1 msave = save_step[0] do_save = False if len(acc_list)>0: lfw_score = acc_list[0] if lfw_score>highest_acc[0]: highest_acc[0] = lfw_score if lfw_score>=0.975: do_save = True if acc_list[-1]>=highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score>=0.985: do_save = True if acc_list[-1]>=0.985: do_save = True if args.ckpt==0: do_save = False elif args.ckpt>1: do_save = True if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1])) if mbatch<=args.beta_freeze: _beta = args.beta else: move = max(0, mbatch-args.beta_freeze) _beta = max(args.beta_min, args.beta*math.pow(1+args.gamma*move, -1.0*args.power)) #print('beta', _beta) os.environ['BETA'] = str(_beta) if args.max_steps>0 and mbatch>args.max_steps: sys.exit(0) epoch_cb = None model.fit(train_dataiter, begin_epoch = begin_epoch, num_epoch = end_epoch, eval_data = val_dataiter, eval_metric = eval_metrics, kvstore = 'device', optimizer = opt, #optimizer_params = optimizer_params, initializer = initializer, arg_params = arg_params, aux_params = aux_params, allow_missing = True, batch_end_callback = _batch_callback, epoch_end_callback = epoch_cb )
def test_coverage_attention(attention_coverage_type, attention_coverage_num_hidden, batch_size=3, encoder_num_hidden=2, decoder_num_hidden=2): # source: (batch_size, seq_len, encoder_num_hidden) source = mx.sym.Variable("source") # source_length: (batch_size, ) source_length = mx.sym.Variable("source_length") source_seq_len = 10 config_coverage = sockeye.coverage.CoverageConfig( type=attention_coverage_type, num_hidden=attention_coverage_num_hidden, layer_normalization=False) config_attention = sockeye.rnn_attention.AttentionConfig( type="coverage", num_hidden=5, input_previous_word=False, source_num_hidden=encoder_num_hidden, query_num_hidden=decoder_num_hidden, layer_normalization=False, config_coverage=config_coverage) attention = sockeye.rnn_attention.get_attention(config_attention, max_seq_len=source_seq_len) attention_state = attention.get_initial_state(source_length, source_seq_len) attention_func = attention.on(source, source_length, source_seq_len) attention_input = attention.make_input(0, mx.sym.Variable("word_vec_prev"), mx.sym.Variable("decoder_state")) attention_state = attention_func(attention_input, attention_state) sym = mx.sym.Group([ attention_state.context, attention_state.probs, attention_state.dynamic_source ]) source_shape = (batch_size, source_seq_len, encoder_num_hidden) source_length_shape = (batch_size, ) decoder_state_shape = (batch_size, decoder_num_hidden) executor = sym.simple_bind(ctx=mx.cpu(), source=source_shape, source_length=source_length_shape, decoder_state=decoder_state_shape) source_length_vector = integer_vector(shape=source_length_shape, max_value=source_seq_len) executor.arg_dict["source"][:] = gaussian_vector(shape=source_shape) executor.arg_dict["source_length"][:] = source_length_vector executor.arg_dict["decoder_state"][:] = gaussian_vector( shape=decoder_state_shape) exec_output = executor.forward() context_result = exec_output[0].asnumpy() attention_prob_result = exec_output[1].asnumpy() dynamic_source_result = exec_output[2].asnumpy() expected_probs = (1. / source_length_vector).reshape((batch_size, 1)) assert context_result.shape == (batch_size, encoder_num_hidden) assert attention_prob_result.shape == (batch_size, source_seq_len) assert dynamic_source_result.shape == (batch_size, source_seq_len, attention_coverage_num_hidden) assert (np.sum(np.isclose(attention_prob_result, expected_probs), axis=1) == source_length_vector).all()
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 if args.loss_type == 10: args.per_batch_size = 256 args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 ppatch = [int(x) for x in args.patch.split('_')] assert len(ppatch) == 5 os.environ['BETA'] = str(args.beta) data_dir_list = args.data_dir.split(',') if args.loss_type != 12 and args.loss_type != 13: assert len(data_dir_list) == 1 data_dir = data_dir_list[0] args.use_val = False path_imgrec = None path_imglist = None val_rec = None prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert (args.num_classes > 0) print('num_classes', args.num_classes) args.coco_scale = 0.5 * math.log(float(args.num_classes - 1)) + 3 # path_imglist = "/raid5data/dplearn/MS-Celeb-Aligned/lst2" path_imgrec = os.path.join(data_dir, "train.rec") val_rec = os.path.join(data_dir, "val.rec") if os.path.exists(val_rec) and args.loss_type < 10: args.use_val = True else: val_rec = None # args.use_val = False if args.loss_type == 1 and args.num_classes > 20000: args.beta_freeze = 5000 args.gamma = 0.06 if args.loss_type < 9: assert args.images_per_identity == 0 else: if args.images_per_identity == 0: if args.loss_type == 11: args.images_per_identity = 2 elif args.loss_type == 10 or args.loss_type == 9: args.images_per_identity = 16 elif args.loss_type == 12 or args.loss_type == 13: args.images_per_identity = 5 assert args.per_batch_size % 3 == 0 assert args.images_per_identity >= 2 args.per_identities = int(args.per_batch_size / args.images_per_identity) print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) if args.network[0] == 's': data_shape_dict = {'data': (args.per_batch_size,) + data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) data_extra = None hard_mining = False triplet_params = None coco_mode = False if args.loss_type == 10: hard_mining = True _shape = (args.batch_size, args.per_batch_size) data_extra = np.full(_shape, -1.0, dtype=np.float32) c = 0 while c < args.batch_size: a = 0 while a < args.per_batch_size: b = a + args.images_per_identity data_extra[(c + a):(c + b), a:b] = 1.0 # print(c+a, c+b, a, b) a = b c += args.per_batch_size elif args.loss_type == 11: data_extra = np.zeros((args.batch_size, args.per_identities), dtype=np.float32) c = 0 while c < args.batch_size: for i in range(args.per_identities): data_extra[c + i][i] = 1.0 c += args.per_batch_size elif args.loss_type == 12 or 
args.loss_type == 13: triplet_params = [args.triplet_bag_size, args.triplet_alpha, args.triplet_max_ap] elif args.loss_type == 9: coco_mode = True label_name = 'softmax_label' label_shape = (args.batch_size,) if args.output_c2c: label_shape = (args.batch_size, 2) if data_extra is None: model = mx.mod.Module( context=ctx, symbol=sym, ) else: data_names = ('data', 'extra') # label_name = '' model = mx.mod.Module( context=ctx, symbol=sym, data_names=data_names, label_names=(label_name,), ) if args.use_val: val_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=val_rec, # path_imglist = val_path, shuffle=False, rand_mirror=False, mean=mean, ctx_num=args.ctx_num, data_extra=data_extra, ) else: val_dataiter = None if len(data_dir_list) == 1 and args.loss_type != 12 and args.loss_type != 13: train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, c2c_threshold=args.c2c_threshold, output_c2c=args.output_c2c, c2c_mode=args.c2c_mode, limit=args.train_limit, ctx_num=args.ctx_num, images_per_identity=args.images_per_identity, data_extra=data_extra, hard_mining=hard_mining, triplet_params=triplet_params, coco_mode=coco_mode, mx_model=model, label_name=label_name, ) else: iter_list = [] for _data_dir in data_dir_list: _path_imgrec = os.path.join(_data_dir, "train.rec") _dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=_path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, c2c_threshold=args.c2c_threshold, output_c2c=args.output_c2c, c2c_mode=args.c2c_mode, limit=args.train_limit, ctx_num=args.ctx_num, images_per_identity=args.images_per_identity, data_extra=data_extra, hard_mining=hard_mining, triplet_params=triplet_params, coco_mode=coco_mode, mx_model=model, label_name=label_name, ) iter_list.append(_dataiter) iter_list.append(_dataiter) train_dataiter = FaceImageIterList(iter_list) if args.loss_type < 10: _metric = AccMetric() else: _metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] if args.network[0] == 'r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) # resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) # inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num if args.noise_sgd > 0.0: print('use noise sgd') opt = NoiseSGD(scale=args.noise_sgd, learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) else: opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 20 if args.loss_type == 12 or args.loss_type == 13: som = 2 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in range(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, data_extra, label_shape) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) # print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) 
print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results def val_test(): acc = AccMetric() val_metric = mx.metric.create(acc) val_metric.reset() val_dataiter.reset() for i, eval_batch in enumerate(val_dataiter): model.forward(eval_batch, is_train=False) model.update_metric(val_metric, eval_batch.label) acc_value = val_metric.get_name_value()[0][1] print('VACC: %f' % (acc_value)) highest_acc = [0.0, 0.0] # lfw and target # for i in range(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps) == 0: lr_steps = [40000, 60000, 80000] if args.loss_type >= 1 and args.loss_type <= 7: lr_steps = [100000, 140000, 160000] p = 512.0 / args.batch_size for l in range(len(lr_steps)): lr_steps[l] = int(lr_steps[l] * p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): # global global_step global_step[0] += 1 mbatch = global_step[0] for _lr in lr_steps: if mbatch == args.beta_freeze + _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False if len(acc_list) > 0: lfw_score = acc_list[0] if lfw_score > highest_acc[0]: highest_acc[0] = lfw_score if lfw_score >= 0.998: do_save = True if acc_list[-1] >= highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score >= 0.99: do_save = True if args.ckpt == 0: do_save = False elif args.ckpt > 1: do_save = True # for i in range(len(acc_list)): # acc = acc_list[i] # if acc>=highest_acc[i]: # highest_acc[i] = acc # if lfw_score>=0.99: # do_save = True # if args.loss_type==1 and mbatch>lr_steps[-1] and mbatch%10000==0: # do_save = True if do_save: print('saving', msave) if val_dataiter is not None: val_test() arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) # if acc>=highest_acc[0]: # lfw_npy = "%s-lfw-%04d" % (prefix, msave) # X = np.concatenate(embeddings_list, axis=0) # print('saving lfw npy', X.shape) # np.save(lfw_npy, X) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if mbatch <= args.beta_freeze: _beta = args.beta else: move = max(0, mbatch - args.beta_freeze) _beta = max(args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power)) # print('beta', _beta) os.environ['BETA'] = str(_beta) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) # epoch_cb = mx.callback.do_checkpoint(prefix, 1) epoch_cb = None # def _epoch_callback(epoch, sym, arg_params, aux_params): # print('epoch-end', epoch) model.fit(train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, # optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
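# A small standalone sketch (hypothetical helper, mirroring the _batch_callback above) of the
# inverse-power "beta" annealing schedule written into the BETA environment variable: beta keeps its
# initial value until beta_freeze steps, then decays as beta * (1 + gamma * move)^(-power), floored
# at beta_min. The default values below are illustrative only, not the script's actual arguments.
import math

def annealed_beta(mbatch, beta=1000.0, beta_min=5.0, beta_freeze=0, gamma=0.12, power=1.0):
    if mbatch <= beta_freeze:
        return beta
    move = max(0, mbatch - beta_freeze)
    return max(beta_min, beta * math.pow(1 + gamma * move, -1.0 * power))

# e.g. annealed_beta(0) == 1000.0, and the value decays toward beta_min as training progresses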
    # sub mean
    normed_img = sample - 128
    normed_img /= 128.
    return np.reshape(normed_img, (1, 3, 299, 299))

start_time = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
prefix = "model/inception-7/Inception-7"
num_round = 1
network = model.FeedForward.load(prefix, num_round, ctx=mx.cpu(), numpy_batch_size=bs)
inner = network.symbol.get_internals()
inner_feature = inner['flatten_output']
fea_ext = model.FeedForward(ctx=mx.cpu(),
                            symbol=inner_feature,
                            numpy_batch_size=bs,
                            arg_params=network.arg_params,
                            aux_params=network.aux_params,
                            allow_extra_params=True)
# biz_ph = pd.read_csv('../data/train_id.csv')
def main(): opt = parse_args() makedirs(opt.save_dir) filehandler = logging.FileHandler(os.path.join(opt.save_dir, opt.logging_file)) streamhandler = logging.StreamHandler() logger = logging.getLogger('') logger.setLevel(logging.INFO) logger.addHandler(filehandler) logger.addHandler(streamhandler) logger.info(opt) sw = SummaryWriter(logdir=opt.save_dir, flush_secs=5) batch_size = opt.batch_size classes = opt.num_classes num_gpus = opt.num_gpus batch_size *= max(1, num_gpus) logger.info('Total batch size is set to %d on %d GPUs' % (batch_size, num_gpus)) context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()] num_workers = opt.num_workers lr_decay = opt.lr_decay lr_decay_period = opt.lr_decay_period if opt.lr_decay_period > 0: lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period)) else: lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch] optimizer = 'sgd' if opt.clip_grad > 0: optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum, 'clip_gradient': opt.clip_grad} else: optimizer_params = {'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum} model_name = opt.model net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained, tsn=opt.use_tsn, num_segments=opt.num_segments, partial_bn=opt.partial_bn) net.cast(opt.dtype) net.collect_params().reset_ctx(context) logger.info(net) if opt.resume_params is not '': net.load_parameters(opt.resume_params, ctx=context) train_data, val_data, batch_fn = get_data_loader(opt, batch_size, num_workers, logger) train_metric = mx.metric.Accuracy() acc_top1 = mx.metric.Accuracy() acc_top5 = mx.metric.TopKAccuracy(5) def test(ctx, val_data): acc_top1.reset() acc_top5.reset() for i, batch in enumerate(val_data): data, label = batch_fn(batch, ctx) outputs = [net(X.astype(opt.dtype, copy=False)) for X in data] acc_top1.update(label, outputs) acc_top5.update(label, outputs) _, top1 = acc_top1.get() _, top5 = acc_top5.get() return (top1, top5) def train(ctx): if isinstance(ctx, mx.Context): ctx = [ctx] if opt.no_wd: for k, v in net.collect_params('.*beta|.*gamma|.*bias').items(): v.wd_mult = 0.0 if opt.partial_bn: train_patterns = None if 'inceptionv3' in opt.model: train_patterns = '.*weight|.*bias|inception30_batchnorm0_gamma|inception30_batchnorm0_beta|inception30_batchnorm0_running_mean|inception30_batchnorm0_running_var' else: logger.info('Current model does not support partial batch normalization.') trainer = gluon.Trainer(net.collect_params(train_patterns), optimizer, optimizer_params) else: trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params) if opt.resume_states is not '': trainer.load_states(opt.resume_states) L = gluon.loss.SoftmaxCrossEntropyLoss() best_val_score = 0 lr_decay_count = 0 for epoch in range(opt.resume_epoch, opt.num_epochs): tic = time.time() train_metric.reset() btic = time.time() if epoch == lr_decay_epoch[lr_decay_count]: trainer.set_learning_rate(trainer.learning_rate * lr_decay) lr_decay_count += 1 for i, batch in enumerate(train_data): data, label = batch_fn(batch, ctx) with ag.record(): outputs = [net(X.astype(opt.dtype, copy=False)) for X in data] loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)] for l in loss: l.backward() trainer.step(batch_size) train_metric.update(label, outputs) if opt.log_interval and not (i+1) % opt.log_interval: train_metric_name, train_metric_score = train_metric.get() 
logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f' % ( epoch, i, batch_size*opt.log_interval/(time.time()-btic), train_metric_name, train_metric_score*100, trainer.learning_rate)) btic = time.time() train_metric_name, train_metric_score = train_metric.get() throughput = int(batch_size * i /(time.time() - tic)) acc_top1_val, acc_top5_val = test(ctx, val_data) logger.info('[Epoch %d] training: %s=%f'%(epoch, train_metric_name, train_metric_score*100)) logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f'%(epoch, throughput, time.time()-tic)) logger.info('[Epoch %d] validation: acc-top1=%f acc-top5=%f'%(epoch, acc_top1_val*100, acc_top5_val*100)) sw.add_scalar(tag='train_acc', value=train_metric_score*100, global_step=epoch) sw.add_scalar(tag='valid_acc', value=acc_top1_val*100, global_step=epoch) if acc_top1_val > best_val_score: best_val_score = acc_top1_val if opt.use_tsn: net.basenet.save_parameters('%s/%.4f-ucf101-%s-%03d-best.params'%(opt.save_dir, best_val_score, model_name, epoch)) else: net.save_parameters('%s/%.4f-ucf101-%s-%03d-best.params'%(opt.save_dir, best_val_score, model_name, epoch)) trainer.save_states('%s/%.4f-ucf101-%s-%03d-best.states'%(opt.save_dir, best_val_score, model_name, epoch)) if opt.save_frequency and opt.save_dir and (epoch + 1) % opt.save_frequency == 0: if opt.use_tsn: net.basenet.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, epoch)) else: net.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, epoch)) trainer.save_states('%s/ucf101-%s-%03d.states'%(opt.save_dir, model_name, epoch)) # save the last model if opt.use_tsn: net.basenet.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, opt.num_epochs-1)) else: net.save_parameters('%s/ucf101-%s-%03d.params'%(opt.save_dir, model_name, opt.num_epochs-1)) trainer.save_states('%s/ucf101-%s-%03d.states'%(opt.save_dir, model_name, opt.num_epochs-1)) if opt.mode == 'hybrid': net.hybridize(static_alloc=True, static_shape=True) train(context) sw.close()
#### Use the softmax cross-entropy loss
# Softmax and cross-entropy loss functions
# Softmax regression computes exp(Xi) / sum(exp(Xi)) to normalize the scores into probabilities, so the 10 class probabilities sum to 1
# The cross-entropy loss takes the negative cross-entropy between two probability distributions as the objective; minimizing it is equivalent to maximizing the similarity of the two distributions
# It measures the predictive quality of the model
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

### Optimize the model
# Train with stochastic gradient descent (sgd)
# and set the learning-rate hyperparameter to .1
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .1})

epochs = 10
## Training
for e in range(epochs):  # each pass goes over the whole training set
    train_loss = 0.  # loss
    train_acc = 0.   # accuracy
    for i, (data, label) in enumerate(train_data):  # each mini-batch of samples and labels from the training set
        data = data.as_in_context(mx.cpu()).reshape((-1, 784))  # reshape 28*28 into 1*784
        label = label.as_in_context(mx.cpu())
        with autograd.record():  # automatic differentiation
            output = net(data)  # forward pass
            loss = softmax_cross_entropy(output, label)  # compute the loss
        loss.backward()  # backward pass
        trainer.step(data.shape[0])  # update the parameters; data.shape[0] = batch_size
        # Provide stats on the improvement of the model over each epoch
        train_loss += ndarray.mean(loss).asscalar()  # running mean of the current loss
        train_acc += utils.accuracy(output, label)  # accuracy
    test_acc = utils.evaluate_accuracy(test_data, net)  # accuracy on the test set
    print("Epoch {}. Train loss: {}. Train accuracy: {}. Test accuracy: {}.".format(
        e, train_loss/len(train_data), train_acc/len(train_data), test_acc))
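# A minimal sketch (not part of the training script above) of what gluon.loss.SoftmaxCrossEntropyLoss
# computes for a single batch: a softmax over the logits followed by the negative log-likelihood of
# the true class. The toy logits and labels below are made up purely for illustration.
import mxnet as mx
from mxnet import nd, gluon

logits = nd.array([[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]])  # (batch=2, classes=3)
labels = nd.array([0, 2])

# manual computation
probs = nd.softmax(logits, axis=1)                # exp(x_i) / sum_j exp(x_j)
manual = -nd.log(nd.pick(probs, labels, axis=1))  # -log p(true class)

# gluon's built-in loss should agree up to numerical precision
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
print(manual, loss_fn(logits, labels))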
    transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
])

image, label = cifar_train[0]
plt.figure(class_names[int(label.item())])
plt.imshow(image.asnumpy())
plt.show()

#
# Get the number of GPUs and split the data across them
#
num_gpus = mx.context.num_gpus()
if num_gpus != 0:
    ctx = [mx.gpu(i) for i in range(num_gpus)]
else:
    ctx = [mx.cpu()]
per_device_batch_size = 128
num_workers = 2
batch_size = per_device_batch_size * max(num_gpus, 1)

# load batches which will then be split across devices by split_and_load
train_data = gluon.data.DataLoader(
    cifar_train.transform_first(transform_train),
    batch_size=batch_size, shuffle=True, last_batch="rollover", num_workers=num_workers)
val_data = gluon.data.DataLoader(cifar_test.transform_first(transform_test),
                                 batch_size=batch_size, shuffle=False,
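# A minimal sketch of how a batch from the DataLoader above is split across the available devices
# with gluon.utils.split_and_load before the forward pass, which is what the comment above refers to.
# It assumes ctx and batch_size as set above and a Gluon network `net` defined elsewhere in the tutorial.
from mxnet import gluon

for data, label in train_data:
    data_parts = gluon.utils.split_and_load(data, ctx_list=ctx, batch_axis=0)
    label_parts = gluon.utils.split_and_load(label, ctx_list=ctx, batch_axis=0)
    # each slice now lives on one device; run the forward pass per device
    outputs = [net(X) for X in data_parts]
    break  # one batch is enough for illustration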
if __name__ == "__main__": from data_processing import PreprocessContentImage, PreprocessStyleImage from data_processing import PostprocessImage, SaveImage vgg_params = mx.nd.load("./model/vgg19.params") style_weight = 2 content_weight = 10 long_edge = 384 content_np = PreprocessContentImage("./input/IMG_4343.jpg", long_edge) style_np = PreprocessStyleImage("./input/starry_night.jpg", shape=content_np.shape) dshape = content_np.shape ctx = mx.gpu() # style style_mod = get_style_module("style", dshape, ctx, vgg_params) style_mod.forward(mx.io.DataBatch([mx.nd.array(style_np)], [0]), is_train=False) style_array = [arr.copyto(mx.cpu()) for arr in style_mod.get_outputs()] del style_mod # content content_mod = get_content_module("content", dshape, ctx, vgg_params) content_mod.forward(mx.io.DataBatch([mx.nd.array(content_np)], [0]), is_train=False) content_array = content_mod.get_outputs()[0].copyto(mx.cpu()) del content_mod # loss mod, gscale = get_loss_module("loss", dshape, ctx, vgg_params) extra_args = {"target_gram_%d" % i : style_array[i] for i in range(len(style_array))} extra_args["target_content"] = content_array mod.set_params(extra_args, {}, True, True) grad_array = [] for i in range(len(style_array)): grad_array.append(mx.nd.ones((1,), ctx) * (float(style_weight) / gscale[i])) grad_array.append(mx.nd.ones((1,), ctx) * (float(content_weight)))
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = os.path.join(args.models_root, '%s-%s-%s' % (args.network, args.loss, args.dataset), 'model') prefix_dir = os.path.dirname(prefix) print('prefix', prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = config.image_shape[2] data_dir = config.dataset_path path_imgrec = None path_imglist = None image_size = config.image_shape[0:2] assert len(image_size) == 2 assert image_size[0] == image_size[1] print('image_size', image_size) print('num_classes', config.num_classes) path_imgrec = os.path.join(data_dir, "train.rec") print('Called with argument:', args, config) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 if len(args.pretrained) == 0: arg_params = None aux_params = None sym = get_symbol(args) if config.net_name == 'spherenet': data_shape_dict = {'data': (args.per_batch_size, ) + data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) sym = get_symbol(args) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context=ctx, symbol=sym, ) val_dataiter = None if config.loss_name.find('triplet') >= 0: from triplet_image_iter import FaceImageIter triplet_params = [ config.triplet_bag_size, config.triplet_alpha, config.triplet_max_ap ] train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, ctx_num=args.ctx_num, images_per_identity=config.images_per_identity, triplet_params=triplet_params, mx_model=model, ) _metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] else: from image_iter import FaceImageIter train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, color_jittering=args.color, images_filter=args.images_filter, ) metric1 = AccMetric() eval_metrics = [mx.metric.create(metric1)] if args.ce_loss: metric2 = LossValueMetric() eval_metrics.append(mx.metric.create(metric2)) if config.net_name == 'fresnet' or config.net_name == 'fmobilefacenet': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale) _cb = mx.callback.Speedometer(args.batch_size, args.frequent) ver_list = [] ver_name_list = [] for name in config.val_targets: path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( 
ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] for step in lr_steps: if mbatch == step: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False is_highest = False if len(acc_list) > 0: #lfw_score = acc_list[0] #if lfw_score>highest_acc[0]: # highest_acc[0] = lfw_score # if lfw_score>=0.998: # do_save = True score = sum(acc_list) if acc_list[-1] >= highest_acc[-1]: if acc_list[-1] > highest_acc[-1]: is_highest = True else: if score >= highest_acc[0]: is_highest = True highest_acc[0] = score highest_acc[-1] = acc_list[-1] #if lfw_score>=0.99: # do_save = True if is_highest: do_save = True if args.ckpt == 0: do_save = False elif args.ckpt == 2: do_save = True elif args.ckpt == 3: msave = 1 if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) epoch_cb = None #train_dataiter = mx.io.PrefetchingIter(train_dataiter) model.fit( train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
def get_train_context(num_cpus, num_gpus):
    if num_gpus > 0:
        return mx.gpu()
    return mx.cpu()
def _set_ctx(self):
    # Probe GPU availability by allocating a tiny array; fall back to CPU on any failure.
    try:
        a = mx.nd.zeros((1,), ctx=mx.gpu(0))
        self.ctx = [mx.gpu(0)]
    except:
        self.ctx = [mx.cpu()]
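# A hedged alternative sketch: newer MXNet versions expose mx.context.num_gpus(), which avoids the
# bare except around a trial allocation. This is not part of the original class, only a possible variant.
import mxnet as mx

def _set_ctx_alt(self):
    self.ctx = [mx.gpu(0)] if mx.context.num_gpus() > 0 else [mx.cpu()]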
# shuffle data
X, y = shuffle(mnist.data, mnist.target)

# split dataset
train_data = X[:50000, :].astype('float32')
train_label = y[:50000]
val_data = X[50000:60000, :].astype('float32')
val_label = y[50000:60000]

# Normalize data
train_data[:] /= 256.0
val_data[:] /= 256.0

batch_size = 100
# or you can use a data iterator, which makes using the model easier
train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, batch_size=batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(data=val_data, label=val_label, batch_size=batch_size)

logging.basicConfig(level=logging.DEBUG)

model = mx.model.FeedForward(
    ctx=mx.cpu(),
    symbol=mlp,
    num_round=20,
    learning_rate=0.1,
    momentum=0.9,
    wd=0.00001)

# train by using NumPy ndarrays directly
model.fit(X=train_data, y=train_label)
# train by using the NDArray iterator
# model.fit(X=train_iter, eval_data=val_iter)
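# A short follow-up sketch (assuming the old mx.model.FeedForward API used above): evaluate the
# trained model on the held-out split by predicting class probabilities and comparing the argmax
# against the labels. Shown only as an illustration of how the validation arrays can be used.
import numpy as np

val_prob = model.predict(val_data)        # (num_samples, num_classes) class probabilities
val_pred = np.argmax(val_prob, axis=1)
print('validation accuracy: %.4f' % np.mean(val_pred == val_label))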
train_data = data[:2000] dev_data = data[-2000:] train_y = y[:2000] dev_y = y[-2000:] model_name = 'bert_12_768_12' dataset = 'book_corpus_wiki_en_uncased' batch_size = 32 seq_len = 64 pad = True tr_ds = ArrayDataset(train_data, train_y) dev_ds = ArrayDataset(dev_data, dev_y) vectorizer = TMNTVectorizer(vocab_size=2000) vectorizer.fit_transform(train_data) ctx = mx.cpu() ## or mx.gpu(N) if using GPU device=N #num_classes = int(np.max(y) + 1) num_classes = 0 tr_dataset, dev_dataset, num_examples, bert_base, bert_vocab, _ = get_bert_datasets( None, vectorizer, tr_ds, dev_ds, batch_size, seq_len, bert_model_name=model_name, bert_dataset=dataset, num_classes=num_classes, ctx=ctx)
def create(style_dataset, content_dataset, style_feature=None, content_feature=None, max_iterations=None, model='resnet-16', verbose=True, batch_size = 6, **kwargs): """ Create a :class:`StyleTransfer` model. Parameters ---------- style_dataset: SFrame Input style images. The columns named by the ``style_feature`` parameters will be extracted for training the model. content_dataset : SFrame Input content images. The columns named by the ``content_feature`` parameters will be extracted for training the model. style_feature: string Name of the column containing the input images in style SFrame. 'None' (the default) indicates the only image column in the style SFrame should be used as the feature. content_feature: string Name of the column containing the input images in content SFrame. 'None' (the default) indicates the only image column in the content SFrame should be used as the feature. max_iterations : int The number of training iterations. If 'None' (the default), then it will be automatically determined based on the amount of data you provide. model : string optional Style transfer model to use: - "resnet-16" : Fast and small-sized residual network that uses VGG-16 as reference network during training. batch_size : int, optional If you are getting memory errors, try decreasing this value. If you have a powerful computer, increasing this value may improve training throughput. verbose : bool, optional If True, print progress updates and model details. Returns ------- out : StyleTransfer A trained :class:`StyleTransfer` model. See Also -------- StyleTransfer Examples -------- .. sourcecode:: python # Create datasets >>> content_dataset = turicreate.image_analysis.load_images('content_images/') >>> style_dataset = turicreate.image_analysis.load_images('style_images/') # Train a style transfer model >>> model = turicreate.style_transfer.create(content_dataset, style_dataset) # Stylize an image on all styles >>> stylized_images = model.stylize(data) # Visualize the stylized images >>> stylized_images.explore() """ if len(style_dataset) == 0: raise _ToolkitError("style_dataset SFrame cannot be empty") if len(content_dataset) == 0: raise _ToolkitError("content_dataset SFrame cannot be empty") if(batch_size < 1): raise _ToolkitError("'batch_size' must be greater than or equal to 1") if max_iterations is not None and (not isinstance(max_iterations, int) or max_iterations < 0): raise _ToolkitError("'max_iterations' must be an integer greater than or equal to 0") from ._sframe_loader import SFrameSTIter as _SFrameSTIter import mxnet as _mx from .._mxnet import _mxnet_utils if style_feature is None: style_feature = _tkutl._find_only_image_column(style_dataset) if content_feature is None: content_feature = _tkutl._find_only_image_column(content_dataset) if verbose: print("Using '{}' in style_dataset as feature column and using " "'{}' in content_dataset as feature column".format(style_feature, content_feature)) _raise_error_if_not_training_sframe(style_dataset, style_feature) _raise_error_if_not_training_sframe(content_dataset, content_feature) _tkutl._handle_missing_values(style_dataset, style_feature, 'style_dataset') _tkutl._handle_missing_values(content_dataset, content_feature, 'content_dataset') params = { 'batch_size': batch_size, 'vgg16_content_loss_layer': 2, # conv3_3 layer 'lr': 0.001, 'content_loss_mult': 1.0, 'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4], # conv 1-4 layers 'finetune_all_params': True, 'pretrained_weights': False, 'print_loss_breakdown': False, 'input_shape': (256, 256), 
'training_content_loader_type': 'stretch', 'use_augmentation': False, 'sequential_image_processing': False, # Only used if use_augmentaion is True 'aug_resize': 0, 'aug_min_object_covered': 0, 'aug_rand_crop': 0.9, 'aug_rand_pad': 0.9, 'aug_rand_gray': 0.0, 'aug_aspect_ratio': 1.25, 'aug_hue': 0.05, 'aug_brightness': 0.05, 'aug_saturation': 0.05, 'aug_contrast': 0.05, 'aug_horizontal_flip': True, 'aug_area_range': (.05, 1.5), 'aug_pca_noise': 0.0, 'aug_max_attempts': 20, 'aug_inter_method': 2, 'checkpoint': False, 'checkpoint_prefix': 'style_transfer', 'checkpoint_increment': 1000 } if '_advanced_parameters' in kwargs: # Make sure no additional parameters are provided new_keys = set(kwargs['_advanced_parameters'].keys()) set_keys = set(params.keys()) unsupported = new_keys - set_keys if unsupported: raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported)) params.update(kwargs['_advanced_parameters']) _content_loss_mult = params['content_loss_mult'] _style_loss_mult = params['style_loss_mult'] num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size']) batch_size_each = params['batch_size'] // max(num_gpus, 1) batch_size = max(num_gpus, 1) * batch_size_each input_shape = params['input_shape'] iterations = 0 if max_iterations is None or max_iterations==0: max_iterations = len(style_dataset) * 10000 if verbose: print('Setting max_iterations to be {}'.format(max_iterations)) # data loader if params['use_augmentation']: content_loader_type = '%s-with-augmentation' % params['training_content_loader_type'] else: content_loader_type = params['training_content_loader_type'] content_images_loader = _SFrameSTIter(content_dataset, batch_size, shuffle=True, feature_column=content_feature, input_shape=input_shape, loader_type=content_loader_type, aug_params=params, sequential=params['sequential_image_processing']) ctx = _mxnet_utils.get_mxnet_context(max_devices=params['batch_size']) num_styles = len(style_dataset) # TRANSFORMER MODEL from ._model import Transformer as _Transformer transformer_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[model]().get_model_path() transformer = _Transformer(num_styles, batch_size_each) transformer.collect_params().initialize(ctx=ctx) if params['pretrained_weights']: transformer.load_params(transformer_model_path, ctx, allow_missing=True) # For some reason, the transformer fails to hybridize for training, so we # avoid this until resolved # transformer.hybridize() # VGG MODEL from ._model import Vgg16 as _Vgg16 vgg_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS['Vgg16']().get_model_path() vgg_model = _Vgg16() vgg_model.collect_params().initialize(ctx=ctx) vgg_model.load_params(vgg_model_path, ctx=ctx, ignore_extra=True) vgg_model.hybridize() # TRAINER from mxnet import gluon as _gluon from ._model import gram_matrix as _gram_matrix if params['finetune_all_params']: trainable_params = transformer.collect_params() else: trainable_params = transformer.collect_params('.*gamma|.*beta') trainer = _gluon.Trainer(trainable_params, 'adam', {'learning_rate': params['lr']}) mse_loss = _gluon.loss.L2Loss() start_time = _time.time() smoothed_loss = None last_time = 0 cuda_gpus = _mxnet_utils.get_gpus_in_use(max_devices=params['batch_size']) num_mxnet_gpus = len(cuda_gpus) if verbose: # Estimate memory usage (based on experiments) cuda_mem_req = 260 + batch_size_each * 880 + num_styles * 1.4 _tkutl._print_neural_compute_device(cuda_gpus=cuda_gpus, use_mps=False, cuda_mem_req=cuda_mem_req, has_mps_impl=False) # # 
Pre-compute gram matrices for style images # if verbose: print('Analyzing visual features of the style images') style_images_loader = _SFrameSTIter(style_dataset, batch_size, shuffle=False, num_epochs=1, feature_column=style_feature, input_shape=input_shape, loader_type='stretch', sequential=params['sequential_image_processing']) num_layers = len(params['style_loss_mult']) gram_chunks = [[] for _ in range(num_layers)] for s_batch in style_images_loader: s_data = _gluon.utils.split_and_load(s_batch.data[0], ctx_list=ctx, batch_axis=0) for s in s_data: vgg16_s = _vgg16_data_prep(s) ret = vgg_model(vgg16_s) grams = [_gram_matrix(x) for x in ret] for i, gram in enumerate(grams): if gram.context != _mx.cpu(0): gram = gram.as_in_context(_mx.cpu(0)) gram_chunks[i].append(gram) del style_images_loader grams = [ # The concatenated styles may be padded, so we slice overflow _mx.nd.concat(*chunks, dim=0)[:num_styles] for chunks in gram_chunks ] # A context->grams look-up table, where all the gram matrices have been # distributed ctx_grams = {} if ctx[0] == _mx.cpu(0): ctx_grams[_mx.cpu(0)] = grams else: for ctx0 in ctx: ctx_grams[ctx0] = [gram.as_in_context(ctx0) for gram in grams] style_sa = style_dataset[style_feature] idx_column = _tc.SArray(range(0, style_sa.shape[0])) style_sframe = _tc.SFrame({"style": idx_column, style_feature: style_sa}) # # Training loop # vgg_content_loss_layer = params['vgg16_content_loss_layer'] rs = _np.random.RandomState(1234) while iterations < max_iterations: content_images_loader.reset() for c_batch in content_images_loader: c_data = _gluon.utils.split_and_load(c_batch.data[0], ctx_list=ctx, batch_axis=0) Ls = [] curr_content_loss = [] curr_style_loss = [] with _mx.autograd.record(): for c in c_data: # Randomize styles to train indices = _mx.nd.array(rs.randint(num_styles, size=batch_size_each), dtype=_np.int64, ctx=c.context) # Generate pastiche p = transformer(c, indices) # mean subtraction vgg16_p = _vgg16_data_prep(p) vgg16_c = _vgg16_data_prep(c) # vgg forward p_vgg_outputs = vgg_model(vgg16_p) c_vgg_outputs = vgg_model(vgg16_c) c_content_layer = c_vgg_outputs[vgg_content_loss_layer] p_content_layer = p_vgg_outputs[vgg_content_loss_layer] # Calculate Loss # Style Loss between style image and stylized image # Ls = sum of L2 norm of gram matrix of vgg16's conv layers style_losses = [] for gram, p_vgg_output, style_loss_mult in zip(ctx_grams[c.context], p_vgg_outputs, _style_loss_mult): gram_s_vgg = gram[indices] gram_p_vgg = _gram_matrix(p_vgg_output) style_losses.append(style_loss_mult * mse_loss(gram_s_vgg, gram_p_vgg)) style_loss = _mx.nd.add_n(*style_losses) # Content Loss between content image and stylized image # Lc = L2 norm at a single layer in vgg16 content_loss = _content_loss_mult * mse_loss(c_content_layer, p_content_layer) curr_content_loss.append(content_loss) curr_style_loss.append(style_loss) # Divide loss by large number to get into a more legible # range total_loss = (content_loss + style_loss) / 10000.0 Ls.append(total_loss) for L in Ls: L.backward() cur_loss = _np.mean([L.asnumpy()[0] for L in Ls]) if smoothed_loss is None: smoothed_loss = cur_loss else: smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss iterations += 1 if params['checkpoint'] and iterations % params['checkpoint_increment'] == 0: checkpoint_filename = params['checkpoint_prefix'] + "-" + str(iterations) + ".model" training_time = _time.time() - start_time state = { '_model': transformer, '_training_time_as_string': _seconds_as_string(training_time), 'batch_size': batch_size, 
'num_styles': num_styles, 'model': model, 'input_image_shape': input_shape, 'styles': style_sframe, 'num_content_images': len(content_dataset), 'training_time': training_time, 'max_iterations': max_iterations, 'training_iterations': iterations, 'training_epochs': content_images_loader.cur_epoch, 'style_feature': style_feature, 'content_feature': content_feature, "_index_column": "style", 'training_loss': smoothed_loss, } st_model = StyleTransfer(state) st_model.save(checkpoint_filename) trainer.step(batch_size) if verbose and iterations == 1: # Print progress table header column_names = ['Iteration', 'Loss', 'Elapsed Time'] num_columns = len(column_names) column_width = max(map(lambda x: len(x), column_names)) + 2 hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+' print(hr) print(('| {:<{width}}' * num_columns + '|').format(*column_names, width=column_width-1)) print(hr) cur_time = _time.time() if verbose and (cur_time > last_time + 10 or iterations == max_iterations): # Print progress table row elapsed_time = cur_time - start_time print("| {cur_iter:<{width}}| {loss:<{width}.3f}| {time:<{width}.1f}|".format( cur_iter = iterations, loss = smoothed_loss, time = elapsed_time , width = column_width-1)) if params['print_loss_breakdown']: print_content_loss = _np.mean([L.asnumpy()[0] for L in curr_content_loss]) print_style_loss = _np.mean([L.asnumpy()[0] for L in curr_style_loss]) print('Total Loss: {:6.3f} | Content Loss: {:6.3f} | Style Loss: {:6.3f}'.format(cur_loss, print_content_loss, print_style_loss)) last_time = cur_time if iterations == max_iterations: print(hr) break training_time = _time.time() - start_time # Save the model state state = { '_model': transformer, '_training_time_as_string': _seconds_as_string(training_time), 'batch_size': batch_size, 'num_styles': num_styles, 'model': model, 'input_image_shape': input_shape, 'styles': style_sframe, 'num_content_images': len(content_dataset), 'training_time': training_time, 'max_iterations': max_iterations, 'training_iterations': iterations, 'training_epochs': content_images_loader.cur_epoch, 'style_feature': style_feature, 'content_feature': content_feature, "_index_column": "style", 'training_loss': smoothed_loss, } return StyleTransfer(state)
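# A minimal sketch (assuming NCHW feature maps) of the Gram-matrix computation the style loss above
# relies on: for features of shape (batch, channels, height, width), the Gram matrix is the
# channel-by-channel inner product of the flattened spatial activations, normalized by their size.
# This mirrors what the imported _gram_matrix helper is expected to do; it is not the library's code.
import mxnet as mx

def gram_matrix_sketch(y):
    (b, ch, h, w) = y.shape
    features = y.reshape((b, ch, h * w))
    gram = mx.nd.batch_dot(features, features, transpose_b=True) / (ch * h * w)
    return gram

# usage: gram_matrix_sketch(mx.nd.random.uniform(shape=(2, 64, 32, 32))).shape == (2, 64, 64)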
def run_train_translate( train_params: str, translate_params: str, translate_params_equiv: Optional[str], train_source_path: str, train_target_path: str, dev_source_path: str, dev_target_path: str, test_source_path: str, test_target_path: str, train_source_factor_paths: Optional[List[str]] = None, dev_source_factor_paths: Optional[List[str]] = None, test_source_factor_paths: Optional[List[str]] = None, use_prepared_data: bool = False, max_seq_len: int = 10, restrict_lexicon: bool = False, work_dir: Optional[str] = None, seed: int = 13, quiet: bool = False) -> Tuple[float, float, float, float]: """ Train a model and translate a dev set. Report validation perplexity and BLEU. :param train_params: Command line args for model training. :param translate_params: First command line args for translation. :param translate_params_equiv: Second command line args for translation. Should produce the same outputs :param train_source_path: Path to the source file. :param train_target_path: Path to the target file. :param dev_source_path: Path to the development source file. :param dev_target_path: Path to the development target file. :param test_source_path: Path to the test source file. :param test_target_path: Path to the test target file. :param train_source_factor_paths: Optional list of paths to training source factor files. :param dev_source_factor_paths: Optional list of paths to dev source factor files. :param test_source_factor_paths: Optional list of paths to test source factor files. :param use_prepared_data: Whether to use the prepared data functionality. :param max_seq_len: The maximum sequence length. :param restrict_lexicon: Additional translation run with top-k lexicon-based vocabulary restriction. :param work_dir: The directory to store the model and other outputs in. :param seed: The seed used for training. :param quiet: Suppress the console output of training and decoding. :return: A tuple containing perplexity, bleu scores for standard and reduced vocab decoding, chrf score. 
""" if quiet: quiet_arg = "--quiet" else: quiet_arg = "" with TemporaryDirectory(dir=work_dir, prefix="test_train_translate.") as work_dir: # Optionally create prepared data directory if use_prepared_data: prepared_data_path = os.path.join(work_dir, "prepared_data") params = "{} {}".format( sockeye.prepare_data.__file__, _PREPARE_DATA_COMMON.format(train_source=train_source_path, train_target=train_target_path, output=prepared_data_path, max_len=max_seq_len, quiet=quiet_arg)) if train_source_factor_paths is not None: params += _TRAIN_WITH_FACTORS_COMMON.format( source_factors=" ".join(train_source_factor_paths)) logger.info("Creating prepared data folder.") with patch.object(sys, "argv", params.split()): sockeye.prepare_data.main() # Train model model_path = os.path.join(work_dir, "model") params = "{} {} {}".format( sockeye.train.__file__, _TRAIN_PARAMS_PREPARED_DATA_COMMON.format( prepared_data=prepared_data_path, dev_source=dev_source_path, dev_target=dev_target_path, model=model_path, max_len=max_seq_len, quiet=quiet_arg), train_params) if dev_source_factor_paths is not None: params += _DEV_WITH_FACTORS_COMMON.format( dev_source_factors=" ".join(dev_source_factor_paths)) logger.info("Starting training with parameters %s.", train_params) with patch.object(sys, "argv", params.split()): sockeye.train.main() else: # Train model model_path = os.path.join(work_dir, "model") params = "{} {} {}".format( sockeye.train.__file__, _TRAIN_PARAMS_COMMON.format(train_source=train_source_path, train_target=train_target_path, dev_source=dev_source_path, dev_target=dev_target_path, model=model_path, max_len=max_seq_len, seed=seed, quiet=quiet_arg), train_params) if train_source_factor_paths is not None: params += _TRAIN_WITH_FACTORS_COMMON.format( source_factors=" ".join(train_source_factor_paths)) if dev_source_factor_paths is not None: params += _DEV_WITH_FACTORS_COMMON.format( dev_source_factors=" ".join(dev_source_factor_paths)) logger.info("Starting training with parameters %s.", train_params) with patch.object(sys, "argv", params.split()): sockeye.train.main() # run checkpoint decoder on 1% of dev data with open(dev_source_path) as dev_fd: num_dev_sent = sum(1 for _ in dev_fd) sample_size = min(1, int(num_dev_sent * 0.01)) cp_decoder = sockeye.checkpoint_decoder.CheckpointDecoder( context=mx.cpu(), inputs=[dev_source_path], references=dev_target_path, model=model_path, sample_size=sample_size, batch_size=2, beam_size=2) cp_metrics = cp_decoder.decode_and_evaluate() logger.info("Checkpoint decoder metrics: %s", cp_metrics) logger.info("Translating with parameters %s.", translate_params) # Translate corpus with the 1st params out_path = os.path.join(work_dir, "out.txt") params = "{} {} {}".format( sockeye.translate.__file__, _TRANSLATE_PARAMS_COMMON.format(model=model_path, input=test_source_path, output=out_path, quiet=quiet_arg), translate_params) if test_source_factor_paths is not None: params += _TRANSLATE_WITH_FACTORS_COMMON.format( input_factors=" ".join(test_source_factor_paths)) with patch.object(sys, "argv", params.split()): sockeye.translate.main() # Translate corpus with the 2nd params if translate_params_equiv is not None: out_path_equiv = os.path.join(work_dir, "out_equiv.txt") params = "{} {} {}".format( sockeye.translate.__file__, _TRANSLATE_PARAMS_COMMON.format(model=model_path, input=test_source_path, output=out_path_equiv, quiet=quiet_arg), translate_params_equiv) if test_source_factor_paths is not None: params += _TRANSLATE_WITH_FACTORS_COMMON.format( input_factors=" 
".join(test_source_factor_paths)) with patch.object(sys, "argv", params.split()): sockeye.translate.main() # read-in both outputs, ensure they are the same with open(out_path, 'rt') as f: lines = f.readlines() with open(out_path_equiv, 'rt') as f: lines_equiv = f.readlines() assert all(a == b for a, b in zip(lines, lines_equiv)) # Test restrict-lexicon out_restrict_path = os.path.join(work_dir, "out-restrict.txt") if restrict_lexicon: # fast_align lex table ttable_path = os.path.join(work_dir, "ttable") generate_fast_align_lex(ttable_path) # Top-K lexicon lexicon_path = os.path.join(work_dir, "lexicon") params = "{} {}".format( sockeye.lexicon.__file__, _LEXICON_CREATE_PARAMS_COMMON.format(input=ttable_path, model=model_path, topk=20, lexicon=lexicon_path, quiet=quiet_arg)) with patch.object(sys, "argv", params.split()): sockeye.lexicon.main() # Translate corpus with restrict-lexicon params = "{} {} {} {}".format( sockeye.translate.__file__, _TRANSLATE_PARAMS_COMMON.format(model=model_path, input=test_source_path, output=out_restrict_path, quiet=quiet_arg), translate_params, _TRANSLATE_PARAMS_RESTRICT.format(lexicon=lexicon_path, topk=1)) if test_source_factor_paths is not None: params += _TRANSLATE_WITH_FACTORS_COMMON.format( input_factors=" ".join(test_source_factor_paths)) with patch.object(sys, "argv", params.split()): sockeye.translate.main() # test averaging points = sockeye.average.find_checkpoints(model_path=model_path, size=1, strategy='best', metric=C.PERPLEXITY) assert len(points) > 0 averaged_params = sockeye.average.average(points) assert averaged_params # get best validation perplexity metrics = sockeye.utils.read_metrics_file( path=os.path.join(model_path, C.METRICS_NAME)) perplexity = min(m[C.PERPLEXITY + '-val'] for m in metrics) with open(out_path, "r") as out: hypotheses = out.readlines() with open(test_target_path, "r") as ref: references = ref.readlines() assert len(hypotheses) == len(references) # compute metrics bleu = raw_corpus_bleu(hypotheses=hypotheses, references=references, offset=0.01) chrf = raw_corpus_chrf(hypotheses=hypotheses, references=references) bleu_restrict = None if restrict_lexicon: bleu_restrict = raw_corpus_bleu(hypotheses=hypotheses, references=references, offset=0.01) # Run BLEU cli eval_params = "{} {} ".format( sockeye.evaluate.__file__, _EVAL_PARAMS_COMMON.format(hypotheses=out_path, references=test_target_path, metrics="bleu chrf", quiet=quiet_arg), ) with patch.object(sys, "argv", eval_params.split()): sockeye.evaluate.main() return perplexity, bleu, bleu_restrict, chrf
help='directory of saved models') parser.add_argument('--resume-from', type=str, help='resume training from the model') parser.add_argument('--save-plot-dir', type=str, default='.', help='the path to save the history plot') opt = parser.parse_args() batch_size = opt.batch_size classes = 10 num_gpus = opt.num_gpus batch_size *= max(1, num_gpus) context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()] num_workers = opt.num_workers lr_decay = opt.lr_decay lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf] model_name = opt.model if model_name.startswith('cifar_wideresnet'): kwargs = {'classes': classes, 'drop_rate': opt.drop_rate} else: kwargs = {'classes': classes} net = get_model(model_name, **kwargs) if opt.resume_from: net.load_params(opt.resume_from, ctx=context) optimizer = 'nag'
def compare(reqObj, para, root, img_to_compare, step, image_64_decode, actual_img_id): _, model_args, model_auxs = para ctx = mx.cpu(0) symbol = lightened_cnn_b_feature() sub_folders = os.listdir(root) # print("root",root) # print("sub_folders",sub_folders) is_match_found = False if_class_found = False if (len(sub_folders) > 0): for folder in sub_folders: # loop through all the files and folders if os.path.isdir( os.path.join(root, folder) ): # check whether the current object is a folder or not sub_folder = os.path.join(root, folder) # print("subfolder", sub_folder) classId = folder # print("folder",folder) for img in os.listdir(sub_folder): imgpath = os.path.join(sub_folder, img) pathB = imgpath model_args['data'] = mx.nd.array( read2img(root, img_to_compare, pathB, 128, ctx), ctx) exector = symbol.bind(ctx, model_args, args_grad=None, grad_req="null", aux_states=model_auxs) exector.forward(is_train=False) exector.outputs[0].wait_to_read() output = exector.outputs[0].asnumpy() dis = np.dot(output[0], output[1]) / np.linalg.norm( output[0]) / np.linalg.norm(output[1]) # print("--------Score------", dis) if (dis > 0.60): if step == 2: # s3url = uploadImageToS3(image_64_decode,actual_img_id) stoteMainImageOnDisk( os.environ.get("MAIN_IMAGES_STORE_PATH"), folder, image_64_decode) col.generateCollage(folderPath=os.environ.get( "MAIN_IMAGES_STORE_PATH") + '/' + classId, width=800, height=250, shuffle=True, classid=classId) # classId = insertInImageByName(reqObj,folder,s3url) is_match_found = True if_class_found = True # print("matched Class",classId) break if is_match_found == True: break if is_match_found == False and (step == 1): classId = storeImageOnDisk(root, img_to_compare, step) if_class_found = True elif (step == 1): classId = storeImageOnDisk(root, img_to_compare, step) if_class_found = True if if_class_found == True: # print("step",step) resp = {} resp['status'] = 'success' if step == 1: resp['classId'] = classId # b64 = base64.b64encode(img_to_compare) # b64decodestring = base64.decodestring(b64) # q = np.frombuffer(b64decodestring, dtype=np.float64) # resp['thumbnail'] = q if step == 2: resp['classId'] = classId # resp['image_url'] = s3url if step == 3: resp['classId'] = classId resp['folderPath'] = os.environ.get( "MAIN_IMAGES_STORE_PATH") + '/' + classId else: resp = {} resp['status'] = 'error' resp['message'] = "No Class Found" return resp
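# A small sketch of the matching rule used inside compare() above: the two embeddings produced by
# the network are compared with cosine similarity and accepted as the same identity when the score
# exceeds a threshold (0.60 above). The helper name and default threshold here are illustrative only.
import numpy as np

def is_same_identity(emb_a, emb_b, threshold=0.60):
    score = np.dot(emb_a, emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b))
    return score > threshold, score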
parser.add_argument( '--pretrained', type=str, default='True', help= 'Load weights from previously saved parameters. You can specify parameter file name.' ) args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() # context list ctx = [mx.gpu(int(i)) for i in args.gpus.split(',') if i.strip()] ctx = [mx.cpu()] if not ctx else ctx # grab some image if not specified if not args.images.strip(): gcv.utils.download( 'https://github.com/dmlc/web-data/blob/master/' + 'gluoncv/detection/biking.jpg?raw=true', 'biking.jpg') image_list = ['biking.jpg'] else: image_list = [x.strip() for x in args.images.split(',') if x.strip()] if args.pretrained.lower() in ['true', '1', 'yes', 't']: net = gcv.model_zoo.get_model(args.network, pretrained=True) else: net = gcv.model_zoo.get_model(args.network, pretrained=False,
def main():
    opt = parse_args()

    # Log to both a file and the console
    filehandler = logging.FileHandler(opt.logging_file)
    streamhandler = logging.StreamHandler()
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    logger.addHandler(filehandler)
    logger.addHandler(streamhandler)
    logger.info(opt)

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    # Before epoch_start_cs all channels are used; from that epoch on, channel selection is applied.
    if opt.epoch_start_cs != -1:
        opt.use_all_channels = True

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    # Linear warm-up followed by the selected decay mode
    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained, 'classes': classes}
    if opt.use_gn:
        from gluoncv.nn import GroupNorm
        kwargs['norm_layer'] = GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se

    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'nag'
    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    # ShuffleNas variants are built directly; any other model name comes from the model zoo.
    if model_name == 'ShuffleNas_fixArch':
        architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
        scale_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3]
        net = get_shufflenas_oneshot(architecture=architecture, n_class=classes,
                                     scale_ids=scale_ids, use_se=opt.use_se,
                                     last_conv_after_pooling=opt.last_conv_after_pooling)
    elif model_name == 'ShuffleNas':
        net = get_shufflenas_oneshot(n_class=classes,
                                     use_all_blocks=opt.use_all_blocks,
                                     use_se=opt.use_se,
                                     last_conv_after_pooling=opt.last_conv_after_pooling)
    else:
        net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        teacher = get_model(teacher_name, pretrained=True, classes=classes, ctx=context)
        teacher.cast(opt.dtype)
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_train_idx, rec_val, rec_val_idx,
                     batch_size, num_workers):
        rec_train = os.path.expanduser(rec_train)
        rec_train_idx = os.path.expanduser(rec_train_idx)
        rec_val = os.path.expanduser(rec_val)
        rec_val_idx = os.path.expanduser(rec_val_idx)
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))
        mean_rgb = [123.68, 116.779, 103.939]
        std_rgb = [58.393, 57.12, 57.375]

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            return data, label

        train_data = mx.io.ImageRecordIter(
            path_imgrec=rec_train,
            path_imgidx=rec_train_idx,
            preprocess_threads=num_workers,
            shuffle=True,
            batch_size=batch_size,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
            rand_mirror=True,
            random_resized_crop=True,
            max_aspect_ratio=4. / 3.,
            min_aspect_ratio=3. / 4.,
            max_random_area=1,
            min_random_area=0.08,
            brightness=jitter_param,
            saturation=jitter_param,
            contrast=jitter_param,
            pca_noise=lighting_param,
        )
        val_data = mx.io.ImageRecordIter(
            path_imgrec=rec_val,
            path_imgidx=rec_val_idx,
            preprocess_threads=num_workers,
            shuffle=False,
            batch_size=batch_size,
            resize=resize,
            data_shape=(3, input_size, input_size),
            mean_r=mean_rgb[0],
            mean_g=mean_rgb[1],
            mean_b=mean_rgb[2],
            std_r=std_rgb[0],
            std_g=std_rgb[1],
            std_b=std_rgb[2],
        )
        return train_data, val_data, batch_fn

    def get_data_loader(data_dir, batch_size, num_workers):
        normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
        jitter_param = 0.4
        lighting_param = 0.1
        input_size = opt.input_size
        crop_ratio = opt.crop_ratio if opt.crop_ratio > 0 else 0.875
        resize = int(math.ceil(input_size / crop_ratio))

        def batch_fn(batch, ctx):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            return data, label

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ])
        transform_test = transforms.Compose([
            transforms.Resize(resize, keep_ratio=True),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            normalize
        ])

        train_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard',
            num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            imagenet.classification.ImageNet(data_dir, train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False,
            num_workers=num_workers)
        return train_data, val_data, batch_fn

    if opt.use_rec:
        train_data, val_data, batch_fn = get_data_rec(opt.rec_train, opt.rec_train_idx,
                                                      opt.rec_val, opt.rec_val_idx,
                                                      batch_size, num_workers)
    else:
        train_data, val_data, batch_fn = get_data_loader(opt.data_dir, batch_size, num_workers)

    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            y2 = l[::-1].one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes, on_value=1 - eta + eta / classes, off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data, epoch):
        if opt.use_rec:
            val_data.reset()
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in enumerate(val_data):
            data, label = batch_fn(batch, ctx)
            if model_name == 'ShuffleNas':
                # Validation also samples a random block choice and channel mask.
                block_choices = net.random_block_choices(select_predefined_block=False,
                                                         dtype=opt.dtype)
                if opt.cs_warm_up:
                    full_channel_mask, _ = net.random_channel_mask(
                        select_all_channels=opt.use_all_channels,
                        epoch_after_cs=epoch - opt.epoch_start_cs,
                        dtype=opt.dtype)
                else:
                    full_channel_mask, _ = net.random_channel_mask(
                        select_all_channels=opt.use_all_channels,
                        dtype=opt.dtype)
                outputs = [net(X.astype(opt.dtype, copy=False), block_choices, full_channel_mask)
                           for X in data]
            else:
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)

        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return 1 - top1, 1 - top5

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if opt.resume_params == '':
            if 'ShuffleNas' in model_name:
                net._initialize(ctx=ctx)
            else:
                net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(temperature=opt.temperature,
                                                             hard_weight=opt.hard_weight,
                                                             sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            if epoch == opt.epoch_start_cs:
                opt.use_all_channels = False
            tic = time.time()
            if opt.use_rec:
                train_data.reset()
            train_metric.reset()
            btic = time.time()

            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]

                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)

                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature)
                                    for X in data]

                with ag.record():
                    if model_name == 'ShuffleNas':
                        block_choices = net.random_block_choices(select_predefined_block=False,
                                                                 dtype=opt.dtype)
                        if opt.cs_warm_up:
                            full_channel_mask, _ = net.random_channel_mask(
                                select_all_channels=opt.use_all_channels,
                                epoch_after_cs=epoch - opt.epoch_start_cs,
                                dtype=opt.dtype)
                        else:
                            full_channel_mask, _ = net.random_channel_mask(
                                select_all_channels=opt.use_all_channels,
                                dtype=opt.dtype)
                        outputs = [net(X.astype(opt.dtype, copy=False), block_choices, full_channel_mask)
                                   for X in data]
                    else:
                        outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]

                    if distillation:
                        loss = [L(yhat.astype('float32', copy=False),
                                  y.astype('float32', copy=False),
                                  p.astype('float32', copy=False))
                                for yhat, y, p in zip(outputs, label, teacher_prob)]
                    else:
                        loss = [L(yhat, y.astype(opt.dtype, copy=False))
                                for yhat, y in zip(outputs, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size, ignore_stale_grad=True)

                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False))
                                      for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    train_metric_name, train_metric_score = train_metric.get()
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f' %
                                (epoch, i, batch_size * opt.log_interval / (time.time() - btic),
                                 train_metric_name, train_metric_score, trainer.learning_rate))
                    btic = time.time()

            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data, epoch)

            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states' %
                                    (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' % (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' % (save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=True)
    train(context)
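# The excerpt above defines main() but stops before any script entry point. A minimal
# guard like the following is assumed (it is not part of the excerpt) so the training
# script can be launched directly from the command line:
if __name__ == '__main__':
    main()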
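# A small, self-contained sanity check (a sketch, not part of the training script) of the
# label smoothing performed by smooth() above: each one-hot target is softened to
# 1 - eta + eta/classes on the true class and eta/classes everywhere else.
import mxnet as mx

labels = mx.nd.array([0, 2])          # two example class indices
eta, n_classes = 0.1, 4
soft = labels.one_hot(n_classes,
                      on_value=1 - eta + eta / n_classes,
                      off_value=eta / n_classes)
print(soft)
# Expected rows: [0.925, 0.025, 0.025, 0.025] and [0.025, 0.025, 0.925, 0.025]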