def __init__(self):
    """Build the cell: a MulAdd sub-cell and a 1x2 float32 parameter."""
    super(MulAddWithParam, self).__init__()
    init_value = Tensor(np.array([[3, 2]], np.float32))
    self.param = Parameter(init_value, 'param')
    self.mul_add = MulAdd()
def test_if_while():
    """if_while on random feature maps must produce a (64, 10) array of 4.0."""
    a = Tensor(np.ones([1]).astype(np.float32))
    b = Tensor(np.ones([1]).astype(np.float32))
    x = Tensor(np.random.randn(1, 16, 12, 12).astype(np.float32))
    z = Tensor(np.random.randn(1, 16, 16, 16).astype(np.float32))
    out = if_while(a, b, x, z)
    expect = np.ones([64, 10]).astype(np.float32) * 4.0
    assert np.all(out.asnumpy() == expect)
def test_zeros():
    """zero_like_tensor of an all-ones int32 tensor must be all zeros."""
    src = Tensor(np.ones([2, 3]).astype(np.int32))
    out = zero_like_tensor(src)
    expect = np.zeros([2, 3]).astype(np.int32)
    assert np.all(out.asnumpy() == expect)
def test_f():
    """mainf(3, 2) is expected to return the pair (2, 3)."""
    a = Tensor(3, dtype=ms.int32)
    b = Tensor(2, dtype=ms.int32)
    assert mainf(a, b) == (2, 3)
def test_if_tensor():
    """if_tensor on two one-tensors must yield a tensor of fours."""
    one = np.ones([1]).astype(np.int32)
    out = if_tensor(Tensor(one), Tensor(one))
    assert out == Tensor(one * 4)
def test_bert_percision(enable_graph_kernel=False):
    """Train BERT for a few steps on Ascend and pin loss / overflow / loss-scale values.

    Args:
        enable_graph_kernel (bool): When True, also turns on graph-kernel fusion
            in the context. Default: False.
    """
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        reserve_class_name_in_scope=False)
    if enable_graph_kernel:
        context.set_context(enable_graph_kernel=True)
    ds, new_repeat_count, _ = me_de_train_dataset()
    version = os.getenv('VERSION', 'large')
    config = get_config(version=version)
    netwithloss = BertNetworkWithLoss(config, True)
    # Polynomial-decay LR over the full number of training steps, no warmup.
    lr = BertLearningRate(decay_steps=ds.get_dataset_size() * new_repeat_count,
                          learning_rate=5e-5,
                          end_learning_rate=1e-9,
                          power=10.0,
                          warmup_steps=0)
    # Weight decay applies to every parameter except LayerNorm weights and biases.
    decay_filter = lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()
    no_decay_filter = lambda x: 'layernorm' in x.name.lower() or 'bias' in x.name.lower()
    decay_params = list(filter(decay_filter, netwithloss.trainable_params()))
    other_params = list(filter(no_decay_filter, netwithloss.trainable_params()))
    group_params = [{'params': decay_params, 'weight_decay': 0.01},
                    {'params': other_params},
                    {'order_params': netwithloss.trainable_params()}]
    optimizer = Lamb(group_params, lr)
    # Dynamic loss scaling: start at 2**16, scale by 2, window of 3 clean steps.
    scale_window = 3
    scale_manager = DynamicLossScaleManager(2**16, 2, scale_window)
    netwithgrads = BertTrainOneStepWithLossScaleCell(
        netwithloss,
        optimizer=optimizer,
        scale_update_cell=scale_manager.get_update_cell())
    netwithgrads.set_train(True)
    model = Model(netwithgrads)
    callback = ModelCallback()
    # Deterministically re-initialize weights so the expected loss values below hold.
    params = netwithloss.trainable_params()
    for param in params:
        value = param.data
        name = param.name
        if isinstance(value, Tensor):
            if name.split('.')[-1] in ['weight']:
                if name.split('.')[-3] in ['cls2']:
                    logger.info("***************** BERT param name is 1 {}".format(name))
                    param.set_data(weight_variable(value.asnumpy().shape))
                else:
                    # Initialize with the transposed shape, then transpose back
                    # so the stored value still matches the parameter's shape.
                    logger.info("***************** BERT param name is 2 {}".format(name))
                    tempshape = value.asnumpy().shape
                    shape = (tempshape[1], tempshape[0])
                    weight_value = weight_variable(shape).asnumpy()
                    param.set_data(Tensor(np.transpose(weight_value, [1, 0])))
            else:
                logger.info("***************** BERT param name is 3 {}".format(name))
                param.set_data(weight_variable(value.asnumpy().shape))
    model.train(new_repeat_count, ds, callbacks=callback, dataset_sink_mode=False)
    # assertion occurs while the loss value, overflow state or loss_scale value is wrong
    loss_value = np.array(callback.loss_list)
    assert np.allclose(loss_value[0], 12.2065868, 0, 0.000001)
    expect_loss_value = [12.2065868, 11.8651543, 11.8282356, 11.8266964, 11.8210478,
                         12.4073524, 12.0055466, 12.6212320, 12.2229223, 12.4272099]
    print("loss value: {}".format(loss_value))
    assert np.allclose(loss_value, expect_loss_value, 0, 0.0005)
    overflow = np.array(callback.overflow_list)
    expect_overflow = [False, False, False, True, False, False, False, True, False, False]
    print("overflow: {}".format(overflow))
    assert (overflow == expect_overflow).all()
    # Loss scale must follow the expected doubling / halving pattern exactly.
    loss_scale = np.array(callback.lossscale_list)
    expect_loss_scale = [65536.0, 65536.0, 131072.0, 65536.0, 65536.0,
                         65536.0, 131072.0, 65536.0, 65536.0, 65536.0]
    print("loss scale: {}".format(loss_scale))
    assert np.allclose(loss_scale, expect_loss_scale, 0, 0)
def __init__(self):
    """Initialize the cell with a length-2 float32 parameter named 'z'."""
    super(Net, self).__init__()
    z_init = np.ones([2]).astype(np.float32)
    self.z = Parameter(Tensor(z_init), name='z')
def test_simple_while():
    """simple_while(c1, c2, c3) must evaluate to 21."""
    expect = Tensor([21], mstype.int32)
    assert simple_while(c1, c2, c3) == expect
def test_while_in_while_in_while():
    """Triply-nested while graph must evaluate to 2534."""
    expect = Tensor([2534], mstype.int32)
    assert while_in_while_in_while(c1, c2, c3) == expect
def test_simple_if():
    """simple_if(c1, c2, c3) must evaluate to 6."""
    expect = Tensor([6], mstype.int32)
    assert simple_if(c1, c2, c3) == expect
def test_if_in_if():
    """Nested-if graph must evaluate to 7."""
    expect = Tensor([7], mstype.int32)
    assert if_in_if(c1, c2, c3) == expect
""" test_multigraph_sink """ import pytest import numpy as np import mindspore.nn as nn import mindspore.context as context from mindspore.common.tensor import Tensor from mindspore.common import dtype as mstype from mindspore.common import ms_function from mindspore.ops import operations as P def setup_module(module): context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") c1 = Tensor([2], mstype.int32) c2 = Tensor([14], mstype.int32) c3 = Tensor([1], mstype.int32) c4 = Tensor([0], mstype.int32) c5 = Tensor([14], mstype.int32) @ms_function def simple_if(x, y, z): if x < y: x = x + 1 else: x = x + 2 x = x + 3 return x
def weight_variable_1(shape):
    """Return a Tensor of the given shape filled with float32 ones."""
    return Tensor(np.ones(shape).astype(np.float32))
def weight_variable_0(shape):
    """Return a Tensor of the given shape filled with float32 zeros."""
    return Tensor(np.zeros(shape).astype(np.float32))
def zeros_like_tensor(x):
    """Implement `zeros_like_tensor`.

    Returns a Tensor of zeros with the same shape AND dtype as `x`.

    Fix: the original built `np.zeros(x.shape)`, which always yields float64
    and silently discards the input dtype; `np.zeros_like` preserves it,
    matching the "zeros_like" contract.
    """
    data = x.asnumpy()
    return Tensor(np.zeros_like(data))
def _get_kernel_matrix(x_shape_nchw, kernel_matrix_shape, padding, x_dtype):
    """Build an all-ones kernel matrix of `kernel_matrix_shape` with dtype `x_dtype`.

    `x_shape_nchw` and `padding` are unused here but kept — presumably so the
    helper matches a uniform caller signature; verify against call sites.
    """
    ones = np.ones(kernel_matrix_shape)
    return Tensor(ones, x_dtype)
def weight_variable(shape): """weight variable""" np.random.seed(1) ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32) return Tensor(ones)
def _range_op(start, limit, delta, dtype):
    """helper function for Grad TopK: Tensor of [start, limit) stepped by delta."""
    values = list(range(start, limit, delta))
    return Tensor(values, dtype)
kernel_size, mode=1, pad_mode="pad", pad=0, stride=1, dilation=2, group=1) self.w = Parameter(Tensor(np.ones([16, 16, 3, 3]).astype(np.float32)), name='w') def construct(self, x): return self.conv(x, self.w) conv = ConvNet() c1 = Tensor([2], mstype.float32) c2 = Tensor([10], mstype.float32) c3 = Tensor([1], mstype.float32) @ms_function def t1_while(x, y, z): out = x i = c1 while i < c2: out = out + conv(z) i = i + c3 out = out + out return out
def test_bert_performance():
    """Train BERT in data-sink mode and pin loss, overflow, loss-scale and timing bounds."""
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        reserve_class_name_in_scope=False)
    ds, new_repeat_count, sink_size = me_de_train_dataset(sink_mode=True)
    version = os.getenv('VERSION', 'large')
    config = get_config(version=version)
    netwithloss = BertNetworkWithLoss(config, True)
    # Polynomial-decay LR over sink_size * repeat steps, no warmup.
    lr = BertLearningRate(decay_steps=sink_size * new_repeat_count,
                          learning_rate=5e-5,
                          end_learning_rate=1e-9,
                          power=10.0,
                          warmup_steps=0)
    # Weight decay applies to every parameter except LayerNorm weights and biases.
    decay_filter = lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()
    no_decay_filter = lambda x: 'layernorm' in x.name.lower() or 'bias' in x.name.lower()
    decay_params = list(filter(decay_filter, netwithloss.trainable_params()))
    other_params = list(filter(no_decay_filter, netwithloss.trainable_params()))
    group_params = [{'params': decay_params, 'weight_decay': 0.01},
                    {'params': other_params},
                    {'order_params': netwithloss.trainable_params()}]
    optimizer = Lamb(group_params, lr)
    # Dynamic loss scaling: start at 2**16, scale by 2, window of 3 clean steps.
    scale_window = 3
    scale_manager = DynamicLossScaleManager(2**16, 2, scale_window)
    netwithgrads = BertTrainOneStepWithLossScaleCell(
        netwithloss,
        optimizer=optimizer,
        scale_update_cell=scale_manager.get_update_cell())
    netwithgrads.set_train(True)
    model = Model(netwithgrads)
    callback = ModelCallback()
    # Deterministically re-initialize weights so the expected values below hold.
    params = netwithloss.trainable_params()
    for param in params:
        value = param.data
        name = param.name
        if isinstance(value, Tensor):
            if name.split('.')[-1] in ['weight']:
                if name.split('.')[-3] in ['cls2']:
                    logger.info("***************** BERT param name is 1 {}".format(name))
                    param.set_data(weight_variable(value.asnumpy().shape))
                else:
                    # Initialize with the transposed shape, then transpose back
                    # so the stored value still matches the parameter's shape.
                    logger.info("***************** BERT param name is 2 {}".format(name))
                    tempshape = value.asnumpy().shape
                    shape = (tempshape[1], tempshape[0])
                    weight_value = weight_variable(shape).asnumpy()
                    param.set_data(Tensor(np.transpose(weight_value, [1, 0])))
            else:
                logger.info("***************** BERT param name is 3 {}".format(name))
                param.set_data(weight_variable(value.asnumpy().shape))
    time_monitor_callback = TimeMonitor(sink_size)
    model.train(new_repeat_count,
                ds,
                callbacks=[time_monitor_callback, callback],
                dataset_sink_mode=True,
                sink_size=sink_size)
    # assertion occurs while the loss value, overflow state or loss_scale value is wrong
    loss_value = np.array(callback.loss_list)
    expect_loss_value = [10.235566, 10.207392, 10.206976]
    print("loss value: {}".format(loss_value))
    assert np.allclose(loss_value, expect_loss_value, 0, 0.0005)
    overflow = np.array(callback.overflow_list)
    expect_overflow = [True, True, True]
    print("overflow: {}".format(overflow))
    assert (overflow == expect_overflow).all()
    loss_scale = np.array(callback.lossscale_list)
    expect_loss_scale = [262144.0, 262144.0, 262144.0]
    print("loss scale: {}".format(loss_scale))
    assert np.allclose(loss_scale, expect_loss_scale, 0, 0)
    # Timing checks use the third epoch ([2]) with a small slack over the budget.
    epoch_mseconds = np.array(time_monitor_callback.epoch_mseconds_list)[2]
    expect_epoch_mseconds = 1400
    print("epoch mseconds: {}".format(epoch_mseconds))
    assert epoch_mseconds <= expect_epoch_mseconds + 5
    per_step_mseconds = np.array(time_monitor_callback.per_step_mseconds_list)[2]
    expect_per_step_mseconds = 14
    print("per step mseconds: {}".format(per_step_mseconds))
    assert per_step_mseconds <= expect_per_step_mseconds + 1
def __init__(self, mul_size):
    """Create a Mul op and a constant multiplier of value 0.6 with shape `mul_size`."""
    super().__init__()
    weight_np = np.full(mul_size, 0.6, dtype=np.float32)
    self.mul_weight = Tensor(weight_np)
    self.mul = P.Mul()
def _train_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
    """
    Training process. The data would be passed to network directly.

    Args:
        epoch (int): Total number of iterations on the data.
        train_dataset (Dataset): A training dataset iterator. If there is no
                                 loss_fn, a tuple with multiple data (data1, data2, data3, ...) should be
                                 returned and passed to the network. Otherwise, a tuple (data, label) should
                                 be returned, and the data and label are passed to the network and loss
                                 function respectively.
        list_callback (Callback): Executor of callback list. Default: None.
        cb_params (_InternalCallbackParam): Callback parameters. Default: None.
    """
    # Non-sinking preprocessing: the helper yields elements one step at a time.
    dataset_helper, _ = self._exec_preprocess(self._train_network,
                                              is_train=True,
                                              phase='train',
                                              dataset=train_dataset,
                                              dataset_sink_mode=False)
    cb_params.cur_step_num = 0
    run_context = RunContext(cb_params)
    list_callback.begin(run_context)
    # used to stop training for early stop, such as stopAtTIme or stopATStep
    should_stop = False
    for i in range(epoch):
        cb_params.cur_epoch_num = i + 1
        list_callback.epoch_begin(run_context)
        for next_element in dataset_helper:
            len_element = len(next_element)
            if self._loss_fn and len_element != 2:
                raise ValueError("when loss_fn is not None, train_dataset should"
                                 "return two elements, but got {}".format(len_element))
            cb_params.cur_step_num += 1
            list_callback.step_begin(run_context)
            overflow = False
            # With drop-overflow updates enabled, append the current loss scale
            # as an extra float32 input for the train cell.
            if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
                scaling_sens = self._get_scaling_sens()
                next_element = tuple(next_element) + (Tensor(scaling_sens, mstype.float32),)
            outputs = self._train_network(*next_element)
            cb_params.net_outputs = outputs
            # In that mode the network returns (loss, overflow_flag, _); feed the
            # overflow state back so the manager can adjust the loss scale.
            if self._loss_scale_manager and self._loss_scale_manager.get_drop_overflow_update():
                _, overflow, _ = outputs
                overflow = np.all(overflow.asnumpy())
                self._loss_scale_manager.update_loss_scale(overflow)
            list_callback.step_end(run_context)
            # A stop request is sticky: once set it survives to the outer loop.
            should_stop = should_stop or run_context.get_stop_requested()
            if should_stop:
                break
        train_dataset.reset()
        list_callback.epoch_end(run_context)
        should_stop = should_stop or run_context.get_stop_requested()
        if should_stop:
            break
    list_callback.end(run_context)
def test_grad_add_mul():
    """Gradients of the add-mul graph at (3, 2) must be (2, 7)."""
    a = Tensor(3, dtype=ms.int32)
    b = Tensor(2, dtype=ms.int32)
    assert grad_add_mul(a, b) == (2, 7)
def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals):
    """Set up stage-2 bbox assignment/sampling: thresholds, ops and constant tensors.

    Args:
        config: Configuration object providing the *_stage2 thresholds and counts.
        batch_size (int): Batch size.
        num_bboxes (int): Number of candidate boxes to assign.
        add_gt_as_proposals (bool/int): Whether ground-truth boxes join the proposals.

    Fix: `np.bool` (an alias of the builtin `bool`) was removed in NumPy 1.24;
    use `bool` directly so this module imports on modern NumPy.
    """
    super(BboxAssignSampleForRcnn, self).__init__()
    cfg = config
    self.use_ambigous_sample = cfg.use_ambigous_sample
    self.batch_size = batch_size
    # Stage-2 IOU thresholds and expected sample counts from config.
    self.neg_iou_thr = cfg.neg_iou_thr_stage2
    self.pos_iou_thr = cfg.pos_iou_thr_stage2
    self.min_pos_iou = cfg.min_pos_iou_stage2
    self.num_gts = cfg.num_gts
    self.num_bboxes = num_bboxes
    self.num_expected_pos = cfg.num_expected_pos_stage2
    self.num_expected_amb = cfg.num_expected_amb_stage2
    self.num_expected_neg = cfg.num_expected_neg_stage2
    self.num_expected_total = cfg.num_expected_total_stage2
    self.add_gt_as_proposals = add_gt_as_proposals
    self.label_inds = Tensor(np.arange(1, self.num_gts + 1).astype(np.int32))
    self.add_gt_as_proposals_valid = Tensor(
        np.array(self.add_gt_as_proposals * np.ones(self.num_gts), dtype=np.int32))
    # Primitive ops used by construct().
    self.concat = P.Concat(axis=0)
    self.max_gt = P.ArgMaxWithValue(axis=0)
    self.max_anchor = P.ArgMaxWithValue(axis=1)
    self.sum_inds = P.ReduceSum()
    self.iou = P.IOU()
    self.greaterequal = P.GreaterEqual()
    self.greater = P.Greater()
    self.select = P.Select()
    self.gatherND = P.GatherNd()
    self.gatherV2 = P.Gather()
    self.squeeze = P.Squeeze()
    self.cast = P.Cast()
    self.logicaland = P.LogicalAnd()
    self.less = P.Less()
    self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos)
    self.random_choice_with_mask_amb = P.RandomChoiceWithMask(self.num_expected_amb)
    self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg)
    self.reshape = P.Reshape()
    self.equal = P.Equal()
    self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0),
                                                   stds=(0.1, 0.1, 0.2, 0.2))
    self.concat_axis1 = P.Concat(axis=1)
    self.logicalnot = P.LogicalNot()
    self.tile = P.Tile()
    # Check
    self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float32))
    self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float32))
    # Init tensor
    self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
    self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32))
    self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32))
    self.assigned_amb = Tensor(np.array(-3 * np.ones(num_bboxes), dtype=np.int32))
    self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
    self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))
    self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32))
    self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float32))
    self.range_amb_size = Tensor(np.arange(self.num_expected_amb).astype(np.float32))
    # `bool` here (not the removed np.bool alias) — identical semantics.
    self.check_neg_mask = Tensor(
        np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=bool))
    if self.use_ambigous_sample:
        self.check_neg_mask = Tensor(
            np.array(np.ones(self.num_expected_neg - self.num_expected_pos - self.num_expected_amb),
                     dtype=bool))
    check_neg_mask_ignore_end = np.array(np.ones(self.num_expected_neg), dtype=bool)
    check_neg_mask_ignore_end[-1] = False
    self.check_neg_mask_ignore_end = Tensor(check_neg_mask_ignore_end)
    self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float32))
    self.bboxs_amb_mask = Tensor(np.zeros((self.num_expected_amb, 4), dtype=np.float32))
    self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8))
    self.labels_amb_mask = Tensor(np.array(np.zeros(self.num_expected_amb) + 2, dtype=np.uint8))
    self.reshape_shape_pos = (self.num_expected_pos, 1)
    self.reshape_shape_amb = (self.num_expected_amb, 1)
    self.reshape_shape_neg = (self.num_expected_neg, 1)
    # Scalar tensors for threshold comparisons inside the graph.
    self.scalar_zero = Tensor(0.0, dtype=mstype.float32)
    self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=mstype.float32)
    self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=mstype.float32)
    self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=mstype.float32)
def test_grad_if():
    """Gradients of the if-graph at (5, 4) must be (3, 0)."""
    grads = grad_if(Tensor(5, dtype=ms.int32), Tensor(4, dtype=ms.int32))
    assert grads == (3, 0)
import numpy as np from mindspore import context from mindspore.common import dtype as mstype from mindspore.common.initializer import initializer from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore.ops import functional as F from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore._checkparam import Validator as validator from mindspore._checkparam import Rel from .optimizer import Optimizer from .. import layer from .. import _graph_kernels as G num_one = Tensor(np.ones([1]), mstype.float32) _lamb_opt = C.MultitypeFuncGraph("lamb_opt") @_lamb_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", "Tensor", "Tensor", "Bool", "Bool") def _update_run_op(beta1, beta2, eps, global_step, lr, weight_decay, param, m, v, gradient, decay_flag, optim_filter): """ Update parameters. Args: beta1 (Tensor): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0). beta2 (Tensor): The exponential decay rate for the 2nd moment estimations. Should be in range (0.0, 1.0). eps (Tensor): Term added to the denominator to improve numerical stability. Should be greater than 0. lr (Tensor): Learning rate. weight_decay (Number): Weight decay. Should be equal to or greater than 0.
def test_grad_while():
    """Gradient of the while-graph at 5 must be the 1-tuple (60,)."""
    result = grad_while(Tensor(5, dtype=ms.int32))
    assert result == (60,)
def tuple_to_array(x):
    """Implement `tuple_to_array`: wrap the tuple's values in a Tensor."""
    arr = np.array(x)
    return Tensor(arr)
def test_arithmetic_simplify_07():
    """arithmetic_simplify_07 must map [[1..6]] to [[20..70]] (int32)."""
    data = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.int32)
    out = arithmetic_simplify_07(Tensor(data))
    expect = np.array([[20, 30, 40], [50, 60, 70]]).astype(np.int32)
    assert np.all(out.asnumpy() == expect)
def test_grad_one_input_bprop():
    """Custom bprop of OneInputBprop must yield an all-fives gradient.

    Fix: renamed the local `input` to `inp` — the original shadowed the
    builtin `input`.
    """
    net = OneInputBprop()
    inp = Tensor(np.ones([2, 2]).astype(np.float32))
    grad = C.grad_all(net)(inp)
    assert (grad[0].asnumpy() == np.array([5, 5]).astype(np.float32)).all()