def construct(self, gradients, overflow):
    """AdamWeightDecayForBert"""
    lr = self.get_lr()
    # If loss-scale overflow is flagged, substitute 1.0 for beta1/beta2 so the
    # moment updates leave the moving averages unchanged for this step.
    cond = self.op_cast(P.Fill()(ts.int32, self.op_shape(self.beta1), 1) *
                        self.op_reshape(overflow, (())), ts.bool_)
    beta1 = self.op_select(cond, self.op_cast(ts.array((1.0,)), ts.float32), self.beta1)
    beta2 = self.op_select(cond, self.op_cast(ts.array((1.0,)), ts.float32), self.beta2)
    if self.is_group:
        if self.is_group_lr:
            optim_result = self.hyper_map(
                P.Partial()(_adam_opt, self.beta1, self.beta2, self.eps),
                lr, self.weight_decay, self.parameters, self.moments1,
                self.moments2, gradients, self.decay_flags, self.optim_filter)
        else:
            optim_result = self.hyper_map(
                P.Partial()(_adam_opt, beta1, beta2, self.eps, lr, overflow),
                self.weight_decay, self.parameters, self.moments1,
                self.moments2, gradients, self.decay_flags, self.optim_filter)
    else:
        optim_result = self.hyper_map(
            P.Partial()(_adam_opt, self.beta1, self.beta2, self.eps, lr,
                        self.weight_decay),
            self.parameters, self.moments1, self.moments2, gradients,
            self.decay_flags, self.optim_filter)
    if self.use_parallel:
        self.broadcast_params(optim_result)
    return optim_result
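# The overflow gate above relies on the Adam moment-update algebra: with beta = 1.0
# the new moment equals the old one and the gradient contribution vanishes. A minimal
# NumPy sketch of that property (illustrative only, not TinyMS code):
import numpy as np

def _moment_update_sketch(beta, moment, grad):
    """Standard exponential-moving-average update used by AdamWeightDecay."""
    return beta * moment + (1.0 - beta) * grad

m = np.array([0.5, -0.2])
g = np.array([10.0, 10.0])                  # e.g. garbage gradient from an overflowed step
print(_moment_update_sketch(0.9, m, g))     # normal step: moments move toward the gradient
print(_moment_update_sketch(1.0, m, g))     # overflow step: moments are returned unchanged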
def __init__(self, learning_rate, end_learning_rate, warmup_steps, decay_steps, power):
    super(BertLearningRate, self).__init__()
    self.warmup_flag = False
    if warmup_steps > 0:
        self.warmup_flag = True
        self.warmup_lr = WarmUpLR(learning_rate, warmup_steps)
    self.decay_lr = PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
    self.warmup_steps = ts.array([warmup_steps], dtype=ts.float32)
    self.greater = P.Greater()
    self.one = ts.array([1.0], dtype=ts.float32)
    self.cast = P.Cast()
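# __init__ only stores the pieces; the corresponding construct() is not shown here.
# A plausible reading of the stored fields (warmup_flag, greater, one) is a
# warmup/polynomial-decay blend along the following lines -- a plain-Python sketch
# under that assumption, not the actual TinyMS implementation:
def blended_lr_sketch(global_step, warmup_steps, warmup_lr, decay_lr):
    """Return warmup_lr while global_step < warmup_steps, otherwise decay_lr."""
    is_warmup = 1.0 if warmup_steps > global_step else 0.0
    return (1.0 - is_warmup) * decay_lr + is_warmup * warmup_lr

print(blended_lr_sketch(global_step=10, warmup_steps=100, warmup_lr=1e-5, decay_lr=1e-4))   # 1e-05
print(blended_lr_sketch(global_step=500, warmup_steps=100, warmup_lr=1e-5, decay_lr=1e-4))  # 0.0001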
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0):
    super(AdamWeightDecayOp, self).__init__(learning_rate, params, weight_decay)
    _check_param_value(beta1, beta2, eps, self.cls_name)
    self.beta1 = ts.array([beta1], dtype=ts.float32)
    self.beta2 = ts.array([beta2], dtype=ts.float32)
    self.eps = ts.array([eps], dtype=ts.float32)
    self.moments1 = self.parameters.clone(prefix="adam_m", init='zeros')
    self.moments2 = self.parameters.clone(prefix="adam_v", init='zeros')
    self.hyper_map = P.HyperMap()
def convert2tensor(self, transform_input):
    r"""
    Convert the numpy data to the tensor format.

    Args:
        transform_input (numpy.ndarray): The preprocessed image.

    Returns:
        Tensor, the converted image.
    """
    if not isinstance(transform_input, np.ndarray):
        err_msg = 'The transform_input type should be numpy.ndarray, got {}.'.format(
            type(transform_input))
        raise TypeError(err_msg)
    input_tensor = ts.expand_dims(ts.array(list(transform_input)), 0)
    return input_tensor
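# convert2tensor() above wraps the preprocessed array and prepends a batch axis.
# The equivalent NumPy behaviour, for illustration only:
import numpy as np

img = np.ones((32, 32, 3), dtype=np.float32)
batched = np.expand_dims(img, 0)   # same effect as ts.expand_dims(ts.array(...), 0)
print(batched.shape)               # (1, 32, 32, 3)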
def construct(self):
    """Generates matrix of relative positions between inputs."""
    range_vec_row_out = self.cast(ts.array(ts.arange(self._length)), ts.int32)
    range_vec_col_out = self.range_mat(range_vec_row_out, (self._length, -1))
    tile_row_out = self.tile(range_vec_row_out, (self._length,))
    tile_col_out = self.tile(range_vec_col_out, (1, self._length))
    range_mat_out = self.range_mat(tile_row_out, (self._length, self._length))
    transpose_out = self.range_mat(tile_col_out, (self._length, self._length))
    distance_mat = self.sub(range_mat_out, transpose_out)

    distance_mat_clipped = P.clip_by_value(distance_mat,
                                           self._min_relative_position,
                                           self._max_relative_position)

    # Shift values to be >= 0. Each integer still uniquely identifies a
    # relative position difference.
    final_mat = distance_mat_clipped + self._max_relative_position
    return final_mat
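# The reshape/tile/subtract dance above builds a clipped matrix of pairwise relative
# positions. An equivalent NumPy sketch (illustrative only, not the TinyMS operators)
# for length=4 and a maximum relative distance of 2:
import numpy as np

length, max_rel = 4, 2
rng = np.arange(length)
distance = rng[None, :] - rng[:, None]          # distance[i, j] = j - i
clipped = np.clip(distance, -max_rel, max_rel)  # limit to [-max_rel, max_rel]
final = clipped + max_rel                       # shift so every index is >= 0
print(final)
# [[2 3 4 4]
#  [1 2 3 4]
#  [0 1 2 3]
#  [0 0 1 2]]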
def cyclegan_lr(max_epoch, n_epoch, dataset_size):
    """
    Generate learning rate for cycle_gan.

    Args:
        max_epoch (int): Epoch size for training.
        n_epoch (int): Number of epochs with the initial learning rate.
        dataset_size (int): Total size of dataset.

    Returns:
        Tensor, learning rate.
    """
    n_epochs_decay = max_epoch - n_epoch
    lrs = [0.0002] * dataset_size * n_epoch
    lr_epoch = 0
    for epoch in range(n_epochs_decay):
        lr_epoch = 0.0002 * (n_epochs_decay - epoch) / n_epochs_decay
        lrs += [lr_epoch] * dataset_size
    lrs += [lr_epoch] * dataset_size * (max_epoch - n_epochs_decay - n_epoch)
    return ts.array(lrs, dtype=ts.float32)
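# The schedule above holds the learning rate at 2e-4 for n_epoch epochs and then decays
# it linearly over the remaining epochs; the trailing `lrs +=` line is a no-op, since
# max_epoch - n_epochs_decay - n_epoch == 0. A tiny pure-Python check of the schedule's
# shape (illustrative only):
def linear_decay_schedule_sketch(max_epoch, n_epoch, dataset_size, base_lr=0.0002):
    n_decay = max_epoch - n_epoch
    lrs = [base_lr] * dataset_size * n_epoch
    for epoch in range(n_decay):
        lrs += [base_lr * (n_decay - epoch) / n_decay] * dataset_size
    return lrs

sched = linear_decay_schedule_sketch(max_epoch=4, n_epoch=2, dataset_size=2)
print(sched)  # [0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0001, 0.0001]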
def postprocess(self, input, strategy='TOP1_CLASS'):
    r"""
    Apply postprocess operation for prediction result.

    Args:
        input (numpy.ndarray): Prediction result.
        strategy (str): Specifies the postprocess strategy. Default: TOP1_CLASS.

    Returns:
        str, the postprocess result.
    """
    if not isinstance(input, np.ndarray):
        raise TypeError("Input should be a NumPy ndarray, got {}.".format(type(input)))
    if not input.ndim == 2:
        raise TypeError("Input should be a 2-D NumPy ndarray, got {} dimensions.".format(input.ndim))
    if strategy not in self.transform_strategy:
        raise ValueError("Strategy should be one of {}, got {}.".format(
            self.transform_strategy, strategy))

    softmax = Softmax()
    score_list = softmax(ts.array(input)).asnumpy()
    if strategy == 'TOP1_CLASS':
        score = max(score_list[0])
        return ('TOP1: ' + str(self.labels[input[0].argmax()]) +
                ', score: ' + str(format(score, '.20f')))
    else:
        label_index = np.argsort(input[0])[::-1]
        score_index = np.sort(score_list[0])[::-1]
        top5_labels = []
        res = ''
        top5_scores = score_index[:5].tolist()
        for i in range(5):
            top5_labels.append(self.labels[label_index[i]])
            res += 'TOP' + str(i + 1) + ": " + str(top5_labels[i]) + \
                   ", score: " + str(format(top5_scores[i], '.20f')) + '\n'
        return res
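# The TOP5 assembly above sorts the raw logits (for label order) and the softmax scores
# (for score order) independently; because softmax is monotonic the two orderings agree,
# so label i pairs with score i. A small NumPy illustration with hypothetical labels,
# not part of the class:
import numpy as np

logits = np.array([[1.0, 3.0, 2.0]])
scores = np.exp(logits) / np.exp(logits).sum()      # softmax by hand
labels = ['cat', 'dog', 'bird']                     # hypothetical label set
order = np.argsort(logits[0])[::-1]                 # [1, 2, 0] -> dog, bird, cat
sorted_scores = np.sort(scores[0])[::-1]
for rank, (idx, s) in enumerate(zip(order, sorted_scores), start=1):
    print('TOP{}: {}, score: {:.4f}'.format(rank, labels[idx], s))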
def mobilenetv2_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Generate learning rate for mobilenetv2.

    Args:
        global_step (int): Current global step; the returned schedule starts from this step.
        lr_init (float): Init learning rate.
        lr_end (float): End learning rate.
        lr_max (float): Max learning rate.
        warmup_epochs (int): Number of warmup epochs.
        total_epochs (int): Total epoch of training.
        steps_per_epoch (int): Steps of one epoch.

    Returns:
        Tensor, learning rate.
    """
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
        else:
            lr = lr_end + \
                 (lr_max - lr_end) * \
                 (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2.
        if lr < 0.0:
            lr = 0.0
        lr_each_step.append(lr)
    current_step = global_step
    lr_each_step = ts.array(lr_each_step, dtype=ts.float32)
    learning_rate = lr_each_step[current_step:]
    return learning_rate
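# Warmup ramps linearly from lr_init to lr_max, then the rate cosine-decays from lr_max
# to lr_end; the returned Tensor is sliced so it starts at global_step. A tiny
# pure-Python check of the two phases (illustrative values only):
import math

def warmup_cosine_sketch(i, lr_init, lr_end, lr_max, warmup_steps, total_steps):
    if i < warmup_steps:
        return lr_init + (lr_max - lr_init) * i / warmup_steps
    progress = (i - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + (lr_max - lr_end) * (1. + math.cos(math.pi * progress)) / 2.

print(warmup_cosine_sketch(0, 0.0, 0.0, 0.5, warmup_steps=10, total_steps=100))    # 0.0
print(warmup_cosine_sketch(10, 0.0, 0.0, 0.5, warmup_steps=10, total_steps=100))   # 0.5 (peak)
print(warmup_cosine_sketch(100, 0.0, 0.0, 0.5, warmup_steps=10, total_steps=100))  # ~0.0 (end)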
def web_predict(instance, servable_name, servable_model, dataset_name, strategy):
    """
    Predict the result based on the input data. A network will be constructed
    based on the input and servable data, then load the checkpoint and do the predict.

    Args:
        instance (dict): The dict of the input image after transformation, with keys
            of `shape`, `dtype` and `data` (Image object).
        servable_name (str): Servable name.
        servable_model (dict): The servable model configuration (name, format, etc.).
        dataset_name (str): Name of the dataset, used to pick the transform.
        strategy (str): Output strategy, usually select between `TOP1_CLASS` and
            `TOP5_CLASS`; for cycle_gan, select between `gray2color` and `color2gray`.

    Returns:
        The dict object of the predicted result after post process.

    Examples:
        >>> # In the server part, after servable_search
        >>> res = web_predict(instance, servable_name, servable['model'], dataset_name, strategy)
        >>> return jsonify(res)
    """
    # check if servable model name is valid
    model_name = servable_model['name']
    net_func = model_checker.get(model_name)
    if net_func is None:
        err_msg = "Currently model_name only supports " + str(list(model_checker.keys())) + "!"
        return {"status": 1, "err_msg": err_msg}

    # check if model_format is valid
    model_format = servable_model['format']
    if model_format not in ("ckpt",):
        err_msg = "Currently model_format only supports `ckpt`!"
        return {"status": 1, "err_msg": err_msg}

    # check if the dataset is supported
    trans_func = transform_checker.get(dataset_name)
    if trans_func is None:
        print("Currently dataset_name only supports {}!".format(list(transform_checker.keys())))
        sys.exit(0)

    # process the original data
    ori_img = np.array(json.loads(instance['data']), dtype=instance['dtype'])
    if dataset_name in ['mnist']:
        image = trans_func(ori_img)
    else:
        cvt_image = cv2.cvtColor(ori_img, cv2.COLOR_BGR2RGB)
        image = trans_func(cvt_image)
    input_data = ts.array(image.tolist(), dtype=image.dtype.name)

    res_msg = ''
    if model_name == "cycle_gan":
        g_model = servable_model['g_model']
        if strategy == 'gray2color':
            # build the network
            G_generator, _ = net_func(g_model=g_model)
            ckpt_name = 'G_A'
        elif strategy == 'color2gray':
            _, G_generator = net_func(g_model=g_model)
            ckpt_name = 'G_B'
        else:
            err_msg = "Currently cycle_gan strategy only supports `gray2color` and `color2gray`!"
            return {"status": 1, "err_msg": err_msg}
        ckpt_path = os.path.join(serving_path, servable_name, ckpt_name + "." + model_format)
        out_img = cyclegan_predict(G_generator, input_data, ckpt_path)
        # "Result of applying the {} style transfer to the original image"
        res_msg = '原图使用{}风格迁移效果'.format(strategy)
        data = numpy2base64(out_img)
    else:
        # build the network
        class_num = servable_model['class_num']
        net = net_func(class_num=class_num, is_training=False)
        serve_model = model.Model(net)
        # load checkpoint
        ckpt_path = os.path.join(serving_path, servable_name, model_name + "." + model_format)
        if not os.path.isfile(ckpt_path):
            err_msg = "The model path " + ckpt_path + " does not exist!"
return {"status": 1, "err_msg": err_msg} serve_model.load_checkpoint(ckpt_path) # execute the network to perform model prediction output = serve_model.predict(ts.expand_dims(input_data, 0)) if model_name == "ssd300": output_np = (ts.concatenate((output[0], output[1]), axis=-1).asnumpy()) ih, iw, _ = instance['shape'] bbox_data = trans_func.postprocess(output_np, (ih, iw), strategy) print(bbox_data) bbox_num = len(bbox_data) if not bbox_num: err_msg = "抱歉!未检测到任何种类,无法标注。" return {"status": 1, "err_msg": err_msg} out_img = draw_boxes_in_image(bbox_data, ori_img) max_det = max(bbox_data, key=lambda k: k['score']) max_score = max_det['score'] category = bbox_data[bbox_data.index(max_det)]['category_id'] res_msg = '图中共标注了:{}个框,其中物种{}的得分最高, 为{}。'.format(bbox_num, category, round(max_score, 3)) data = numpy2base64(cv2.cvtColor(out_img, cv2.COLOR_BGR2RGB)) else: output_np = output.asnumpy() res_msg = trans_func.postprocess(output_np, strategy) data = numpy2base64(ori_img) res = { "status": 0, "instance": { "res_msg": res_msg, "data": data } } return res
def predict(instance, servable_name, servable_model, strategy):
    """
    Predict the result based on the input data. A network will be constructed
    based on the input and servable data, then load the checkpoint and do the predict.

    Args:
        instance (dict): The dict of the input image after transformation, with keys
            of `shape`, `dtype` and `data` (Image object).
        servable_name (str): Servable name.
        servable_model (dict): The servable model configuration (name, format, etc.).
        strategy (str): Output strategy, usually select between `TOP1_CLASS` and
            `TOP5_CLASS`; for cycle_gan, select between `gray2color` and `color2gray`.

    Returns:
        The dict object of the predicted result after post process.

    Examples:
        >>> # In the server part, after servable_search
        >>> res = predict(instance, servable_name, servable['model'], strategy)
        >>> return jsonify(res)
    """
    # check if servable model name is valid
    model_name = servable_model['name']
    net_func = model_checker.get(model_name)
    if net_func is None:
        err_msg = "Currently model_name only supports " + str(list(model_checker.keys())) + "!"
        return {"status": 1, "err_msg": err_msg}

    # check if model_format is valid
    model_format = servable_model['format']
    if model_format not in ("ckpt",):
        err_msg = "Currently model_format only supports `ckpt`!"
        return {"status": 1, "err_msg": err_msg}

    # parse the input data
    input_data = ts.array(json.loads(instance['data']), dtype=instance['dtype'])

    if model_name == "cycle_gan":
        g_model = servable_model['g_model']
        if strategy == 'gray2color':
            # build the network
            G_generator, _ = net_func(g_model=g_model)
            ckpt_name = 'G_A'
        elif strategy == 'color2gray':
            _, G_generator = net_func(g_model=g_model)
            ckpt_name = 'G_B'
        else:
            err_msg = "Currently cycle_gan strategy only supports `gray2color` and `color2gray`!"
            return {"status": 1, "err_msg": err_msg}
        ckpt_path = os.path.join(serving_path, servable_name, ckpt_name + "." + model_format)
        data = cyclegan_predict(G_generator, input_data, ckpt_path)
    else:
        # build the network
        class_num = servable_model['class_num']
        net = net_func(class_num=class_num, is_training=False)
        serve_model = model.Model(net)
        # load checkpoint
        ckpt_path = os.path.join(serving_path, servable_name, model_name + "." + model_format)
        if not os.path.isfile(ckpt_path):
            err_msg = "The model path " + ckpt_path + " does not exist!"
            return {"status": 1, "err_msg": err_msg}
        serve_model.load_checkpoint(ckpt_path)
        # execute the network to perform model prediction
        output = serve_model.predict(ts.expand_dims(input_data, 0))
        data = (ts.concatenate((output[0], output[1]), axis=-1).asnumpy()
                if model_name == "ssd300" else output.asnumpy())

    return {
        "status": 0,
        "instance": {
            "shape": data.shape,
            "dtype": data.dtype.name,
            "data": json.dumps(data.tolist())
        }
    }
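# Both predict() above and the variant below expect the `instance` payload to carry a
# JSON-serialized array plus its dtype, and they return the output in the same
# shape/dtype/data layout. A minimal client-side round-trip sketch in plain NumPy/json
# (illustrative only; no server or checkpoint involved):
import json
import numpy as np

img = np.zeros((2, 3), dtype=np.float32)
instance_example = {
    "shape": list(img.shape),
    "dtype": img.dtype.name,
    "data": json.dumps(img.tolist()),
}
# ...the server side rebuilds the array exactly the way predict() does:
restored = np.array(json.loads(instance_example["data"]), dtype=instance_example["dtype"])
assert restored.shape == img.shape and restored.dtype == img.dtype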
def predict(instance, servable_name, servable_model, strategy):
    """Predict the result based on the input data (standalone variant with a
    hard-coded servable directory)."""
    # check if servable model name is valid
    model_name = servable_model['name']
    net_func = model_checker.get(model_name)
    if net_func is None:
        err_msg = "Currently model_name only supports " + str(list(model_checker.keys())) + "!"
        return {"status": 1, "err_msg": err_msg}

    # check if model_format is valid
    model_format = servable_model['format']
    if model_format not in ("ckpt",):
        err_msg = "Currently model_format only supports `ckpt`!"
        return {"status": 1, "err_msg": err_msg}

    # parse the input data
    input_data = ts.array(json.loads(instance['data']), dtype=instance['dtype'])

    if model_name == "cycle_gan":
        g_model = servable_model['g_model']
        if strategy == 'gray2color':
            # build the network
            G_generator, _ = net_func(g_model=g_model)
            ckpt_name = 'G_A'
        elif strategy == 'color2gray':
            _, G_generator = net_func(g_model=g_model)
            ckpt_name = 'G_B'
        else:
            err_msg = "Currently cycle_gan strategy only supports `gray2color` and `color2gray`!"
            return {"status": 1, "err_msg": err_msg}
        ckpt_path = os.path.join("/etc/tinyms/serving", servable_name, ckpt_name + "." + model_format)
        data = cyclegan_predict(G_generator, input_data, ckpt_path)
    else:
        # build the network
        class_num = servable_model['class_num']
        net = net_func(class_num=class_num)
        serve_model = model.Model(net)
        # load checkpoint
        ckpt_path = os.path.join("/etc/tinyms/serving", servable_name, model_name + "." + model_format)
        if not os.path.isfile(ckpt_path):
            err_msg = "The model path " + ckpt_path + " does not exist!"
            return {"status": 1, "err_msg": err_msg}
        serve_model.load_checkpoint(ckpt_path)
        # execute the network to perform model prediction
        output = serve_model.predict(ts.expand_dims(input_data, 0))
        data = (ts.concatenate((output[0], output[1]), axis=-1).asnumpy()
                if model_name == "ssd300" else output.asnumpy())

    return {
        "status": 0,
        "instance": {
            "shape": data.shape,
            "dtype": data.dtype.name,
            "data": json.dumps(data.tolist())
        }
    }
def construct(self, from_tensor, to_tensor, attention_mask):
    """reshape 2d/3d input tensors to 2d"""
    from_tensor_2d = self.reshape(from_tensor, self.shape_from_2d)
    to_tensor_2d = self.reshape(to_tensor, self.shape_to_2d)
    query_out = self.query_layer(from_tensor_2d)
    key_out = self.key_layer(to_tensor_2d)
    value_out = self.value_layer(to_tensor_2d)

    query_layer = self.reshape(query_out, self.shape_from)
    query_layer = self.transpose(query_layer, self.trans_shape)
    key_layer = self.reshape(key_out, self.shape_to)
    key_layer = self.transpose(key_layer, self.trans_shape)

    attention_scores = self.matmul_trans_b(query_layer, key_layer)

    # use_relative_positions, supplementary logic
    if self.use_relative_positions:
        # relations_keys is [F|T, F|T, H]
        relations_keys = self._generate_relative_positions_embeddings()
        relations_keys = self.cast_compute_type(relations_keys)
        # query_layer_t is [F, B, N, H]
        query_layer_t = self.transpose(query_layer, self.trans_shape_relative)
        # query_layer_r is [F, B * N, H]
        query_layer_r = self.reshape(query_layer_t,
                                     (self.from_seq_length, -1, self.size_per_head))
        # key_position_scores is [F, B * N, F|T]
        key_position_scores = self.matmul_trans_b(query_layer_r, relations_keys)
        # key_position_scores_r is [F, B, N, F|T]
        key_position_scores_r = self.reshape(key_position_scores,
                                             (self.from_seq_length, -1,
                                              self.num_attention_heads,
                                              self.from_seq_length))
        # key_position_scores_r_t is [B, N, F, F|T]
        key_position_scores_r_t = self.transpose(key_position_scores_r,
                                                 self.trans_shape_position)
        attention_scores = attention_scores + key_position_scores_r_t

    attention_scores = self.multiply(self.scores_mul, attention_scores)

    if self.has_attention_mask:
        attention_mask = self.expand_dims(attention_mask, 1)
        multiply_out = self.sub(self.cast(ts.array((1.0,)), self.get_dtype(attention_scores)),
                                self.cast(attention_mask, self.get_dtype(attention_scores)))
        adder = self.multiply(multiply_out, self.multiply_data)
        attention_scores = self.add(adder, attention_scores)

    attention_probs = self.softmax(attention_scores)
    attention_probs = self.dropout(attention_probs)

    value_layer = self.reshape(value_out, self.shape_to)
    value_layer = self.transpose(value_layer, self.trans_shape)
    context_layer = self.matmul(attention_probs, value_layer)

    # use_relative_positions, supplementary logic
    if self.use_relative_positions:
        # relations_values is [F|T, F|T, H]
        relations_values = self._generate_relative_positions_embeddings()
        relations_values = self.cast_compute_type(relations_values)
        # attention_probs_t is [F, B, N, T]
        attention_probs_t = self.transpose(attention_probs, self.trans_shape_relative)
        # attention_probs_r is [F, B * N, T]
        attention_probs_r = self.reshape(attention_probs_t,
                                         (self.from_seq_length, -1, self.to_seq_length))
        # value_position_scores is [F, B * N, H]
        value_position_scores = self.matmul(attention_probs_r, relations_values)
        # value_position_scores_r is [F, B, N, H]
        value_position_scores_r = self.reshape(value_position_scores,
                                               (self.from_seq_length, -1,
                                                self.num_attention_heads,
                                                self.size_per_head))
        # value_position_scores_r_t is [B, N, F, H]
        value_position_scores_r_t = self.transpose(value_position_scores_r,
                                                   self.trans_shape_position)
        context_layer = context_layer + value_position_scores_r_t

    context_layer = self.transpose(context_layer, self.trans_shape)
    context_layer = self.reshape(context_layer, self.shape_return)

    return context_layer
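# The relative-position branch above adds, for every query position, a score against an
# embedding indexed by the clipped pairwise distance. A compact NumPy sketch of the
# key-score term with toy sizes (B=1, N=1 head); illustrative only, not the TinyMS ops:
import numpy as np

F, H, max_rel = 4, 3, 2
rng = np.arange(F)
rel_index = np.clip(rng[None, :] - rng[:, None], -max_rel, max_rel) + max_rel  # [F, F]
rel_table = np.random.randn(2 * max_rel + 1, H)        # one embedding per distance bucket
relations_keys = rel_table[rel_index]                  # [F, F, H]

query = np.random.randn(F, H)                          # queries for B * N = 1
content_scores = query @ query.T                       # stand-in for the q.k content scores, [F, F]
rel_scores = np.einsum('fh,fth->ft', query, relations_keys)  # per-query relative-key scores, [F, F]
attention_scores = content_scores + rel_scores
print(attention_scores.shape)  # (4, 4)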