示例#1
0
 def construct(self, gradients, overflow):
     """Apply one AdamWeightDecayForBert update step.

     Args:
         gradients: gradients mapped together with ``self.parameters``
             (presumably one entry per parameter -- confirm against the caller).
         overflow: overflow indicator; it is reshaped to a scalar and used to
             build the ``cond`` mask below.

     Returns:
         The value produced by ``self.hyper_map`` over ``_adam_opt``.
     """
     lr = self.get_lr()
     # cond: int32 ones shaped like beta1, multiplied by the scalar overflow
     # value and cast to bool.
     cond = self.op_cast(P.Fill()(ts.int32, self.op_shape(self.beta1), 1) *\
                         self.op_reshape(overflow, (())), ts.bool_)
     # Overflow-aware betas: 1.0 is chosen where cond is set, the configured
     # beta otherwise (assuming op_select follows select(cond, x, y)
     # semantics -- TODO confirm).
     beta1 = self.op_select(cond, self.op_cast(ts.array((1.0, )),
                                               ts.float32), self.beta1)
     beta2 = self.op_select(cond, self.op_cast(ts.array((1.0, )),
                                               ts.float32), self.beta2)
     if self.is_group:
         if self.is_group_lr:
             # Grouped parameters with per-group learning rate: lr and
             # weight_decay are mapped element-wise alongside the parameters.
             # NOTE(review): this branch binds self.beta1/self.beta2 (not the
             # overflow-aware beta1/beta2 above) and does not pass `overflow`,
             # unlike the branch below -- confirm this is intended.
             optim_result = self.hyper_map(
                 P.Partial()(_adam_opt, self.beta1, self.beta2,
                             self.eps), lr, self.weight_decay,
                 self.parameters, self.moments1, self.moments2, gradients,
                 self.decay_flags, self.optim_filter)
         else:
             # Grouped parameters with a shared lr: the overflow-aware betas,
             # lr and overflow are bound once; weight_decay is mapped.
             optim_result = self.hyper_map(
                 P.Partial()(_adam_opt, beta1, beta2, self.eps, lr,
                             overflow), self.weight_decay, self.parameters,
                 self.moments1, self.moments2, gradients, self.decay_flags,
                 self.optim_filter)
     else:
         # No parameter groups: lr and weight_decay are bound once.
         # NOTE(review): as in the group-lr branch, the configured betas are
         # used and `overflow` is not forwarded -- confirm. The three branches
         # call _adam_opt with different argument lists, so it presumably has
         # several registered signatures; verify against its definition.
         optim_result = self.hyper_map(
             P.Partial()(_adam_opt, self.beta1, self.beta2, self.eps, lr,
                         self.weight_decay), self.parameters, self.moments1,
             self.moments2, gradients, self.decay_flags, self.optim_filter)
     if self.use_parallel:
         self.broadcast_params(optim_result)
     return optim_result
示例#2
0
    def __init__(self, learning_rate, end_learning_rate, warmup_steps,
                 decay_steps, power):
        """Create the warmup and polynomial-decay schedules and helper ops.

        Args:
            learning_rate: base learning rate handed to both ``WarmUpLR`` and
                ``PolynomialDecayLR``.
            end_learning_rate: final learning rate handed to ``PolynomialDecayLR``.
            warmup_steps: number of warmup steps; a warmup schedule is only
                created when this is greater than zero.
            decay_steps: decay length handed to ``PolynomialDecayLR``.
            power: polynomial power handed to ``PolynomialDecayLR``.
        """
        super(BertLearningRate, self).__init__()

        # The warmup schedule is optional; remember whether it exists.
        has_warmup = bool(warmup_steps > 0)
        self.warmup_flag = has_warmup
        if has_warmup:
            self.warmup_lr = WarmUpLR(learning_rate, warmup_steps)

        self.decay_lr = PolynomialDecayLR(learning_rate, end_learning_rate,
                                          decay_steps, power)

        # The step threshold is stored as a one-element float32 tensor.
        self.warmup_steps = ts.array([warmup_steps], dtype=ts.float32)

        self.greater = P.Greater()
        self.one = ts.array([1.0], dtype=ts.float32)
        self.cast = P.Cast()
示例#3
0
 def __init__(self,
              params,
              learning_rate=1e-3,
              beta1=0.9,
              beta2=0.999,
              eps=1e-6,
              weight_decay=0.0):
     """Initialise the optimizer state for AdamWeightDecayOp.

     Args:
         params: parameters forwarded to the base optimizer.
         learning_rate: learning rate forwarded to the base optimizer.
         beta1: first hyper-parameter validated by ``_check_param_value``.
         beta2: second hyper-parameter validated by ``_check_param_value``.
         eps: third hyper-parameter validated by ``_check_param_value``.
         weight_decay: weight decay forwarded to the base optimizer.
     """
     super(AdamWeightDecayOp, self).__init__(learning_rate, params,
                                             weight_decay)
     _check_param_value(beta1, beta2, eps, self.cls_name)

     def _as_float32(value):
         # Hyper-parameters are kept as one-element float32 tensors.
         return ts.array([value], dtype=ts.float32)

     self.beta1 = _as_float32(beta1)
     self.beta2 = _as_float32(beta2)
     self.eps = _as_float32(eps)

     # Two zero-initialised clones of the parameters hold the moment state.
     self.moments1 = self.parameters.clone(prefix="adam_m", init='zeros')
     self.moments2 = self.parameters.clone(prefix="adam_v", init='zeros')

     self.hyper_map = P.HyperMap()
示例#4
0
    def convert2tensor(self, transform_input):
        r"""
        Turn a preprocessed numpy image into a tensor with a new leading axis.

        Args:
            transform_input (numpy.ndarray): the preprocessed image.

        Returns:
            Tensor, built with ``ts.array`` and expanded at axis 0.

        Raises:
            TypeError: If ``transform_input`` is not a numpy.ndarray.
        """
        if isinstance(transform_input, np.ndarray):
            image_tensor = ts.array(list(transform_input))
            return ts.expand_dims(image_tensor, 0)

        raise TypeError(
            'The transform_input type should be numpy.ndarray, got {}.'.format(
                type(transform_input)))
示例#5
0
    def construct(self):
        """Build the clipped, shifted matrix of pairwise position differences.

        Returns:
            The clipped difference matrix with ``self._max_relative_position``
            added, so clipped values are offset by the upper clip bound.
        """
        # Index vector 0..length-1 as int32, plus a second view of it produced by
        # self.range_mat (presumably a reshape to (length, -1) -- TODO confirm).
        row_idx = self.cast(ts.array(ts.arange(self._length)), int32)
        col_idx = self.range_mat(row_idx, (self._length, -1))

        # Tile both views and bring them to (length, length).
        row_tiled = self.tile(row_idx, (self._length,))
        col_tiled = self.tile(col_idx, (1, self._length))
        row_grid = self.range_mat(row_tiled, (self._length, self._length))
        col_grid = self.range_mat(col_tiled, (self._length, self._length))

        # Pairwise differences, limited to the configured window.
        offsets = self.sub(row_grid, col_grid)
        clipped_offsets = P.clip_by_value(offsets,
                                          self._min_relative_position,
                                          self._max_relative_position)

        # Adding the upper bound shifts the clipped range while keeping each
        # distinct difference distinct.
        return clipped_offsets + self._max_relative_position
示例#6
0
def cyclegan_lr(max_epoch, n_epoch, dataset_size, lr_init=0.0002):
    """
    Generate learning rate for cycle_gan.

    The schedule holds `lr_init` for `n_epoch` epochs and then decays it
    linearly over the remaining `max_epoch - n_epoch` epochs. Every epoch
    contributes `dataset_size` identical per-step entries.

    Args:
       max_epoch (int): Epoch size for training.
       n_epoch (int): Number of epochs with the initial learning rate.
       dataset_size (int): Number of per-step entries emitted for each epoch.
       lr_init (float): Initial learning rate. Default: 0.0002.

    Returns:
       Tensor, learning rate for every step, as float32.
    """
    n_epochs_decay = max_epoch - n_epoch

    # Constant phase.
    lrs = [lr_init] * dataset_size * n_epoch

    # Linear decay phase: the first decay epoch still uses the full rate and
    # the last one uses lr_init / n_epochs_decay. The loop body never runs
    # when n_epochs_decay <= 0, so the division is safe.
    for epoch in range(n_epochs_decay):
        lr_epoch = lr_init * (n_epochs_decay - epoch) / n_epochs_decay
        lrs += [lr_epoch] * dataset_size

    # The two phases already cover all max_epoch epochs; the former trailing
    # padding multiplied by (max_epoch - n_epochs_decay - n_epoch), which is
    # always 0, so it has been dropped.
    return ts.array(lrs, dtype=ts.float32)
示例#7
0
    def postprocess(self, input, strategy='TOP1_CLASS'):
        r'''
        Apply postprocess operation for prediction result.

        Args:
            input (numpy.ndarray): Prediction result; must be 2-D, only row 0 is used.
            strategy (str): Specifies the postprocess strategy; must be one of
                `self.transform_strategy`. Default: TOP1_CLASS.

        Returns:
            str, the postprocess result. For `TOP1_CLASS` a single
            "TOP1: <label>, score: <score>" string. For any other accepted
            strategy, one "TOPk: <label>, score: <score>" line per ranked class,
            limited to five lines (fewer when there are fewer classes).

        Raises:
            TypeError: If `input` is not a numpy.ndarray or is not 2-D.
            ValueError: If `strategy` is not in `self.transform_strategy`.
        '''
        if not isinstance(input, np.ndarray):
            raise TypeError("Input should be NumPy, got {}.".format(
                type(input)))
        if input.ndim != 2:
            raise TypeError("Input should be 2-D Numpy, got {}.".format(
                input.ndim))
        if strategy not in self.transform_strategy:
            raise ValueError("Strategy should be one of {}, got {}.".format(
                self.transform_strategy, strategy))

        softmax = Softmax()
        scores = softmax(ts.array(input)).asnumpy()[0]
        if strategy == 'TOP1_CLASS':
            score = max(scores)
            return ('TOP1: ' + str(self.labels[input[0].argmax()]) +
                    ', score: ' + str(format(score, '.20f')))

        # Labels are ranked by the raw prediction and scores by the softmax
        # output, both in descending order.
        ranked_labels = np.argsort(input[0])[::-1]
        ranked_scores = np.sort(scores)[::-1]

        # Slicing plus zip caps the report at five entries and also copes with
        # models that have fewer than five classes (previously an IndexError).
        top_entries = zip(ranked_labels[:5], ranked_scores[:5].tolist())
        lines = [
            'TOP' + str(rank) + ": " + str(self.labels[label_idx]) +
            ", score: " + str(format(top_score, '.20f')) + '\n'
            for rank, (label_idx, top_score) in enumerate(top_entries, start=1)
        ]
        return ''.join(lines)
示例#8
0
def mobilenetv2_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs,
                   total_epochs, steps_per_epoch):
    """
    Generate learning rate for mobilenetv2.

    The per-step schedule rises linearly from `lr_init` towards `lr_max`
    during warmup, then follows a cosine curve from `lr_max` down to `lr_end`.
    Negative values are replaced by 0.0.

    Args:
       global_step (int): Index of the first step to keep; earlier entries of
           the schedule are sliced off.
       lr_init (float): Init learning rate.
       lr_end (float): End learning rate.
       lr_max (float): Max learning rate.
       warmup_epochs (int): Number of warmup epochs.
       total_epochs (int): Total epoch of training.
       steps_per_epoch (int): Steps of one epoch.

    Returns:
       Tensor, learning rate for the steps from `global_step` onwards.
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def rate_at(step):
        # Linear warmup first, cosine decay afterwards.
        if step < warmup_steps:
            value = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            angle = math.pi * (step - warmup_steps) / (total_steps - warmup_steps)
            value = lr_end + (lr_max - lr_end) * (1. + math.cos(angle)) / 2.
        return 0.0 if value < 0.0 else value

    schedule = ts.array([rate_at(step) for step in range(total_steps)],
                        dtype=ts.float32)
    return schedule[global_step:]
示例#9
0
def web_predict(instance, servable_name, servable_model, dataset_name, strategy):
    """
    Predict the result based on the input data and package it for the web front end.

    A network will be constructed based on the input and servable data, then load the checkpoint and do the predict.
    The raw input is pre-processed and the output post-processed by the transform registered for `dataset_name`.

    Args:
        instance (dict): the input image, with keys of `shape`, `dtype` and `data` (`data` is a JSON-encoded nested list).
        servable_name (str): servable name, used as the checkpoint sub-directory under `serving_path`
        servable_model (dict): model description with keys `name` and `format`, plus `g_model` for cycle_gan
            or `class_num` for the other models
        dataset_name (str): key into `transform_checker` selecting the pre/post-processing transform
        strategy (str): output strategy, usually select between `TOP1_CLASS` and `TOP5_CLASS`, for cyclegan, select between `gray2color` and `color2gray`

    Returns:
        dict, `{"status": 0, "instance": {"res_msg": ..., "data": ...}}` on success, or
        `{"status": 1, "err_msg": ...}` when a check fails.

    Examples:
        >>> # In the server part, after servable_search
        >>> res = web_predict(instance, servable_name, servable['model'], dataset_name, strategy)
        >>> return jsonify(res)
    """

    # check if servable model name is valid
    model_name = servable_model['name']
    net_func = model_checker.get(model_name)
    if net_func is None:
        err_msg = "Currently model_name only supports " + str(list(model_checker.keys())) + "!"
        return {"status": 1, "err_msg": err_msg}

    # check if model_format is valid
    # (an exact comparison: `not in ("ckpt")` was a substring test on a plain str)
    model_format = servable_model['format']
    if model_format != "ckpt":
        err_msg = "Currently model_format only supports `ckpt`!"
        return {"status": 1, "err_msg": err_msg}

    # Check if dataset supports; report it like the other validation errors
    # instead of terminating the serving process.
    trans_func = transform_checker.get(dataset_name)
    if trans_func is None:
        err_msg = "Currently dataset_name only supports {}!".format(list(transform_checker.keys()))
        return {"status": 1, "err_msg": err_msg}

    # process the original data
    ori_img = np.array(json.loads(instance['data']), dtype=instance['dtype'])
    if dataset_name == 'mnist':
        image = trans_func(ori_img)
    else:
        # every other dataset gets its channels converted BGR -> RGB first
        cvt_image = cv2.cvtColor(ori_img, cv2.COLOR_BGR2RGB)
        image = trans_func(cvt_image)

    input_data = ts.array(image.tolist(), dtype=image.dtype.name)

    res_msg = ''
    if model_name == "cycle_gan":
        g_model = servable_model['g_model']
        if strategy == 'gray2color':
            # build the network
            G_generator, _ = net_func(g_model=g_model)
            ckpt_name = 'G_A'

        elif strategy == 'color2gray':
            _, G_generator = net_func(g_model=g_model)
            ckpt_name = 'G_B'
        else:
            err_msg = "Currently cycle_gan strategy only supports `gray2color` and `color2gray`!"
            return {"status": 1, "err_msg": err_msg}
        ckpt_path = os.path.join(serving_path, servable_name, ckpt_name + "." + model_format)
        out_img = cyclegan_predict(G_generator, input_data, ckpt_path)
        res_msg = '原图使用{}风格迁移效果'.format(strategy)
        data = numpy2base64(out_img)
    else:
        # build the network
        class_num = servable_model['class_num']
        net = net_func(class_num=class_num, is_training=False)
        serve_model = model.Model(net)

        # load checkpoint
        ckpt_path = os.path.join(serving_path, servable_name, model_name + "." + model_format)
        if not os.path.isfile(ckpt_path):
            err_msg = "The model path " + ckpt_path + " not exist!"
            return {"status": 1, "err_msg": err_msg}
        serve_model.load_checkpoint(ckpt_path)

        # execute the network to perform model prediction
        output = serve_model.predict(ts.expand_dims(input_data, 0))

        if model_name == "ssd300":
            output_np = (ts.concatenate((output[0], output[1]), axis=-1).asnumpy())
            ih, iw, _ = instance['shape']
            bbox_data = trans_func.postprocess(output_np, (ih, iw), strategy)
            print(bbox_data)
            bbox_num = len(bbox_data)
            if not bbox_num:
                err_msg = "抱歉!未检测到任何种类,无法标注。"
                return {"status": 1, "err_msg": err_msg}
            out_img = draw_boxes_in_image(bbox_data, ori_img)
            # the best detection already carries its own category
            max_det = max(bbox_data, key=lambda k: k['score'])
            max_score = max_det['score']
            category = max_det['category_id']
            res_msg = '图中共标注了:{}个框,其中物种{}的得分最高, 为{}。'.format(bbox_num, category, round(max_score, 3))
            data = numpy2base64(cv2.cvtColor(out_img, cv2.COLOR_BGR2RGB))
        else:
            output_np = output.asnumpy()
            res_msg = trans_func.postprocess(output_np, strategy)
            data = numpy2base64(ori_img)

    res = {
        "status": 0,
        "instance": {
            "res_msg": res_msg,
            "data": data
        }
    }
    return res
示例#10
0
def predict(instance, servable_name, servable_model, strategy):
    """
    Predict the result based on the input data.

    A network will be constructed based on the input and servable data, then load the checkpoint and do the predict.

    Args:
        instance (dict): the dict of input image after transformation, with keys of `shape`, `dtype` and `data`
            (`data` is a JSON-encoded nested list).
        servable_name (str): servable name, used as the checkpoint sub-directory under `serving_path`
        servable_model (dict): model description with keys `name` and `format`, plus `g_model` for cycle_gan
            or `class_num` for the other models
        strategy (str): output strategy, usually select between `TOP1_CLASS` and `TOP5_CLASS`, for cyclegan, select between `gray2color` and `color2gray`

    Returns:
        dict, `{"status": 0, "instance": {"shape": ..., "dtype": ..., "data": ...}}` on success, with `data`
        JSON-encoded, or `{"status": 1, "err_msg": ...}` when a check fails.

    Examples:
        >>> # In the server part, after servable_search
        >>> res = predict(instance, servable_name, servable['model'], strategy)
        >>> return jsonify(res)
    """

    # check if servable model name is valid
    model_name = servable_model['name']
    net_func = model_checker.get(model_name)
    if net_func is None:
        err_msg = "Currently model_name only supports " + str(list(model_checker.keys())) + "!"
        return {"status": 1, "err_msg": err_msg}

    # check if model_format is valid
    # (an exact comparison: `not in ("ckpt")` was a substring test on a plain str)
    model_format = servable_model['format']
    if model_format != "ckpt":
        err_msg = "Currently model_format only supports `ckpt`!"
        return {"status": 1, "err_msg": err_msg}

    # parse the input data
    input_data = ts.array(json.loads(instance['data']), dtype=instance['dtype'])

    if model_name == "cycle_gan":
        g_model = servable_model['g_model']
        if strategy == 'gray2color':
            # build the network
            G_generator, _ = net_func(g_model=g_model)
            ckpt_name = 'G_A'

        elif strategy == 'color2gray':
            _, G_generator = net_func(g_model=g_model)
            ckpt_name = 'G_B'
        else:
            err_msg = "Currently cycle_gan strategy only supports `gray2color` and `color2gray`!"
            return {"status": 1, "err_msg": err_msg}
        ckpt_path = os.path.join(serving_path, servable_name, ckpt_name + "." + model_format)
        data = cyclegan_predict(G_generator, input_data, ckpt_path)
    else:
        # build the network
        class_num = servable_model['class_num']
        net = net_func(class_num=class_num, is_training=False)
        serve_model = model.Model(net)

        # load checkpoint
        ckpt_path = os.path.join(serving_path, servable_name, model_name + "." + model_format)
        if not os.path.isfile(ckpt_path):
            err_msg = "The model path " + ckpt_path + " not exist!"
            return {"status": 1, "err_msg": err_msg}
        serve_model.load_checkpoint(ckpt_path)

        # execute the network to perform model prediction
        output = serve_model.predict(ts.expand_dims(input_data, 0))

        # ssd300 yields two outputs that are joined along the last axis
        data = (ts.concatenate((output[0], output[1]), axis=-1).asnumpy() if model_name == "ssd300"
                else output.asnumpy())
    return {
        "status": 0,
        "instance": {
            "shape": data.shape,
            "dtype": data.dtype.name,
            "data": json.dumps(data.tolist())
        }
    }
示例#11
0
def predict(instance, servable_name, servable_model, strategy):
    """
    Predict the result based on the input data.

    A network is constructed from the servable description, the checkpoint
    under `/etc/tinyms/serving/<servable_name>/` is loaded and the prediction
    is returned in serialised form.

    Args:
        instance (dict): the input, with keys `dtype` and `data` (`data` is a
            JSON-encoded nested list).
        servable_name (str): servable name, used as the checkpoint sub-directory.
        servable_model (dict): model description with keys `name` and `format`,
            plus `g_model` for cycle_gan or `class_num` for the other models.
        strategy (str): for cycle_gan, one of `gray2color` and `color2gray`;
            not read for the other models.

    Returns:
        dict, `{"status": 0, "instance": {"shape": ..., "dtype": ..., "data": ...}}`
        on success, with `data` JSON-encoded, or `{"status": 1, "err_msg": ...}`
        when a check fails.
    """
    # check if servable model name is valid
    model_name = servable_model['name']
    net_func = model_checker.get(model_name)
    if net_func is None:
        err_msg = "Currently model_name only supports " + str(
            list(model_checker.keys())) + "!"
        return {"status": 1, "err_msg": err_msg}

    # check if model_format is valid
    # (an exact comparison: `not in ("ckpt")` was a substring test on a plain str)
    model_format = servable_model['format']
    if model_format != "ckpt":
        err_msg = "Currently model_format only supports `ckpt`!"
        return {"status": 1, "err_msg": err_msg}

    # parse the input data
    input_data = ts.array(json.loads(instance['data']),
                          dtype=instance['dtype'])

    # every checkpoint lives below this directory
    serving_root = "/etc/tinyms/serving"

    if model_name == "cycle_gan":
        g_model = servable_model['g_model']
        if strategy == 'gray2color':
            # build the network
            G_generator, _ = net_func(g_model=g_model)
            ckpt_name = 'G_A'

        elif strategy == 'color2gray':
            _, G_generator = net_func(g_model=g_model)
            ckpt_name = 'G_B'
        else:
            err_msg = "Currently cycle_gan strategy only supports `gray2color` and `color2gray`!"
            return {"status": 1, "err_msg": err_msg}
        ckpt_path = os.path.join(serving_root, servable_name,
                                 ckpt_name + "." + model_format)

        data = cyclegan_predict(G_generator, input_data, ckpt_path)
    else:
        # build the network
        class_num = servable_model['class_num']
        net = net_func(class_num=class_num)
        serve_model = model.Model(net)

        # load checkpoint
        ckpt_path = os.path.join(serving_root, servable_name,
                                 model_name + "." + model_format)
        if not os.path.isfile(ckpt_path):
            err_msg = "The model path " + ckpt_path + " not exist!"
            return {"status": 1, "err_msg": err_msg}
        serve_model.load_checkpoint(ckpt_path)

        # execute the network to perform model prediction
        output = serve_model.predict(ts.expand_dims(input_data, 0))

        # ssd300 yields two outputs that are joined along the last axis
        data = (ts.concatenate((output[0], output[1]), axis=-1).asnumpy()
                if model_name == "ssd300" else output.asnumpy())
    return {
        "status": 0,
        "instance": {
            "shape": data.shape,
            "dtype": data.dtype.name,
            "data": json.dumps(data.tolist())
        }
    }
示例#12
0
    def construct(self, from_tensor, to_tensor, attention_mask):
        """Compute attention of `from_tensor` over `to_tensor`.

        The inputs are reshaped to 2d, projected into query/key/value, scored,
        optionally masked, passed through softmax and dropout, and finally used
        to weight the values. When `self.use_relative_positions` is set, extra
        position-dependent terms are added to both the scores and the context.

        Args:
            from_tensor: source of the queries; reshaped to `self.shape_from_2d`.
            to_tensor: source of the keys and values; reshaped to `self.shape_to_2d`.
            attention_mask: mask that is only read when `self.has_attention_mask`
                is set (presumably 1 marks positions that may be attended to --
                TODO confirm).

        Returns:
            The context tensor reshaped to `self.shape_return`.
        """
        # reshape 2d/3d input tensors to 2d
        from_tensor_2d = self.reshape(from_tensor, self.shape_from_2d)
        to_tensor_2d = self.reshape(to_tensor, self.shape_to_2d)
        query_out = self.query_layer(from_tensor_2d)
        key_out = self.key_layer(to_tensor_2d)
        value_out = self.value_layer(to_tensor_2d)

        query_layer = self.reshape(query_out, self.shape_from)
        query_layer = self.transpose(query_layer, self.trans_shape)
        key_layer = self.reshape(key_out, self.shape_to)
        key_layer = self.transpose(key_layer, self.trans_shape)

        # raw query/key scores
        attention_scores = self.matmul_trans_b(query_layer, key_layer)

        # use_relative_position, supplementary logic
        if self.use_relative_positions:
            # relations_keys is [F|T, F|T, H]
            relations_keys = self._generate_relative_positions_embeddings()
            relations_keys = self.cast_compute_type(relations_keys)
            # query_layer_t is [F, B, N, H]
            query_layer_t = self.transpose(query_layer, self.trans_shape_relative)
            # query_layer_r is [F, B * N, H]
            query_layer_r = self.reshape(query_layer_t,
                                         (self.from_seq_length,
                                          -1,
                                          self.size_per_head))
            # key_position_scores is [F, B * N, F|T]
            key_position_scores = self.matmul_trans_b(query_layer_r,
                                                      relations_keys)
            # key_position_scores_r is [F, B, N, F|T]
            # NOTE(review): the last dimension is self.from_seq_length here,
            # while the value-side reshape further down uses
            # self.to_seq_length -- confirm this is correct when the two
            # sequence lengths differ.
            key_position_scores_r = self.reshape(key_position_scores,
                                                 (self.from_seq_length,
                                                  -1,
                                                  self.num_attention_heads,
                                                  self.from_seq_length))
            # key_position_scores_r_t is [B, N, F, F|T]
            key_position_scores_r_t = self.transpose(key_position_scores_r,
                                                     self.trans_shape_position)
            attention_scores = attention_scores + key_position_scores_r_t

        # scale the scores by self.scores_mul
        attention_scores = self.multiply(self.scores_mul, attention_scores)

        if self.has_attention_mask:
            attention_mask = self.expand_dims(attention_mask, 1)
            # (1 - mask), computed in the dtype of the scores
            multiply_out = self.sub(self.cast(ts.array((1.0,)), self.get_dtype(attention_scores)),
                                    self.cast(attention_mask, self.get_dtype(attention_scores)))

            # (1 - mask) * self.multiply_data is added to the scores
            # (presumably a large negative constant so masked positions vanish
            # after softmax -- TODO confirm against where multiply_data is set)
            adder = self.multiply(multiply_out, self.multiply_data)
            attention_scores = self.add(adder, attention_scores)

        attention_probs = self.softmax(attention_scores)
        attention_probs = self.dropout(attention_probs)

        value_layer = self.reshape(value_out, self.shape_to)
        value_layer = self.transpose(value_layer, self.trans_shape)
        context_layer = self.matmul(attention_probs, value_layer)

        # use_relative_position, supplementary logic
        if self.use_relative_positions:
            # relations_values is [F|T, F|T, H]
            relations_values = self._generate_relative_positions_embeddings()
            relations_values = self.cast_compute_type(relations_values)
            # attention_probs_t is [F, B, N, T]
            attention_probs_t = self.transpose(attention_probs, self.trans_shape_relative)
            # attention_probs_r is [F, B * N, T]
            attention_probs_r = self.reshape(
                attention_probs_t,
                (self.from_seq_length,
                 -1,
                 self.to_seq_length))
            # value_position_scores is [F, B * N, H]
            value_position_scores = self.matmul(attention_probs_r,
                                                relations_values)
            # value_position_scores_r is [F, B, N, H]
            value_position_scores_r = self.reshape(value_position_scores,
                                                   (self.from_seq_length,
                                                    -1,
                                                    self.num_attention_heads,
                                                    self.size_per_head))
            # value_position_scores_r_t is [B, N, F, H]
            value_position_scores_r_t = self.transpose(value_position_scores_r,
                                                       self.trans_shape_position)
            context_layer = context_layer + value_position_scores_r_t

        # undo the head transpose and reshape to the output layout
        context_layer = self.transpose(context_layer, self.trans_shape)
        context_layer = self.reshape(context_layer, self.shape_return)

        return context_layer