Example #1
    def load(self, model_path):
        """加载模型文件
        [in]  model_path: str, 模型文件加载地址
        """
        logging.info("load model from: {}".format(model_path))
        start_time = time.time()

        self.label_list = None
        self.feature_name_list = list()
        self.feature_weight_dict = dict()
        self.softmax_feature_weight_dict = dict()

        with codecs.open(model_path, "r", "gb18030") as rf:
            for index, line in enumerate(rf):
                line = line.strip("\n")
                if index == 0:
                    self.label_list = [int(x) for x in line.split(" ")[-1].split(",")]
                    class_num = len(self.label_list)
                    assert class_num > 1, "class num should be greater than 1, actual {}".format(class_num)
                    continue

                feature_id, feature_name, weights_str = line.split("\t")
                self.feature_name_list.append(feature_name)

                weights = [float(x) for x in weights_str.split(" ")]
                assert len(weights) == class_num, "wrong weight num at line #%d, expect %d, actual %d." \
                                                  % (index+1, class_num, len(weights))

                self.feature_weight_dict[feature_name] = weights
                self.softmax_feature_weight_dict[feature_name] = softmax(weights, axis=1)

        logging.info("cost time %.4fs." % (time.time() - start_time))
Example #2
    def gen_feature_weight_dict(self, liblinear_model_path, feature_name_list):
        """根据liblinear生成的模型文件和特征保留文件生成线上需要的multiclass特征权重文件
        [in]  liblinear_model_path: str, liblinear模型文件地址
              feature_name_list: list(str), 特征字面列表, 其顺序应与liblinear模型文件中特征的顺序一致
        """

        self.label_list = list()
        self.feature_name_list = feature_name_list
        self.feature_weight_dict = dict()
        self.softmax_feature_weight_dict = dict()

        # Read the feature weights from the model file.
        # They are stored in ascending order of class value.
        class_num = None
        feature_index = 0
        logging.info("gen feature weight file from liblinear model: %s." % liblinear_model_path)
        start_time = time.time()
        with codecs.open(liblinear_model_path, "r", "gb18030") as rf:
            for index, line in enumerate(rf):
                line = line.strip("\n")
                if index == 1:
                    class_num = int(line.split(" ")[-1])
                elif index == 2:
                    labels = [int(x) for x in line.split(" ")[1:]]
                    assert class_num == len(labels), "class num(%d) != labels size(%d)." % (class_num, len(labels))
                    # For an array a=[3,5,4,2,1], arg_sort(a)=[4,3,0,2,1]: position 4 holds the smallest value in a, position 3 the second smallest.
                    # index_transfer is the class array: the result of sorting the columns by class value (classes are LabelEncoder-encoded integers, so they are comparable).
                    # index_transfer[i]=j means the i-th smallest class is in column j.
                    index_transfer = arg_sort(labels)
                    logging.info("position rank: " + ",".join([str(x) for x in index_transfer]))
                    for class_index in range(class_num):
                        self.label_list.append(str(labels[index_transfer[class_index]]))

                if index < 6:
                    continue

                # Lines in the liblinear weight file end with a trailing space.
                weights = line.strip(" ").split(" ")
                assert len(weights) == class_num or class_num == 2, \
                    "wrong weight num at line #%d, expect %d, actual %d." % (index+1, class_num, len(weights))
                reordered_weights = list()
                if class_num > 2:
                    for weight_index in range(class_num):
                        reordered_weights.append(float(weights[index_transfer[weight_index]]))
                else:
                    # For binary classification, liblinear keeps only a one-dimensional weight,
                    # so weights has a single column here.
                    reordered_weights = [0, 0]
                    # The weight is for the first label.
                    reordered_weights[index_transfer[0]] = float(weights[0])
                    # The second label gets the negated weight.
                    reordered_weights[index_transfer[1]] = -float(weights[0])

                self.feature_weight_dict[self.feature_name_list[feature_index]] = reordered_weights
                self.softmax_feature_weight_dict[self.feature_name_list[feature_index]] = \
                    softmax(reordered_weights, axis=1)
                feature_index += 1
        logging.info("cost time %.4fs." % (time.time() - start_time))
        self.model_loaded = True
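Example #2 depends on an arg_sort helper whose behavior is only described in the comments (a=[3,5,4,2,1] maps to [4,3,0,2,1]). A minimal sketch consistent with that description, not necessarily the original implementation:

def arg_sort(values):
    # Indices that would sort `values` in ascending order,
    # e.g. arg_sort([3, 5, 4, 2, 1]) == [4, 3, 0, 2, 1].
    return sorted(range(len(values)), key=lambda i: values[i])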
Example #3
    def post_process(self, im, sim_ops, scale_factor):
        """
		MUST HAVE FUNCTION IN ALL NETWORKS !!!! 
		Post-processing of the results from network. This function can be used to visualize data from hardware.  
		"""
        prob = softmax(sim_ops[0][0])
        preds = (np.argsort(prob)[::-1])[0:5]
        for p in preds:
            print_msg(str(classes[p]) + ' , ' + str(prob[p]), 3)
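The top-5 selection above sorts the probability vector in descending order and keeps the first five indices. A small standalone illustration with made-up probabilities:

import numpy as np

prob = np.array([0.05, 0.40, 0.10, 0.30, 0.15])
preds = np.argsort(prob)[::-1][0:5]  # class indices by descending probability
print(preds)                         # [1 3 4 2 0]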
Example #4
 def next(self):
     values = []
     while True:
         line = next(self.stream).strip()
         if not line:
             break
         values.append(self.__parse_line(line))
     # log.debug("Predictions: {}".format(values))
     labels = self.labels if self.labels else range(len(values))
     probs = softmax([-v for v in values])
     # log.debug("Probabilities: {}".format(probs))
     preds = list(zip(labels, probs))
     if self.sort:
         preds.sort(key=lambda x: x[1], reverse=True)
     return preds
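Example #4 negates the raw values before applying softmax, so that smaller scores (for instance distances or losses) receive higher probabilities. A small illustration, assuming scipy.special.softmax is the helper in scope:

from scipy.special import softmax

values = [0.2, 1.5, 3.0]               # hypothetical raw scores, lower is better
probs = softmax([-v for v in values])  # negation flips the ordering
print(probs.argmax())                  # 0 -> the smallest raw score gets the highest probability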
Example #5
    def format_alternatives(self, cword, preds, nexttok):
        pred = self.predicted_word(cword, preds)
        alts = []

        if pred.lower() != cword.err.lower():
            # Use the predicted probability for both the source/original word and
            # the prediction, then make them true probabilities with the softmax
            # function.
            prob_err = dict(preds)[cword.src]
            prob_pred = preds[0][1]
            prob_err, prob_pred = softmax([prob_err, prob_pred])
            # Alternatively, use the sum of probabilities for confusion words that
            # differ from the source/original word as the probability for the
            # prediction.
            # prob_pred = 1.0 - prob_err

            alts.append(self.__plf_alternative(cword.err, nexttok, prob_err))
            alts.append(self.__plf_alternative(pred, nexttok, prob_pred))

        return ','.join(alts)
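In Example #5 the two raw prediction scores are turned into a probability pair with softmax, so both land in (0, 1) and sum to 1, which is also what the commented-out alternative prob_pred = 1.0 - prob_err relies on. A tiny sketch with hypothetical scores, again assuming scipy.special.softmax:

from scipy.special import softmax

prob_err, prob_pred = softmax([-1.2, 0.8])        # hypothetical raw scores
print(prob_err, prob_pred, prob_err + prob_pred)  # two probabilities summing to 1.0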
Example #6
def _main(args):
    img_path = os.path.expanduser(args.img_path)
    file_name = os.path.basename(img_path)
    net = args.net
    output_path = args.output
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    img = cv2.imread(img_path)

    X, ratios = preprocess_test_img(img, config)

    # input tensors
    input_tensor = Input(shape=(img_height, img_widht, 3))
    input_rois = Input(shape=(None, 5))

    # shared conv layers
    model_body = get_model_body(input_tensor, net=net)

    # rpn
    rpn_model = get_rpn_model(model_body, anchors_num)
    # fastrcnn
    fastrcnn_model = get_fastrcnn_model(model_body=model_body, input_rois=input_rois,
                                        classes_num=classes_num, keep_prob=keep_prob)
    # load weights
    if not os.path.exists(args.rpn_model_weights_path) or \
            not os.path.exists(args.fastrccn_model_weights_path):
        raise ValueEmptyException('rpn_model_weights_path or fastrccn_model_weights_path is null, please check it')
    rpn_model.load_weights(args.rpn_model_weights_path, by_name=True)
    fastrcnn_model.load_weights(args.fastrccn_model_weights_path, by_name=True)

    rpn_bbox_cls, rpn_bbox_pred = rpn_model.predict(X)
    # shape (None, 5)   (batch_id, x1, y1, x2, y2)
    rois = proposal_layer(rpn_bbox_cls, rpn_bbox_pred, (img_widht, img_height), feat_stride, eval_mode=True)
    rois = np.expand_dims(rois, axis=0)
    # (1, 300, 21)  (1, 300, 80)
    fastrcnn_cls_output, fastrcnn_reg_output = fastrcnn_model.predict([X, rois])
    # (300, 21)
    fastrcnn_cls_output = np.squeeze(fastrcnn_cls_output, axis=0)
    fastrcnn_cls_output = softmax(fastrcnn_cls_output)
    # (300, 80)
    fastrcnn_reg_output = np.squeeze(fastrcnn_reg_output, axis=0)
    # indices of the highest-scoring class, shape (300,)
    argmax_cls = np.argmax(fastrcnn_cls_output, axis=1)
    print(argmax_cls)
    # take the max class score for each RoI, shape (300,)
    max_cls = fastrcnn_cls_output[np.arange(len(argmax_cls)), argmax_cls]
    # (None, 6) -- x1, y1, x2, y2, score, cls
    pred_boxes = bbox_reg_target(fastrcnn_reg_output, argmax_cls, rois, max_cls)
    pred_boxes[:, [0, 2]] = pred_boxes[:, [0, 2]] / ratios[0]
    pred_boxes[:, [1, 3]] = pred_boxes[:, [1, 3]] / ratios[1]

    # non-maximum suppression
    keep_ind = py_cpu_nms(pred_boxes, thresh)
    final_boxes = pred_boxes[keep_ind, :]
    # draw_rect
    # final_boxes = [[50, 50, 200, 300, 0.9, 1]]
    for idx in range(len(final_boxes)):
        # x1, y1, x2, y2, score, cls
        x1, y1, x2, y2, score, cls = final_boxes[idx]
        # scale back to the original image size using ratios
        x1, x2 = int(round(x1 / ratios[0])), int(round(x2 / ratios[0]))
        y1, y2 = int(round(y1 / ratios[1])), int(round(y2 / ratios[1]))
        color = (int(class_color_mapping.get(cls)[0]),
                 int(class_color_mapping.get(cls)[1]),
                 int(class_color_mapping.get(cls)[2]))
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)
        text_label = '%s: %s' % (class_names[cls], score)
        textOrg = (x1, y1 - 3)
        # cv2.rectangle(img, (x1, y1 - 6), (x2, y1), color, -1)
        cv2.putText(img, text_label, textOrg, cv2.FONT_HERSHEY_PLAIN, 0.4, color, 1)
    cv2.imwrite(os.path.join(output_path, file_name), img)
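Example #6 filters the predicted boxes with py_cpu_nms, which is not shown. Below is a plain NumPy sketch of greedy non-maximum suppression that matches how it is called here (boxes as rows of x1, y1, x2, y2, score, ...; returns indices of boxes to keep); the original implementation may differ in details.

import numpy as np

def py_cpu_nms(dets, thresh):
    # Greedy NMS: repeatedly keep the highest-scoring box and drop
    # remaining boxes whose IoU with it exceeds `thresh`.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current box with all remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose overlap stays below the threshold.
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep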