def load(self, model_path):
    """Load the model file.
    [in] model_path: str, path to the model file
    """
    logging.info("load model from: {}".format(model_path))
    start_time = time.time()
    self.label_list = None
    self.feature_name_list = list()
    self.feature_weight_dict = dict()
    self.softmax_feature_weight_dict = dict()
    with codecs.open(model_path, "r", "gb18030") as rf:
        for index, line in enumerate(rf):
            line = line.strip("\n")
            if index == 0:
                # the first line carries the comma-separated label ids
                self.label_list = [int(x) for x in line.split(" ")[-1].split(",")]
                class_num = len(self.label_list)
                assert class_num > 1, "class num should be greater than 1, actual {}".format(class_num)
                continue
            feature_id, feature_name, weights_str = line.split("\t")
            self.feature_name_list.append(feature_name)
            weights = [float(x) for x in weights_str.split(" ")]
            assert len(weights) == class_num, "wrong weight num at line #%d, expect %d, actual %d." \
                % (index + 1, class_num, len(weights))
            self.feature_weight_dict[feature_name] = weights
            self.softmax_feature_weight_dict[feature_name] = softmax(weights, axis=1)
    logging.info("cost time %.4fs." % (time.time() - start_time))
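# A made-up sketch of the model file layout that load() above expects: the first
# line carries the comma-separated label ids (only its last space-separated token
# is read), and every following line is
# "feature_id<TAB>feature_name<TAB>space-separated per-class weights".
# The label prefix, feature names and weights below are placeholders, not real data.
_example_model_lines = [
    "labels 0,1,2",
    "0\tgood_price\t0.12 -0.30 0.18",
    "1\tbad_service\t-0.45 0.22 0.23",
]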
def gen_feature_weight_dict(self, liblinear_model_path, feature_name_list):
    """Build the multiclass feature weights needed online from the liblinear model file
    and the list of retained features.
    [in] liblinear_model_path: str, path to the liblinear model file
         feature_name_list: list(str), feature literals, in the same order as the
                            features in the liblinear model file
    """
    self.label_list = list()
    self.feature_name_list = feature_name_list
    self.feature_weight_dict = dict()
    self.softmax_feature_weight_dict = dict()
    # Read the feature weights from the model file;
    # they are emitted per class, in ascending order of class label.
    class_num = None
    feature_index = 0
    logging.info("gen feature weight file from liblinear model: %s." % liblinear_model_path)
    start_time = time.time()
    with codecs.open(liblinear_model_path, "r", "gb18030") as rf:
        for index, line in enumerate(rf):
            line = line.strip("\n")
            if index == 1:
                class_num = int(line.split(" ")[-1])
            elif index == 2:
                labels = [int(x) for x in line.split(" ")[1:]]
                assert class_num == len(labels), "class num(%d) != labels size(%d)." % (class_num, len(labels))
                # For a = [3, 5, 4, 2, 1], arg_sort(a) = [4, 3, 0, 2, 1]: position 4 holds the
                # smallest value of a, position 3 the second smallest, and so on.
                # index_transfer sorts the columns by class label (labels are integers after
                # LabelEncoder encoding, hence comparable).
                # index_transfer[i] = j means the i-th smallest label sits in column j.
                index_transfer = arg_sort(labels)
                logging.info("position rank: " + ",".join([str(x) for x in index_transfer]))
                for class_index in range(class_num):
                    self.label_list.append(str(labels[index_transfer[class_index]]))
            if index < 6:
                continue
            # liblinear weight lines end with a trailing space
            weights = line.strip(" ").split(" ")
            assert len(weights) == class_num or class_num == 2, \
                "wrong weight num at line #%d, expect %d, actual %d." % (index + 1, class_num, len(weights))
            reordered_weights = list()
            if class_num > 2:
                for weight_index in range(class_num):
                    reordered_weights.append(float(weights[index_transfer[weight_index]]))
            else:
                # In the binary case liblinear stores a single weight per feature: it belongs to
                # the first label, and its negation serves as the weight of the second label.
                # weights therefore has only one column here.
                reordered_weights = [0, 0]
                # the stored weight belongs to the first label
                reordered_weights[index_transfer[0]] = float(weights[0])
                # the second label gets the negated weight
                reordered_weights[index_transfer[1]] = -float(weights[0])
            self.feature_weight_dict[self.feature_name_list[feature_index]] = reordered_weights
            self.softmax_feature_weight_dict[self.feature_name_list[feature_index]] = \
                softmax(reordered_weights, axis=1)
            feature_index += 1
    logging.info("cost time %.4fs." % (time.time() - start_time))
    self.model_loaded = True
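# A small worked example of the label-reordering logic above, assuming arg_sort
# behaves like numpy.argsort; the labels and weights here are invented.
import numpy as np

labels = [3, 5, 4, 2, 1]               # label line of a hypothetical model
index_transfer = np.argsort(labels)    # -> [4, 3, 0, 2, 1]
weights = [0.7, -0.2, 0.1, 0.4, -0.6]  # one weight column per label position
reordered = [weights[j] for j in index_transfer]
# reordered[i] is the weight of the i-th smallest label:
# sorted labels -> [1, 2, 3, 4, 5], reordered -> [-0.6, 0.4, 0.7, 0.1, -0.2]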
def post_process(self, im, sim_ops, scale_factor):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!
    Post-processing of the results from network. This function can be used to
    visualize data from hardware.
    """
    prob = softmax(sim_ops[0][0])
    preds = (np.argsort(prob)[::-1])[0:5]
    for p in preds:
        print_msg(str(classes[p]) + ' , ' + str(prob[p]), 3)
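# A minimal, self-contained illustration of the top-5 selection in post_process();
# `classes`, `print_msg` and the simulator output format are assumed to come from
# elsewhere in this repo, so plain numpy values are used instead.
import numpy as np

logits = np.array([2.0, 0.5, -1.0, 3.0, 0.0])
prob = np.exp(logits - logits.max())
prob /= prob.sum()                   # same effect as softmax(logits)
top5 = np.argsort(prob)[::-1][:5]    # class indices, highest probability first
# top5 -> array([3, 0, 1, 4, 2])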
def next(self):
    values = []
    while True:
        line = next(self.stream).strip()
        if not line:
            break
        values.append(self.__parse_line(line))
    # log.debug("Predictions: {}".format(values))
    labels = self.labels if self.labels else range(len(values))
    # lower raw values are better, so negate them before turning them into probabilities
    probs = softmax([-v for v in values])
    # log.debug("Probabilities: {}".format(probs))
    preds = list(zip(labels, probs))
    if self.sort:
        preds.sort(key=lambda x: x[1], reverse=True)
    return preds
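# Sketch of what next() returns for one block of parsed scores, assuming a
# scipy-style softmax; lower raw values mean better predictions, hence the
# negation before normalization. The labels and values are invented.
from scipy.special import softmax

values = [1.2, 0.3, 2.5]
labels = ["keep", "insert", "delete"]
probs = softmax([-v for v in values])
preds = sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
# preds -> [('insert', ...), ('keep', ...), ('delete', ...)]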
def format_alternatives(self, cword, preds, nexttok):
    pred = self.predicted_word(cword, preds)
    alts = []
    if pred.lower() != cword.err.lower():
        # Use the predicted probabilities for both the source/original word and
        # the prediction, then turn them into true probabilities with the
        # softmax function.
        prob_err = dict(preds)[cword.src]
        prob_pred = preds[0][1]
        prob_err, prob_pred = softmax([prob_err, prob_pred])
        # Alternatively, use the sum of probabilities of the confusion words
        # that differ from the source/original word as the probability of the
        # prediction:
        # prob_pred = 1.0 - prob_err
        alts.append(self.__plf_alternative(cword.err, nexttok, prob_err))
        alts.append(self.__plf_alternative(pred, nexttok, prob_pred))
    return ','.join(alts)
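# Worked example of the two-way normalization in format_alternatives(), again
# assuming a scipy-style softmax; the two scores are invented log-probabilities.
from scipy.special import softmax

prob_err, prob_pred = softmax([-3.2, -1.1])
# prob_err + prob_pred == 1.0; the prediction gets the larger share
# (roughly 0.11 vs 0.89 for these scores).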
def _main(args):
    img_path = os.path.expanduser(args.img_path)
    file_name = os.path.basename(img_path)
    net = args.net
    output_path = args.output
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    img = cv2.imread(img_path)
    X, ratios = preprocess_test_img(img, config)
    # input tensors
    input_tensor = Input(shape=(img_height, img_widht, 3))
    input_rois = Input(shape=(None, 5))
    # shared conv layers
    model_body = get_model_body(input_tensor, net=net)
    # rpn
    rpn_model = get_rpn_model(model_body, anchors_num)
    # fastrcnn
    fastrcnn_model = get_fastrcnn_model(model_body=model_body, input_rois=input_rois,
                                        classes_num=classes_num, keep_prob=keep_prob)
    # load weights
    if not os.path.exists(args.rpn_model_weights_path) or \
            not os.path.exists(args.fastrccn_model_weights_path):
        raise ValueEmptyException('rpn_model_weights_path or fastrccn_model_weights_path does not exist, please check it')
    rpn_model.load_weights(args.rpn_model_weights_path, by_name=True)
    fastrcnn_model.load_weights(args.fastrccn_model_weights_path, by_name=True)
    rpn_bbox_cls, rpn_bbox_pred = rpn_model.predict(X)
    # shape (None, 5): (batch_id, x1, y1, x2, y2)
    rois = proposal_layer(rpn_bbox_cls, rpn_bbox_pred, (img_widht, img_height), feat_stride, eval_mode=True)
    rois = np.expand_dims(rois, axis=0)
    # (1, 300, 21) and (1, 300, 80)
    fastrcnn_cls_output, fastrcnn_reg_output = fastrcnn_model.predict([X, rois])
    # (300, 21)
    fastrcnn_cls_output = np.squeeze(fastrcnn_cls_output, axis=0)
    fastrcnn_cls_output = softmax(fastrcnn_cls_output)
    # (300, 80)
    fastrcnn_reg_output = np.squeeze(fastrcnn_reg_output, axis=0)
    # index of the highest-scoring class per ROI, shape (300,)
    argmax_cls = np.argmax(fastrcnn_cls_output, axis=1)
    print(argmax_cls)
    # score of the highest-scoring class per ROI, shape (300,)
    max_cls = fastrcnn_cls_output[np.arange(len(argmax_cls)), argmax_cls]
    # (None, 6) -- x1, y1, x2, y2, score, cls
    pred_boxes = bbox_reg_target(fastrcnn_reg_output, argmax_cls, rois, max_cls)
    # non-maximum suppression
    keep_ind = py_cpu_nms(pred_boxes, thresh)
    final_boxes = pred_boxes[keep_ind, :]
    # draw_rect
    # final_boxes = [[50, 50, 200, 300, 0.9, 1]]
    for idx in range(len(final_boxes)):
        # x1, y1, x2, y2, score, cls
        x1, y1, x2, y2, score, cls = final_boxes[idx]
        # scale the coordinates back to the original image size with ratios
        x1, x2 = int(round(x1 / ratios[0])), int(round(x2 / ratios[0]))
        y1, y2 = int(round(y1 / ratios[1])), int(round(y2 / ratios[1]))
        color = (int(class_color_mapping.get(cls)[0]),
                 int(class_color_mapping.get(cls)[1]),
                 int(class_color_mapping.get(cls)[2]))
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)
        text_label = '%s: %s' % (class_names[cls], score)
        textOrg = (x1, y1 - 3)
        # cv2.rectangle(img, (x1, y1 - 6), (x2, y1), color, -1)
        cv2.putText(img, text_label, textOrg, cv2.FONT_HERSHEY_PLAIN, 0.4, color, 1)
    cv2.imwrite(os.path.join(output_path, file_name), img)
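# A minimal argparse sketch matching the attributes that _main() reads; the flag
# names and defaults are assumptions, since the real parser is defined elsewhere.
import argparse

def _parse_args():
    parser = argparse.ArgumentParser(description="Faster R-CNN single-image inference")
    parser.add_argument("--img_path", required=True, help="input image path")
    parser.add_argument("--net", default="vgg16", help="backbone passed to get_model_body")
    parser.add_argument("--output", default="output", help="directory for the rendered result")
    parser.add_argument("--rpn_model_weights_path", required=True)
    parser.add_argument("--fastrccn_model_weights_path", required=True)
    return parser.parse_args()

# if __name__ == "__main__":
#     _main(_parse_args())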