def test():
    """Run a saved model over saved feature vectors and write the predictions.

    ``FLAGS.word_level`` picks the ``_w`` artifacts; otherwise the ``_c``
    artifacts are used (presumably word-level vs. character-level features).
    Both the feature object and the model are loaded with ``joblib``.
    """
    if FLAGS.word_level:
        vec_file, model_file = ('temp/ml/vec_w_tfidf.pkl',
                                'temp/ml/nbsvm_model_w.bin')
    else:
        vec_file, model_file = ('temp/ml/vec_c_tfidf.pkl',
                                'temp/ml/nbsvm_model_c.bin')

    features = joblib.load(vec_file)
    classifier = joblib.load(model_file)

    # NOTE(review): the model files are named "nbsvm" but the output file is
    # 'result_xgb.csv' -- looks like a copy-paste leftover; confirm before
    # renaming, since other tooling may read this path.
    utils.generate_result(y_pred=classifier.predict(features),
                          filename='result_xgb.csv')
def test():
    """Predict one label per sample with a saved fastText model and write them out.

    ``FLAGS.word_level`` selects both the second argument passed to
    ``utils.load_data`` and which model file (``_w`` or ``_c``) is loaded.
    Each sample is joined into a single space-separated string before
    prediction.
    """
    word_level = bool(FLAGS.word_level)
    samples = utils.load_data(False, word_level)
    if word_level:
        model_file = 'temp/ml/fast_model_w.bin'
    else:
        model_file = 'temp/ml/fast_model_c.bin'

    texts = [' '.join(tokens) for tokens in samples]
    model = fastText.load_model(model_file)

    def _predict_id(text):
        # Only the top-1 label is requested; its last four characters are the
        # key looked up in utils.tag2id.
        labels, _prob = model.predict(text, k=1)
        return utils.tag2id[labels[0][-4:]]

    preds = [_predict_id(text) for text in texts]
    utils.generate_result(y_pred=preds, filename='result_fast.csv')
def find_sift(im_source, im_search, threshold=0.8, rgb=True, good_ratio=FILTER_RATIO):
    """Locate ``im_search`` inside ``im_source`` using SIFT; keep only the best region.

    Args:
        im_source: image to search in (must support ``.any()`` and slicing).
        im_search: image to look for.
        threshold: minimum confidence required for a result to be returned.
        rgb: forwarded to ``_cal_sift_confidence``.
        good_ratio: forwarded to ``_get_key_points``.

    Returns:
        The value built by ``generate_result`` when the confidence reaches
        ``threshold``, otherwise ``None``. ``None`` is also returned when
        either image is missing/all-zero or when no point pair matched.
    """
    # Step 1: make sure both images are usable.
    if (im_source is None or not im_source.any()
            or im_search is None or not im_search.any()):
        return None

    # Step 2: extract key points and the list of good matches.
    kp_sch, kp_src, good = _get_key_points(im_source, im_search, good_ratio)

    # Step 3: derive the target region from the matched pairs.
    match_count = len(good)
    if match_count == 0:
        # No matched pair: nothing can be located.
        return None
    if match_count == 1:
        # A single pair gets the preset confidence and is returned directly.
        if ONE_POINT_CONFI >= threshold:
            return _handle_one_good_points(kp_src, good, threshold)
        return None
    if match_count in (2, 3):
        handler = (_handle_two_good_points if match_count == 2
                   else _handle_three_good_points)
        # Call the helper once and reuse its result; the earlier version
        # repeated the identical call just to unpack the tuple.
        origin_result = handler(im_source, im_search, kp_src, kp_sch, good)
        if isinstance(origin_result, dict):
            # The helper fell back to a finished result carrying the preset
            # confidence, so only the threshold check is left.
            return origin_result if ONE_POINT_CONFI >= threshold else None
        middle_point, pypts, w_h_range = origin_result
    else:
        # Four or more pairs: map the region through a single matrix.
        middle_point, pypts, w_h_range = _many_good_pts(
            im_source, im_search, kp_sch, kp_src, good)

    # Step 4: compute the confidence of the located region.
    # Sanity check of the region; per the original note it raises for regions
    # under 5 pixels or scaled by more than 5x.
    _target_error_check(w_h_range)

    # Crop the located region and resize it to the search image's size so the
    # two can be compared.
    x_min, x_max, y_min, y_max, w, h = w_h_range
    target_img = im_source[y_min:y_max, x_min:x_max]
    resize_img = cv2.resize(target_img, (w, h))
    confidence = _cal_sift_confidence(im_search, resize_img, rgb=rgb)

    best_match = generate_result(middle_point, pypts, confidence)
    print("[aircv][sift] threshold=%s, result=%s" % (threshold, best_match))
    return best_match if confidence >= threshold else None
def test():
    """Predict with a saved xgboost booster over saved features and write the results.

    ``FLAGS.word_level`` picks the ``_w`` artifacts; otherwise the ``_c``
    artifacts are used. The feature object is loaded with ``joblib`` and
    wrapped in an ``xgb.DMatrix`` before prediction.
    """
    if FLAGS.word_level:
        vec_file, model_file = ('temp/ml/vec_w_tfidf.pkl',
                                'temp/ml/xgb_model_w.bin')
    else:
        vec_file, model_file = ('temp/ml/vec_c_tfidf.pkl',
                                'temp/ml/xgb_model_c.bin')

    features = joblib.load(vec_file)
    print(features.shape)
    dtest = xgb.DMatrix(data=features)

    booster = xgb.Booster({'nthread': 40})  # empty booster to load into
    booster.load_model(model_file)          # restore the saved model

    print('Predict started!')
    predicts = booster.predict(dtest, ntree_limit=176)
    utils.generate_result(y_pred=predicts, filename='result_xgb.csv')
def _handle_one_good_points(kp_src, good, threshold):
    """Build the result for a SIFT match that produced exactly one point pair.

    The matched key point in the source image becomes the centre, and the
    confidence is the preset ``ONE_POINT_CONFI``. Returns ``None`` when that
    confidence is below ``threshold``.
    """
    # The centre is simply the position of the matched source key point.
    matched_pt = kp_src[good[0].trainIdx].pt
    middle_point = (int(matched_pt[0]), int(matched_pt[1]))
    confidence = ONE_POINT_CONFI

    # One pair cannot span a region, so all four corners collapse to the centre.
    pypts = [middle_point] * 4
    result = generate_result(middle_point, pypts, confidence)

    if confidence < threshold:
        return None
    return result
def get(self):
    """Answer a nearby-POI query, served from the cache when possible.

    Query arguments read: ``location`` ("lat,lng"), ``radius`` (default 50)
    and ``pagetoken``.

    Responses written:
        ``{"err_code": 2}`` when ``self.cache`` is falsy;
        ``{"err_code": 1}`` when ``location`` is missing;
        ``{"err_code": 0, "results": ...}`` on success (with
        ``next_page_token`` when the data was fetched from the upstream API);
        HTTP 501 when ``pagetoken`` is supplied, because paging is not
        implemented here.

    The body contains a ``yield``, so this is a generator; it is presumably
    wrapped by a Tornado coroutine decorator outside this view.
    """
    checkLoc = self.get_argument("location", None)
    checkRadius = self.get_argument("radius", 50)
    pageToken = self.get_argument("pagetoken", None)

    if not self.cache:
        logging.info("Redis is not connected")
        self.write(json.dumps({"err_code": 2}))
        self.finish()
        # Stop here: the response is finished and the cache is unusable.
        return

    if pageToken is not None:
        logging.info("request more Pois by page token parameter")
        # Paging was never implemented; this path used to fall through to
        # self.write(ret_data) with ret_data unbound. Report it explicitly.
        self.send_error(501)
        return

    logging.info("request the first 20 Pois")
    if checkLoc is None:
        logging.info("no location argument")
        self.write(json.dumps({"err_code": 1}))
        self.finish()
        # Stop here: without a location there is nothing to split or look up.
        return

    lat, lng = checkLoc.split(",")
    cache_key = "%s_%s" % (checkLoc, checkRadius)

    # Read the cache once: a second read could miss if the entry expires
    # between the check and the use.
    cached = self.cache.get(cache_key)
    if cached:
        logging.info("hit cache key: %s", cache_key)
        ret_data = '''{"err_code": 0, "results": %s } ''' % (cached,)
    else:
        logging.info("unhit cache key: %s, fetch from api.", cache_key)
        http_client = tornado.httpclient.AsyncHTTPClient()
        # NOTE(review): location/radius come straight from the request and are
        # inserted into the query string unescaped -- consider URL-encoding.
        http_request_uri = "{domain}/nearbysearch/json?location={loc}&radius={r}&{api}".format(
            domain=config.SERVICE_ADDR,
            loc=checkLoc,
            r=checkRadius,
            api=config.API_PARAMS)
        print(http_request_uri)
        logging.info("GetPoiList request: %s", http_request_uri)
        response = yield http_client.fetch(http_request_uri)

        result_list, next_page_token = utils.parse_poi_data(response.body)
        poi_list = utils.filter_poi_infos(result_list)
        utils.calculate_position(poi_list, float(lat), float(lng))
        results = utils.generate_result(poi_list)
        serialized = json.dumps(results['results'])
        ret_data = '''{"err_code": 0, "results": %s, "next_page_token": "%s"} ''' % (
            serialized, next_page_token)
        # Keep the serialized list for 300 seconds.
        self.cache.setex(cache_key, 300, serialized)

    self.write(ret_data)
    self.finish()
def predict():
    """Rebuild the vocabulary from the training data, run the NN model on the
    test data, ensemble the outputs and write them to ``result_nn.csv``.

    The vocabulary and tag mappings come from ``utils.build_vocab`` over the
    processed, truncated training texts; the test texts go through the same
    processing and are then converted to ids with that vocabulary.
    """
    # ---- NN model ---------------------------------------------------------
    label_list = ['人类作者', '自动摘要', '机器作者', '机器翻译']
    train_x, train_y = utils.read_file(is_train=True, label_list=label_list)
    train_x = utils.truncation(utils.process(train_x))
    word2id, id2word, tag2id, id2tag = utils.build_vocab(train_x, train_y, min_df=10)

    test_x = utils.read_file(is_train=False)
    test_x = utils.truncation(utils.process(test_x))
    test_x = utils.build_x_ids(test_x, word2id)

    vocab_size = len(word2id)
    emb_dim = 100
    num_classes = len(tag2id)
    print("测试集数据大小:%d" % (len(test_x)))
    print("vocab_size:%d num_classes:%d" % (vocab_size, num_classes))

    text_cnn = Graph('TextCNN', 'HierarchyCNN', vocab_size, emb_dim, num_classes)
    results = [text_cnn.run(test_x)]

    # ---- Other model ------------------------------------------------------
    # (none yet)

    # ---- Ensemble ---------------------------------------------------------
    final_result = ensemble(results)
    utils.generate_result(final_result, id2tag, 'result_nn.csv')
def get(self):
    """Answer a nearby-POI query, served from the cache when possible.

    Query arguments read: ``location`` ("lat,lng"), ``radius`` (default 50)
    and ``pagetoken``.

    Responses written:
        ``{"err_code": 2}`` when ``self.cache`` is falsy;
        ``{"err_code": 1}`` when ``location`` is missing;
        ``{"err_code": 0, "results": ...}`` on success (with
        ``next_page_token`` when the data was fetched from the upstream API);
        HTTP 501 when ``pagetoken`` is supplied, because paging is not
        implemented here.

    The body contains a ``yield``, so this is a generator; it is presumably
    wrapped by a Tornado coroutine decorator outside this view.
    """
    checkLoc = self.get_argument("location", None)
    checkRadius = self.get_argument("radius", 50)
    pageToken = self.get_argument("pagetoken", None)

    if not self.cache:
        logging.info("Redis is not connected")
        self.write(json.dumps({"err_code": 2}))
        self.finish()
        # Stop here: the response is finished and the cache is unusable.
        return

    if pageToken is not None:
        logging.info("request more Pois by page token parameter")
        # Paging was never implemented; this path used to fall through to
        # self.write(ret_data) with ret_data unbound. Report it explicitly.
        self.send_error(501)
        return

    logging.info("request the first 20 Pois")
    if checkLoc is None:
        logging.info("no location argument")
        self.write(json.dumps({"err_code": 1}))
        self.finish()
        # Stop here: without a location there is nothing to split or look up.
        return

    lat, lng = checkLoc.split(",")
    cache_key = "%s_%s" % (checkLoc, checkRadius)

    # Read the cache once: a second read could miss if the entry expires
    # between the check and the use.
    cached = self.cache.get(cache_key)
    if cached:
        logging.info("hit cache key: %s", cache_key)
        ret_data = '''{"err_code": 0, "results": %s } ''' % (cached,)
    else:
        logging.info("unhit cache key: %s, fetch from api.", cache_key)
        http_client = tornado.httpclient.AsyncHTTPClient()
        # NOTE(review): location/radius come straight from the request and are
        # inserted into the query string unescaped -- consider URL-encoding.
        http_request_uri = "{domain}/nearbysearch/json?location={loc}&radius={r}&{api}".format(
            domain=config.SERVICE_ADDR,
            loc=checkLoc,
            r=checkRadius,
            api=config.API_PARAMS)
        print(http_request_uri)
        logging.info("GetPoiList request: %s", http_request_uri)
        response = yield http_client.fetch(http_request_uri)

        result_list, next_page_token = utils.parse_poi_data(response.body)
        poi_list = utils.filter_poi_infos(result_list)
        utils.calculate_position(poi_list, float(lat), float(lng))
        results = utils.generate_result(poi_list)
        serialized = json.dumps(results['results'])
        ret_data = '''{"err_code": 0, "results": %s, "next_page_token": "%s"} ''' % (
            serialized, next_page_token)
        # Keep the serialized list for 300 seconds.
        self.cache.setex(cache_key, 300, serialized)

    self.write(ret_data)
    self.finish()
def find_template(im_source, im_search, threshold=0.8, rgb=False):
    """Return the single best template match of ``im_search`` in ``im_source``.

    Returns the value built by ``generate_result`` when its confidence
    reaches ``threshold``; otherwise ``None``.
    """
    # Step 1: validate the inputs.
    check_source_larger_than_search(im_source, im_search)

    # Step 2: compute the template-matching result matrix.
    res = _get_template_result_matrix(im_source, im_search)

    # Step 3: take the maximum of the matrix as the best candidate.
    _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(res)
    h, w = im_search.shape[:2]

    # Confidence of that candidate.
    confidence = _get_confidence_from_matrix(
        im_source, im_search, max_loc, max_val, w, h, rgb)

    # Location of the candidate: centre point plus rectangle.
    middle_point, rectangle = _get_target_rectangle(max_loc, w, h)
    best_match = generate_result(middle_point, rectangle, confidence)

    if confidence >= threshold:
        return best_match
    return None
def _two_good_points(pts_sch1, pts_sch2, pts_src1, pts_src2, im_search, im_source): """返回两对匹配特征点情形下的识别结果.""" # 先算出中心点(在im_source中的坐标): middle_point = [ int((pts_src1[0] + pts_src2[0]) / 2), int((pts_src1[1] + pts_src2[1]) / 2) ] pypts = [] # 如果特征点同x轴或同y轴(无论src还是sch中),均不能计算出目标矩形区域来,此时返回值同good=1情形 if pts_sch1[0] == pts_sch2[0] or pts_sch1[1] == pts_sch2[1] or pts_src1[ 0] == pts_src2[0] or pts_src1[1] == pts_src2[1]: confidence = ONE_POINT_CONFI one_match = generate_result(middle_point, pypts, confidence) return one_match # 计算x,y轴的缩放比例:x_scale、y_scale,从middle点扩张出目标区域:(注意整数计算要转成浮点数结果!) h, w = im_search.shape[:2] h_s, w_s = im_source.shape[:2] x_scale = abs(1.0 * (pts_src2[0] - pts_src1[0]) / (pts_sch2[0] - pts_sch1[0])) y_scale = abs(1.0 * (pts_src2[1] - pts_src1[1]) / (pts_sch2[1] - pts_sch1[1])) # 得到scale后需要对middle_point进行校正,并非特征点中点,而是映射矩阵的中点。 sch_middle_point = int((pts_sch1[0] + pts_sch2[0]) / 2), int( (pts_sch1[1] + pts_sch2[1]) / 2) middle_point[0] = middle_point[0] - int( (sch_middle_point[0] - w / 2) * x_scale) middle_point[1] = middle_point[1] - int( (sch_middle_point[1] - h / 2) * y_scale) middle_point[0] = max(middle_point[0], 0) # 超出左边界取0 (图像左上角坐标为0,0) middle_point[0] = min(middle_point[0], w_s - 1) # 超出右边界取w_s-1 middle_point[1] = max(middle_point[1], 0) # 超出上边界取0 middle_point[1] = min(middle_point[1], h_s - 1) # 超出下边界取h_s-1 # 计算出来rectangle角点的顺序:左上角->左下角->右下角->右上角, 注意:暂不考虑图片转动 # 超出左边界取0, 超出右边界取w_s-1, 超出下边界取0, 超出上边界取h_s-1 x_min, x_max = int(max(middle_point[0] - (w * x_scale) / 2, 0)), int( min(middle_point[0] + (w * x_scale) / 2, w_s - 1)) y_min, y_max = int(max(middle_point[1] - (h * y_scale) / 2, 0)), int( min(middle_point[1] + (h * y_scale) / 2, h_s - 1)) # 目标矩形的角点按左上、左下、右下、右上的点序:(x_min,y_min)(x_min,y_max)(x_max,y_max)(x_max,y_min) pts = np.float32([[x_min, y_min], [x_min, y_max], [x_max, y_max], [x_max, y_min]]).reshape(-1, 1, 2) for npt in pts.astype(int).tolist(): pypts.append(tuple(npt[0])) return middle_point, pypts, [x_min, x_max, y_min, y_max, w, 
h]
def find_all_template(im_source, im_search, threshold=0.8, rgb=False, max_count=10):
    """Return every template match of ``im_search`` in ``im_source``.

    Matches are collected best-first until the next candidate's confidence
    falls below ``threshold`` or ``max_count`` matches have been gathered.

    Args:
        im_source: image to search in.
        im_search: template image.
        threshold: minimum confidence for a match to be kept.
        rgb: forwarded to ``_get_confidence_from_matrix``.
        max_count: upper bound on the number of matches returned.

    Returns:
        A list of ``generate_result`` values, or ``None`` when nothing
        qualified.
    """
    # Step 1: validate the inputs.
    check_source_larger_than_search(im_source, im_search)

    # Step 2: compute the template-matching result matrix.
    res = _get_template_result_matrix(im_source, im_search)

    # Step 3: repeatedly take the current best candidate.
    result = []
    h, w = im_search.shape[:2]
    # Bounded by max_count; the previous `len(result) > max_count` check let
    # max_count + 1 matches through.
    while len(result) < max_count:
        _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(res)
        confidence = _get_confidence_from_matrix(
            im_source, im_search, max_loc, max_val, w, h, rgb)
        if confidence < threshold:
            break

        # Location of this match: centre point plus rectangle.
        middle_point, rectangle = _get_target_rectangle(max_loc, w, h)
        result.append(generate_result(middle_point, rectangle, confidence))

        # Blank out a template-sized area around the match in the result
        # matrix so the next pass finds a different maximum.
        top_left = (int(max_loc[0] - w / 2), int(max_loc[1] - h / 2))
        bottom_right = (int(max_loc[0] + w / 2), int(max_loc[1] + h / 2))
        cv2.rectangle(res, top_left, bottom_right, (0, 0, 0), -1)

    return result if result else None