예제 #1
0
def test():
    """Predict with the saved NB-SVM model and export the predictions.

    ``FLAGS.word_level`` selects which pair of artifacts is loaded: the
    ``*_w`` files when it is truthy, the ``*_c`` files otherwise. Both the
    feature object and the model are restored with ``joblib.load``; the
    predictions are handed to ``utils.generate_result``.
    """
    # presumably "w" = word-level and "c" = character-level features -- confirm.
    vec_file, model_file = (
        ('temp/ml/vec_w_tfidf.pkl', 'temp/ml/nbsvm_model_w.bin')
        if FLAGS.word_level else
        ('temp/ml/vec_c_tfidf.pkl', 'temp/ml/nbsvm_model_c.bin')
    )

    features = joblib.load(vec_file)
    classifier = joblib.load(model_file)

    # NOTE(review): the output is named 'result_xgb.csv' although the model
    # loaded here is the NB-SVM one -- looks copy-pasted, confirm before renaming.
    utils.generate_result(y_pred=classifier.predict(features),
                          filename='result_xgb.csv')
예제 #2
0
def test():
    """Predict with the saved fastText model and export the predictions.

    ``FLAGS.word_level`` selects both the second argument passed to
    ``utils.load_data`` and the model file (``*_w`` vs ``*_c``). Every loaded
    sample is joined into one space-separated string, the top-1 label is
    predicted for it, and the label is mapped to an id via ``utils.tag2id``.
    """
    word_level = bool(FLAGS.word_level)
    samples = utils.load_data(False, word_level)
    if word_level:
        model_file = 'temp/ml/fast_model_w.bin'
    else:
        model_file = 'temp/ml/fast_model_c.bin'

    texts = [' '.join(tokens) for tokens in samples]
    model = fastText.load_model(model_file)

    preds = []
    for text in texts:
        labels, _ = model.predict(text, k=1)
        # Only the last four characters of the predicted label are used as
        # the lookup key (presumably the tag text after a label prefix --
        # confirm against how the training labels were written).
        tag = labels[0][-4:]
        preds.append(utils.tag2id[tag])

    utils.generate_result(y_pred=preds, filename='result_fast.csv')
예제 #3
0
파일: sift.py 프로젝트: vvchina/Airtest
def find_sift(im_source,
              im_search,
              threshold=0.8,
              rgb=True,
              good_ratio=FILTER_RATIO):
    """Find the single best SIFT-based match of ``im_search`` in ``im_source``.

    Args:
        im_source: image to search in (array-like exposing ``.any()`` and
            2-D slicing).
        im_search: image to look for.
        threshold: minimum confidence a match needs in order to be returned.
        rgb: forwarded to ``_cal_sift_confidence``.
        good_ratio: forwarded to ``_get_key_points``.

    Returns:
        The value built by ``generate_result`` (or by one of the
        few-points handlers) when the confidence reaches ``threshold``,
        otherwise ``None``. ``None`` is also returned when either image is
        ``None`` / all-zero or when no matching point pair was found.
    """
    # Step 1: make sure both images are present and not entirely zero.
    if not (im_source is not None and im_source.any() and im_search is not None
            and im_search.any()):
        return None

    # Step 2: collect key points and the matched ("good") point pairs.
    kp_sch, kp_src, good = _get_key_points(im_source, im_search, good_ratio)

    # Step 3: derive the target region from the matched pairs.
    match_count = len(good)
    if match_count == 0:
        # No matched pair: nothing to locate.
        return None
    if match_count == 1:
        # One matched pair: the confidence is the preset ONE_POINT_CONFI.
        return _handle_one_good_points(
            kp_src, good, threshold) if ONE_POINT_CONFI >= threshold else None
    if match_count in (2, 3):
        # Two or three matched pairs: the handler returns either a finished
        # result dict (degenerate geometry, confidence ONE_POINT_CONFI) or a
        # (middle_point, pypts, w_h_range) triple.
        handler = (_handle_two_good_points
                   if match_count == 2 else _handle_three_good_points)
        origin_result = handler(im_source, im_search, kp_src, kp_sch, good)
        if isinstance(origin_result, dict):
            return origin_result if ONE_POINT_CONFI >= threshold else None
        # Reuse the triple already computed instead of calling the handler a
        # second time with the same arguments.
        middle_point, pypts, w_h_range = origin_result
    else:
        # Four or more matched pairs: locate the region via _many_good_pts.
        middle_point, pypts, w_h_range = _many_good_pts(
            im_source, im_search, kp_sch, kp_src, good)

    # Step 4: compute the confidence of the located region and return.
    # Sanity check of the region; per the original comment, regions smaller
    # than 5 pixels or scaled by more than 5x are rejected by raising.
    _target_error_check(w_h_range)
    # Resize the located crop to the search image size before comparing.
    x_min, x_max, y_min, y_max, w, h = w_h_range
    target_img = im_source[y_min:y_max, x_min:x_max]
    resize_img = cv2.resize(target_img, (w, h))
    confidence = _cal_sift_confidence(im_search, resize_img, rgb=rgb)

    best_match = generate_result(middle_point, pypts, confidence)
    print("[aircv][sift] threshold=%s, result=%s" % (threshold, best_match))
    return best_match if confidence >= threshold else None
예제 #4
0
def test():
    """Predict with the saved XGBoost booster and export the predictions.

    ``FLAGS.word_level`` selects which pair of artifacts is loaded: the
    ``*_w`` files when it is truthy, the ``*_c`` files otherwise. The feature
    object is restored with ``joblib.load`` and wrapped in an
    ``xgb.DMatrix``; the predictions are handed to ``utils.generate_result``.
    """
    if FLAGS.word_level:
        vec_file = 'temp/ml/vec_w_tfidf.pkl'
        model_file = 'temp/ml/xgb_model_w.bin'
    else:
        vec_file = 'temp/ml/vec_c_tfidf.pkl'
        model_file = 'temp/ml/xgb_model_c.bin'

    x = joblib.load(vec_file)

    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3 (the bare print statement is not).
    print(x.shape)
    dtest = xgb.DMatrix(data=x)

    bst_new = xgb.Booster({'nthread': 40})  # empty booster, 40 threads
    bst_new.load_model(model_file)  # restore the trained model from disk

    print('Predict started!')
    # NOTE(review): the tree limit 176 is hard-coded; presumably the best
    # iteration found during training -- confirm.
    predicts = bst_new.predict(dtest, ntree_limit=176)
    utils.generate_result(y_pred=predicts, filename='result_xgb.csv')
예제 #5
0
파일: sift.py 프로젝트: vvchina/Airtest
def _handle_one_good_points(kp_src, good, threshold):
    """Build the result for the case of exactly one matched key-point pair.

    The match centre is the integer position of the source key point
    referenced by ``good[0].trainIdx``. The confidence is the preset
    ``ONE_POINT_CONFI``. Returns ``None`` when that confidence is below
    ``threshold``, otherwise the value built by ``generate_result``.
    """
    matched_pt = kp_src[good[0].trainIdx].pt
    middle_point = (int(matched_pt[0]), int(matched_pt[1]))

    # A single point cannot span a region, so all four corners collapse
    # onto the match centre.
    pypts = [middle_point] * 4
    result = generate_result(middle_point, pypts, ONE_POINT_CONFI)

    if ONE_POINT_CONFI < threshold:
        return None
    return result
예제 #6
0
    def get(self):
        """Serve the first page of nearby POIs for a location/radius query.

        Query arguments (read through ``self.get_argument``):
            location: ``"lat,lng"`` string; required for a first-page request.
            radius: search radius, defaults to 50.
            pagetoken: paging token; paging is not implemented yet.

        Responses:
            ``{"err_code": 2}`` when ``self.cache`` is falsy;
            ``{"err_code": 1}`` when ``location`` is missing;
            otherwise a JSON string with ``err_code`` 0 and ``results``
            (plus ``next_page_token`` when the data came from the upstream
            API rather than the cache).

        This is a generator: it yields the pending upstream HTTP fetch.
        """
        checkLoc = self.get_argument("location", None)
        checkRadius = self.get_argument("radius", 50)
        pageToken = self.get_argument("pagetoken", None)

        if not self.cache:
            logging.info("Redis is not connected")
            self.write(json.dumps({"err_code": 2}))
            self.finish()
            # Stop here: falling through would use the missing cache and
            # call finish() a second time.
            return

        if pageToken is not None:
            logging.info("request more Pois by page token parameter")
            # NOTE(review): paging is not implemented. The previous code fell
            # through to write an unbound ``ret_data`` (NameError); answer
            # with an explicit "501 Not Implemented" instead. Assumes the
            # handler derives from tornado.web.RequestHandler -- confirm.
            self.send_error(501)
            return

        logging.info("request the first 20 Pois")
        if checkLoc is None:
            logging.info("no location argument")
            self.write(json.dumps({"err_code": 1}))
            self.finish()
            # Stop here: ``checkLoc.split`` below would fail on None.
            return

        lat, lng = checkLoc.split(",")

        cache_key = "%s_%s" % (checkLoc, checkRadius)
        # Read the cache once: a second read could miss if the key expired
        # in between and would embed "None" in the response.
        cached_results = self.cache.get(cache_key)
        if cached_results:
            logging.info("hit cache key: %s" % cache_key)
            ret_data = '''{"err_code": 0, "results": %s } ''' % (
                cached_results,)
        else:
            logging.info("unhit cache key: %s, fetch from api." %
                         cache_key)
            http_client = tornado.httpclient.AsyncHTTPClient()
            # NOTE(review): ``location`` and ``radius`` come straight from
            # the request and are interpolated unescaped into the URL.
            http_request_uri = "{domain}/nearbysearch/json?location={loc}&radius={r}&{api}".format(
                domain=config.SERVICE_ADDR,
                loc=checkLoc,
                r=checkRadius,
                api=config.API_PARAMS)
            print(http_request_uri)
            logging.info("GetPoiList request: %s" % http_request_uri)
            response = yield http_client.fetch(http_request_uri)
            result_list, next_page_token = utils.parse_poi_data(
                response.body)
            poi_list = utils.filter_poi_infos(result_list)
            utils.calculate_position(poi_list, float(lat), float(lng))
            results = utils.generate_result(poi_list)
            results_json = json.dumps(results['results'])
            ret_data = '''{"err_code": 0, "results": %s, "next_page_token": "%s"} ''' % (
                results_json, next_page_token)
            # Cache the serialized results for 300 seconds.
            self.cache.setex(cache_key, 300, results_json)

        self.write(ret_data)
        self.finish()
예제 #7
0
def predict():
    """Run the neural model on the test set and write the ensembled result.

    The vocabulary and tag mappings are rebuilt from the training data
    (``utils.build_vocab`` with ``min_df=10``), the test data is passed
    through the same ``process`` / ``truncation`` steps and converted to
    ids, a single ``Graph`` model is run on it, and the collected outputs
    go through ``ensemble`` before ``utils.generate_result`` writes
    ``result_nn.csv``.
    """
    ################################################################################
    #                              NN model                                        #
    ################################################################################
    x, y = utils.read_file(is_train=True, label_list=['人类作者', '自动摘要', '机器作者', '机器翻译'])
    x = utils.process(x)
    x = utils.truncation(x)
    word2id, id2word, tag2id, id2tag = utils.build_vocab(x, y, min_df=10)

    test_x = utils.read_file(is_train=False)
    test_x = utils.process(test_x)
    test_x = utils.truncation(test_x)
    test_x = utils.build_x_ids(test_x, word2id)

    vocab_size = len(word2id)
    emb_dim = 100
    num_classes = len(tag2id)

    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3 (the bare print statement is not).
    print("测试集数据大小:%d" % (len(test_x)))
    print("vocab_size:%d num_classes:%d" % (vocab_size, num_classes))

    results = []
    g1 = Graph('TextCNN', 'HierarchyCNN', vocab_size, emb_dim, num_classes)
    results.append(g1.run(test_x))

    ################################################################################
    #                              Other model                                     #
    ################################################################################

    ################################################################################
    #                              Ensemble                                       #
    ################################################################################
    final_result = ensemble(results)
    utils.generate_result(final_result, id2tag, 'result_nn.csv')
예제 #8
0
    def get(self):
        """Serve the first page of nearby POIs for a location/radius query.

        Query arguments (read through ``self.get_argument``):
            location: ``"lat,lng"`` string; required for a first-page request.
            radius: search radius, defaults to 50.
            pagetoken: paging token; paging is not implemented yet.

        Responses:
            ``{"err_code": 2}`` when ``self.cache`` is falsy;
            ``{"err_code": 1}`` when ``location`` is missing;
            otherwise a JSON string with ``err_code`` 0 and ``results``
            (plus ``next_page_token`` when the data came from the upstream
            API rather than the cache).

        This is a generator: it yields the pending upstream HTTP fetch.
        """
        checkLoc = self.get_argument("location", None)
        checkRadius = self.get_argument("radius", 50)
        pageToken = self.get_argument("pagetoken", None)

        if not self.cache:
            logging.info("Redis is not connected")
            self.write(json.dumps({"err_code" : 2}))
            self.finish()
            # Stop here: falling through would use the missing cache and
            # call finish() a second time.
            return

        if pageToken is not None:
            logging.info("request more Pois by page token parameter")
            # NOTE(review): paging is not implemented. The previous code fell
            # through to write an unbound ``ret_data`` (NameError); answer
            # with an explicit "501 Not Implemented" instead. Assumes the
            # handler derives from tornado.web.RequestHandler -- confirm.
            self.send_error(501)
            return

        logging.info("request the first 20 Pois")
        if checkLoc is None:
            logging.info("no location argument")
            self.write(json.dumps({"err_code" : 1}))
            self.finish()
            # Stop here: ``checkLoc.split`` below would fail on None.
            return

        lat, lng = checkLoc.split(",")

        cache_key = "%s_%s" % (checkLoc, checkRadius)
        # Read the cache once: a second read could miss if the key expired
        # in between and would embed "None" in the response.
        cached_results = self.cache.get(cache_key)
        if cached_results:
            logging.info("hit cache key: %s" % cache_key)
            ret_data = '''{"err_code": 0, "results": %s } ''' % (cached_results,)
        else:
            logging.info("unhit cache key: %s, fetch from api." % cache_key)
            http_client = tornado.httpclient.AsyncHTTPClient()
            # NOTE(review): ``location`` and ``radius`` come straight from
            # the request and are interpolated unescaped into the URL.
            http_request_uri = "{domain}/nearbysearch/json?location={loc}&radius={r}&{api}".format(
                    domain = config.SERVICE_ADDR,
                    loc = checkLoc,
                    r = checkRadius,
                    api = config.API_PARAMS
                )
            print(http_request_uri)
            logging.info("GetPoiList request: %s" % http_request_uri)
            response = yield http_client.fetch(http_request_uri)
            result_list, next_page_token = utils.parse_poi_data(response.body)
            poi_list = utils.filter_poi_infos(result_list)
            utils.calculate_position(poi_list, float(lat), float(lng))
            results = utils.generate_result(poi_list)
            results_json = json.dumps(results['results'])
            ret_data = '''{"err_code": 0, "results": %s, "next_page_token": "%s"} ''' % (results_json, next_page_token)
            # Cache the serialized results for 300 seconds.
            self.cache.setex(cache_key, 300, results_json)

        self.write(ret_data)
        self.finish()
예제 #9
0
파일: template.py 프로젝트: vvchina/Airtest
def find_template(im_source, im_search, threshold=0.8, rgb=False):
    """Return the best template match of ``im_search`` in ``im_source``.

    The location with the maximum value in the template-matching result
    matrix is taken as the candidate. Returns the value built by
    ``generate_result`` when its confidence reaches ``threshold``,
    otherwise ``None``.
    """
    # Validate the inputs (source must be at least as large as search,
    # judging by the helper's name).
    check_source_larger_than_search(im_source, im_search)

    # Build the matching result matrix and pick its maximum.
    res = _get_template_result_matrix(im_source, im_search)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    search_h, search_w = im_search.shape[:2]

    # Confidence of the candidate at the maximum location.
    confidence = _get_confidence_from_matrix(
        im_source, im_search, max_loc, max_val, search_w, search_h, rgb)

    # Target centre and target rectangle of the candidate.
    middle_point, rectangle = _get_target_rectangle(max_loc, search_w,
                                                    search_h)
    best_match = generate_result(middle_point, rectangle, confidence)

    if confidence >= threshold:
        return best_match
    return None
예제 #10
0
파일: sift.py 프로젝트: vvchina/Airtest
def _two_good_points(pts_sch1, pts_sch2, pts_src1, pts_src2, im_search,
                     im_source):
    """返回两对匹配特征点情形下的识别结果."""
    # 先算出中心点(在im_source中的坐标):
    middle_point = [
        int((pts_src1[0] + pts_src2[0]) / 2),
        int((pts_src1[1] + pts_src2[1]) / 2)
    ]
    pypts = []
    # 如果特征点同x轴或同y轴(无论src还是sch中),均不能计算出目标矩形区域来,此时返回值同good=1情形
    if pts_sch1[0] == pts_sch2[0] or pts_sch1[1] == pts_sch2[1] or pts_src1[
            0] == pts_src2[0] or pts_src1[1] == pts_src2[1]:
        confidence = ONE_POINT_CONFI
        one_match = generate_result(middle_point, pypts, confidence)
        return one_match
    # 计算x,y轴的缩放比例:x_scale、y_scale,从middle点扩张出目标区域:(注意整数计算要转成浮点数结果!)
    h, w = im_search.shape[:2]
    h_s, w_s = im_source.shape[:2]
    x_scale = abs(1.0 * (pts_src2[0] - pts_src1[0]) /
                  (pts_sch2[0] - pts_sch1[0]))
    y_scale = abs(1.0 * (pts_src2[1] - pts_src1[1]) /
                  (pts_sch2[1] - pts_sch1[1]))
    # 得到scale后需要对middle_point进行校正,并非特征点中点,而是映射矩阵的中点。
    sch_middle_point = int((pts_sch1[0] + pts_sch2[0]) / 2), int(
        (pts_sch1[1] + pts_sch2[1]) / 2)
    middle_point[0] = middle_point[0] - int(
        (sch_middle_point[0] - w / 2) * x_scale)
    middle_point[1] = middle_point[1] - int(
        (sch_middle_point[1] - h / 2) * y_scale)
    middle_point[0] = max(middle_point[0], 0)  # 超出左边界取0  (图像左上角坐标为0,0)
    middle_point[0] = min(middle_point[0], w_s - 1)  # 超出右边界取w_s-1
    middle_point[1] = max(middle_point[1], 0)  # 超出上边界取0
    middle_point[1] = min(middle_point[1], h_s - 1)  # 超出下边界取h_s-1

    # 计算出来rectangle角点的顺序:左上角->左下角->右下角->右上角, 注意:暂不考虑图片转动
    # 超出左边界取0, 超出右边界取w_s-1, 超出下边界取0, 超出上边界取h_s-1
    x_min, x_max = int(max(middle_point[0] - (w * x_scale) / 2, 0)), int(
        min(middle_point[0] + (w * x_scale) / 2, w_s - 1))
    y_min, y_max = int(max(middle_point[1] - (h * y_scale) / 2, 0)), int(
        min(middle_point[1] + (h * y_scale) / 2, h_s - 1))
    # 目标矩形的角点按左上、左下、右下、右上的点序:(x_min,y_min)(x_min,y_max)(x_max,y_max)(x_max,y_min)
    pts = np.float32([[x_min, y_min], [x_min, y_max], [x_max, y_max],
                      [x_max, y_min]]).reshape(-1, 1, 2)
    for npt in pts.astype(int).tolist():
        pypts.append(tuple(npt[0]))

    return middle_point, pypts, [x_min, x_max, y_min, y_max, w, h]
예제 #11
0
파일: template.py 프로젝트: vvchina/Airtest
def find_all_template(im_source,
                      im_search,
                      threshold=0.8,
                      rgb=False,
                      max_count=10):
    """Return every template match of ``im_search`` found in ``im_source``.

    Matches are collected best-first: the maximum of the result matrix is
    taken, recorded, and its neighbourhood is blanked out before the next
    round.

    Args:
        im_source: image to search in.
        im_search: template image to look for.
        threshold: minimum confidence for a match to be kept.
        rgb: forwarded to ``_get_confidence_from_matrix``.
        max_count: maximum number of matches to return.

    Returns:
        A list of at most ``max_count`` values built by
        ``generate_result``, or ``None`` when nothing qualified.
    """
    # Step 1: validate the inputs.
    check_source_larger_than_search(im_source, im_search)

    # Step 2: compute the template-matching result matrix.
    res = _get_template_result_matrix(im_source, im_search)

    # Step 3: pull matches out of the matrix one at a time.
    result = []
    h, w = im_search.shape[:2]

    while True:
        # Best remaining value in the result matrix.
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        confidence = _get_confidence_from_matrix(im_source, im_search, max_loc,
                                                 max_val, w, h, rgb)

        # ``>=`` rather than ``>``: with ``>`` one more match than
        # ``max_count`` was appended before the loop stopped.
        if confidence < threshold or len(result) >= max_count:
            break

        # Target centre and target rectangle of this match.
        middle_point, rectangle = _get_target_rectangle(max_loc, w, h)
        one_good_match = generate_result(middle_point, rectangle, confidence)

        result.append(one_good_match)

        # Blank out a template-sized area centred on the match so the next
        # round finds a different location.
        cv2.rectangle(res, (int(max_loc[0] - w / 2), int(max_loc[1] - h / 2)),
                      (int(max_loc[0] + w / 2), int(max_loc[1] + h / 2)),
                      (0, 0, 0), -1)

    return result if result else None