Пример #1
0
    def insert_to_mysql(self, connection, data):
        """
        Insert one row or a batch of rows into ``self.mysql_table``.

        The INSERT statement is assembled as text and handed to
        ``self.cs_commit``. Errors raised by ``cs_commit`` are logged and
        swallowed, so the method always returns ``None``.

        NOTE(review): values are rendered with Python ``repr`` (through
        ``str(tuple)``) instead of being bound as query parameters, so
        ``None`` is emitted as the literal ``None`` and the statement is only
        safe for trusted data. Parameter binding would need support in
        ``cs_commit`` - confirm before changing.

        :param connection: connection object forwarded to ``self.cs_commit``
        :param data: a dict (column name -> value) for a single row, or a
            list/tuple of such dicts for a multi-row insert
        :raises Exception: if ``data`` is neither a dict nor a list/tuple, or
            if the rows of a batch do not all have the same column names
        :return: None
        """
        mysql_logger = Logger().logger

        def render_row(values):
            # str() of a one-element tuple is "(x,)"; drop that trailing comma
            # so the text is a valid SQL value list.
            rendered = str(tuple(values))
            if rendered[-2] == ",":
                rendered = rendered[:-2] + ")"
            return rendered

        if isinstance(data, dict):
            columns = list(data)
            sql = (f"INSERT INTO {self.mysql_table} ({','.join(columns)}) "
                   f"VALUES{render_row(data.values())}")
        elif isinstance(data, (list, tuple)):
            if not data:
                # Nothing to insert; previously this failed on data[0].
                mysql_logger.info("MySQL 插入数据为空, 跳过插入")
                return
            columns = list(data[0])
            rendered_rows = []
            for each in data:
                if each.keys() != data[0].keys():
                    raise Exception("rows of data do not share the same columns")
                # Read values by column name so that every row follows the
                # column order of the statement, whatever its own key order.
                rendered_rows.append(render_row(each[column] for column in columns))
            sql = f"INSERT INTO {self.mysql_table} ({','.join(columns)}) VALUES"
            sql = sql + ",".join(rendered_rows)
        else:
            raise Exception("not format type of data")

        try:
            mysql_logger.info(f"网络声量sql==>{sql}")
            count = self.cs_commit(connection=connection, sql=sql)
            mysql_logger.info(f"MySQL 插入成功 {count} 条")
        except Exception as e:
            mysql_logger.exception(f"网络声量  插入失败,ERROR: {e}")
Пример #2
0
    def delete_from_mysql(self, connection, where_condition):
        """
        Delete rows from ``self.mysql_table``.

        The statement is executed through ``self.cs_commit``; errors raised
        there are logged and swallowed.

        NOTE(review): ``where_condition`` is concatenated into the statement
        unchanged, so it must come from trusted code.

        :param connection: connection object forwarded to ``self.cs_commit``
        :param where_condition: the filter, with or without a leading
            ``WHERE`` keyword (any letter case)
        :return: None
        """
        mysql_logger = Logger().logger

        # Only a leading WHERE keyword means the caller already supplied it.
        # A plain substring test would also match values such as 'somewhere'
        # or a sub-select, and would miss mixed-case spellings.
        leading = where_condition.lstrip()
        next_char = leading[5:6]
        has_where = (leading[:5].lower() == "where"
                     and not (next_char.isalnum() or next_char == "_"))

        if has_where:
            sql = f"DELETE FROM {self.mysql_table} {where_condition}"
        else:
            sql = f"DELETE FROM {self.mysql_table} WHERE {where_condition}"

        try:
            count = self.cs_commit(connection=connection, sql=sql)
            mysql_logger.info(f"MySQL 删除成功 {count} 条")
        except Exception as e:
            mysql_logger.exception(f"MySQL 删除失败,ERROR: {e}")
Пример #3
0
    def update_to_mysql(self, connection, data, where_condition):
        """
        Update rows of ``self.mysql_table``.

        The statement is executed through ``self.cs_commit``; errors raised
        there are logged and swallowed.

        NOTE(review): every value is written as ``'value'`` without escaping,
        so a value containing a single quote breaks the statement and
        ``None`` is stored as the string ``'None'``. ``where_condition`` is
        concatenated unchanged. Both must come from trusted code.

        :param connection: connection object forwarded to ``self.cs_commit``
        :param data: dict mapping column name -> new value
        :param where_condition: the filter, with or without a leading
            ``WHERE`` keyword (any letter case)
        :return: None
        """
        mysql_logger = Logger().logger

        assignments = ",".join(
            f"{key} = '{value}'" for key, value in data.items())

        # Only a leading WHERE keyword means the caller already supplied it.
        # A plain substring test would also match values such as 'somewhere'
        # or a sub-select, and would miss mixed-case spellings.
        leading = where_condition.lstrip()
        next_char = leading[5:6]
        has_where = (leading[:5].lower() == "where"
                     and not (next_char.isalnum() or next_char == "_"))

        if has_where:
            sql = f"UPDATE {self.mysql_table} SET {assignments} {where_condition}"
        else:
            sql = f"UPDATE {self.mysql_table} SET {assignments} WHERE {where_condition}"

        try:
            count = self.cs_commit(connection=connection, sql=sql)
            mysql_logger.info(f"MySQL 更新成功 {count} 条")
        except Exception as e:
            mysql_logger.exception(f"MySQL 更新失败,ERROR: {e}")
Пример #4
0
class AllToPhoenix(object):
    """Move bank-branch records from MongoDB into the Phoenix table ORGANIZE_FINASSIST.

    For every entity code in ``code_list`` the module of the same name is
    imported and its ``data_shuffle`` function cleans each MongoDB document.
    Each cleaned record is geocoded by :meth:`shuffle_for_area` and upserted
    into Phoenix one row at a time.
    """

    def __init__(self):
        # Entity codes that are currently switched off:
        # "ABCORGANIZE", "BOCOMORGANIZE","BOCORGANIZE", "CBHBORGANIZE", "CCBORGANIZE", "CEBORGANIZE",
        # "CGBORGANIZE", "CIBORGANIZE", "CMBCORGANIZE", "CMBORGANIZE", "CZBORGANIZE", "EBCLORGANIZE",
        self.code_list = [
            "ECITICORGANIZE", "HXBORGANIZE", "ICBCORGANIZE", "PABORGANIZE",
            "PSBCORGANIZE", "SPDBORGANIZE"
        ]

        self.logger = Logger().logger
        # Run statistics reported at the end of run().
        self.find_count = 0
        self.success_count = 0
        # remove_count and old_count are reported but never incremented by
        # the code in this class (the delete/archive steps are not active).
        self.remove_count = 0
        self.old_count = 0
        self.bad_count = 0
        # Per-entity scratch lists; run() resets them for every entity code.
        self.copy_mongo_data_list = list()
        self.remove_id_list = list()
        self.branch_code_list = list()

        # Handed to the Phoenix client as its ``verify_list`` in run().
        self.verify_list = [
            "ID_", "BANK_CODE_", "BANK_NAME_", "CREATE_TIME_", "AREA_CODE_",
            "UNIT_CODE_", "ADDR_", "PROVINCE_NAME_", "PROVINCE_CODE_", "CITY_",
            "CITY_CODE_", "DISTRICT_NAME_", "DISTRICT_CODE_", "LAT_", "LNG_",
            "NAME_", "ENTITY_CODE_", "DEALTIME_", "URL_", "TEL_", "CODE_",
            "BUSINESS_HOURS_", "STATUS_1"
        ]

    # Fetch data from MongoDB
    def get_data_from_mongo(self, m_client, collection, entity_code, data_id):
        """Query ``collection`` for one entity code.

        Points ``m_client`` at the ``spider_data`` database and the given
        entity code, then calls ``m_client.search_from_mongodb``. A server
        selection timeout is retried once after one second; an error from
        that retry propagates.

        :param m_client: project MongoDB client wrapper
        :param collection: collection handle passed to the search
        :param entity_code: value stored in ``m_client.mongo_entity_code``
        :param data_id: optional document id string, converted to ``ObjectId``
        :return: whatever ``search_from_mongodb`` returns, or ``None`` when
            any other exception was logged
        """
        m_client.mongo_db = "spider_data"
        m_client.mongo_entity_code = entity_code
        data_id_obj = ObjectId(data_id) if data_id else None

        try:
            return m_client.search_from_mongodb(collection,
                                                data_id=data_id_obj)
        except pymongo.errors.ServerSelectionTimeoutError:
            self.logger.info("连接失败,正在重新连接")
            sleep(1)
            return m_client.search_from_mongodb(collection,
                                                data_id=data_id_obj)
        except Exception as e:
            # Also covers KeyError; a separate handler after this one could
            # never be reached.
            self.logger.info(e)
            return None

    # Delete data from MongoDB
    def delete_data_from_mongo(self, m_client, collection, entity_code,
                               remove_id_list):
        """Remove the documents listed in ``remove_id_list``.

        A server selection timeout is retried once immediately; an error from
        that retry propagates.

        :return: whatever ``m_client.remove_from_mongo`` returns, or ``None``
            when any other exception was logged
        """
        m_client.mongo_entity_code = entity_code

        try:
            return m_client.remove_from_mongo(
                collection=collection, remove_id_list=remove_id_list)
        except pymongo.errors.ServerSelectionTimeoutError:
            return m_client.remove_from_mongo(
                collection=collection, remove_id_list=remove_id_list)
        except Exception as e:
            self.logger.info(e)
            return None

    # Clean latitude/longitude and AREA_CODE_
    def shuffle_for_area(self, re_data):
        """Fill the location fields and the branch ``CODE_`` of one record.

        ``re_data`` is modified in place and returned. The address in
        ``ADDR_`` is geocoded with ``get_lat_lng``. When that reports status
        0 the coordinates are stored and ``get_area`` supplies the district,
        city and province fields; otherwise ``LNG_`` and ``LAT_`` are set to
        empty strings.

        :raises KeyError: if a required key is missing, e.g. ``AREA_CODE_``
            when geocoding failed and the record did not already carry one
        """
        if "STATUS_1" in re_data:
            del re_data["STATUS_1"]
            re_data["STATUS_"] = "1"

        location_result = get_lat_lng(re_data["ADDR_"])
        if location_result["status"] == 0:
            location = location_result["result"]["location"]
            re_data["LNG_"] = str(location["lng"])
            re_data["LAT_"] = str(location["lat"])
            address_result = get_area(lat_lng=re_data["LAT_"] + "," +
                                      re_data["LNG_"])
            component = address_result["result"]["addressComponent"]
            # todo use formatted_address or not
            re_data["DISTRICT_NAME_"] = component["district"]
            re_data["DISTRICT_CODE_"] = component["adcode"]
            re_data["AREA_CODE_"] = component["adcode"]
            re_data["CITY_"] = component["city"]
            re_data["CITY_CODE_"] = component["adcode"][:4] + "00"
            re_data["PROVINCE_NAME_"] = component["province"]
            # NOTE(review): this yields a 4-character code while CITY_CODE_
            # has 6 characters - confirm that is the intended format.
            re_data["PROVINCE_CODE_"] = component["adcode"][:2] + "00"
        else:
            re_data["LNG_"] = ""
            re_data["LAT_"] = ""

        # Branch CODE_: bank code, area code and the MD5 of the address.
        hash_addr_ = hashlib.md5(re_data["ADDR_"].encode("utf-8")).hexdigest()
        re_data["CODE_"] = re_data["BANK_CODE_"] + "_" + re_data[
            "AREA_CODE_"] + "_" + hash_addr_
        return re_data

    def _geocode_and_upsert(self, p_client, connection, record, data_id,
                            geocode_error_format):
        """Geocode one cleaned record and upsert it; return the rows written (0 on failure)."""
        try:
            area_data = self.shuffle_for_area(record)
        except Exception as e:
            self.logger.exception(geocode_error_format.format(data_id, e))
            return 0
        if not area_data:
            return 0

        try:
            written = p_client.upsert_to_phoenix_by_one(
                connection=connection, data=area_data)
            self.success_count += written
            return written
        except Exception as e:
            self.logger.exception(
                "HBase 插入 _id 为 {} 的数据失败, {}".format(data_id, e))
            return 0

    def _process_document(self, data, module, area_lists, p_client,
                          connection):
        """Clean one MongoDB document and upsert its records; return the rows written."""
        data_id = data["_id"]
        try:
            del data["_id"]
            re_data = module.data_shuffle(data, *area_lists)
            if not re_data:
                self.bad_count += 1
                return 0
        except Exception as e:
            self.logger.exception("清洗错误,错误 _id 为{}, {}".format(
                data_id, e))
            return 0

        print(data_id)

        # data_shuffle may return one record or a list of records; any other
        # type is ignored.
        if isinstance(re_data, list):
            records, geocode_error_format = re_data, "_id:{} 获取经纬度失败, {}"
        elif isinstance(re_data, dict):
            records, geocode_error_format = [re_data], "_id: {}获取经纬度失败, {}"
        else:
            return 0

        written_total = 0
        for record in records:
            written_total += self._geocode_and_upsert(
                p_client, connection, record, data_id, geocode_error_format)
        return written_total

    def _transfer_entity(self, entity_code, m_client, collection, p_client,
                         connection, area_lists):
        """Process every MongoDB document of one entity code."""
        module = __import__(entity_code)
        self.logger.info("开始进行 ENTITY_CODE_ {}".format(entity_code))

        self.remove_id_list = []
        self.copy_mongo_data_list = []
        self.branch_code_list = []

        # NOTE(review): hard-coded document id passed to the search for this
        # one entity - presumably a resume point; confirm against
        # search_from_mongodb.
        if entity_code == "ECITICORGANIZE":
            find_id = "5c3f48479bb3df1d97d762e1"
        else:
            find_id = None

        try:
            mongo_data_list = self.get_data_from_mongo(
                m_client=m_client,
                collection=collection,
                entity_code=entity_code,
                data_id=find_id)
        except pymongo.errors.ServerSelectionTimeoutError:
            sleep(1)
            mongo_data_list = self.get_data_from_mongo(
                m_client=m_client,
                collection=collection,
                entity_code=entity_code,
                data_id=find_id)

        if not mongo_data_list:
            return

        once_count = 0
        # Accumulate so the final summary covers all entity codes, not only
        # the last one.
        self.find_count += mongo_data_list.count()
        try:
            for data in mongo_data_list:
                before = self.success_count
                once_count += self._process_document(
                    data, module, area_lists, p_client, connection)
                # Report progress once per completed hundred rows.
                if self.success_count // 100 > before // 100:
                    self.logger.info("HBase 插入成功, 成功条数 {} 条".format(
                        self.success_count))
        finally:
            mongo_data_list.close()

        if once_count > 0:
            self.logger.info("HBase 插入成功, 成功条数 {}".format(once_count))

    # Main entry point
    def run(self):
        """Transfer all entity codes in ``code_list`` and log a summary.

        Opens the Phoenix and MongoDB connections, loads the area code lists
        from MySQL through ``GenericScript``, processes each entity code and
        closes both connections even when processing fails.
        """
        # Create the Phoenix client and connect
        p_client = PhoenixHbase(table_name="ORGANIZE_FINASSIST")
        p_client.verify_list = self.verify_list
        connection = p_client.connect_to_phoenix()
        # Create the MongoDB query client
        m_client = MongoClient(mongo_collection="ORGANIZE_FINASSIST")
        db, collection_list = m_client.client_to_mongodb()
        collection = m_client.get_check_collection(
            db=db, collection_list=collection_list)

        # Load the area codes
        province_list, city_list, area_list, _dir_area_list = (GenericScript(
            entity_code=None,
            entity_type="ORGANIZE_FINASSIST").area_from_mysql())
        area_lists = (province_list, city_list, area_list)

        # Table definition kept for reference:
        # sql = ('create table "ORGANIZE_FINASSIST" ("ID_" varchar primary key, "C"."BANK_NAME_" varchar,'
        #        '"C"."BANK_CODE_" varchar, "C"."NAME_" varchar,'
        #        '"C"."CODE_" varchar, "C"."ENTITY_NAME_" varchar, "C"."ENTITY_CODE_" varchar,'
        #        '"C"."AREA_CODE_" varchar, "C"."UNIT_CODE_" varchar, "C"."ADDR_" varchar,'
        #        '"C"."PROVINCE_NAME_" varchar, "C"."PROVINCE_CODE_" varchar, "C"."CITY_" varchar,'
        #        '"C"."CITY_CODE_" varchar, "C"."DISTRICT_NAME_" varchar, "C". "DISTRICT_CODE_" varchar,'
        #        '"C"."LAT_" varchar, "C"."LNG_" varchar, "C"."CREATE_TIME_" varchar, "C"."DEALTIME_" varchar,'
        #        '"C"."URL_" varchar, "C"."TEL_" varchar, "C"."BUSINESS_HOURS_" varchar, "C"."STATUS_" varchar,'
        #        '"C"."IMPORTANCE_" varchar) IMMUTABLE_ROWS = true')
        # p_client.create_new_table_phoenix(connection=connection, sql=sql)

        try:
            for entity_code in self.code_list:
                self._transfer_entity(entity_code, m_client, collection,
                                      p_client, connection, area_lists)
        finally:
            # Close the connections
            m_client.client_close()
            p_client.close_client_phoenix(connection=connection)

        self.logger.info("本次共向 MongoDB 查取数据{}条".format(self.find_count))
        self.logger.info("本次共向 HBase 插入数据{}条".format(self.success_count))
        self.logger.info("本次共向 MongoDB 删除数据{}条".format(self.remove_count))
        self.logger.info("本次共向 MongoDB 插入数据{}条".format(self.old_count))
        self.logger.info("本次坏数据共 {} 条".format(self.bad_count))
        self.logger.handlers.clear()
Пример #5
0
class MapBarTransfer(object):
    """Copy MapBar documents from MongoDB into a Phoenix table.

    ``main`` cleans and geocodes each MongoDB document and upserts it.
    ``check_lat`` copies every row of the Phoenix table ``FANSILE`` into
    ``CHA_BRANCH_MAPBAR``.
    """

    # Accepted coordinate window (exclusive bounds), currently Beijing.
    # Shanghai used 30.7083860773-31.8739003864 / 120.8778122800-122.1248433443.
    LAT_BOUNDS = (39.4498800000, 41.1684980000)
    LNG_BOUNDS = (115.4534230000, 117.5461160000)

    def __init__(self,
                 table_name="CHA_BRANCH_MAPBAR",
                 collection_name="mapbar",
                 mongo_host="172.22.69.35",
                 mongo_port=20000):
        """Open the Phoenix and MongoDB connections.

        :param table_name: Phoenix table written by ``main``
        :param collection_name: MongoDB collection read by ``main``
        :param mongo_host: MongoDB host
        :param mongo_port: MongoDB port
        """
        # phoenix connection
        self.p_client = PhoenixHbase(table_name=table_name)
        self.connection = self.p_client.connect_to_phoenix()
        # MongoDB connection
        self.m_client = MongoClient(mongo_collection=collection_name,
                                    entity_code="MAPBAR_DEATAIL_BJ")
        self.m_client.mongo_host = mongo_host
        self.m_client.mongo_port = mongo_port
        # NOTE(review): pymongo reads both timeouts in milliseconds, so 60
        # means 60 ms - confirm that 60 seconds was not intended.
        self.m_client.client = pymongo.MongoClient(host=mongo_host,
                                                   port=mongo_port,
                                                   serverSelectionTimeoutMS=60,
                                                   connectTimeoutMS=60,
                                                   connect=False)
        self.db, self.collection_list = self.m_client.client_to_mongodb()
        self.collection = self.m_client.get_check_collection(
            db=self.db, collection_list=self.collection_list)
        # Log
        self.logger = Logger().logger
        # Number of rows copied by check_lat()
        self.count = 0

    @staticmethod
    def _normalize_fields(data):
        """Strip the "无," placeholder from PHONE_ and rewrite UPDATETIME_ as YYYY-MM-DD."""
        data["PHONE_"] = data["PHONE_"].replace("无,", "")
        u_time_list = re.findall(r"(\d{4}年\d{1,2}月\d{1,2})日",
                                 data["UPDATETIME_"])
        if u_time_list:
            parts = u_time_list[0].replace("年", "-").replace("月",
                                                              "-").split("-")
            # Zero-pad month and day to two digits.
            parts[1] = parts[1].zfill(2)
            parts[2] = parts[2].zfill(2)
            data["UPDATETIME_"] = "-".join(parts)

    def _attach_location(self, data):
        """Geocode ADDRESS_ into LNG_/LAT_; return False when the record has no address."""
        if not data["ADDRESS_"]:
            return False
        # Drop everything up to the first "|" and join the remaining parts.
        data["ADDRESS_"] = "".join(data["ADDRESS_"].split("|")[1:])
        location_result = get_lat_lng(address=data["ADDRESS_"])
        if location_result["status"] == 0:
            location = location_result["result"]["location"]
            data["LNG_"] = str(location["lng"])
            data["LAT_"] = str(location["lat"])
        else:
            self.logger.warning(f"_id: {data['_id']} 获取经纬度失败")
        return True

    def main(self):
        """Clean, geocode and upsert the selected MongoDB documents.

        A document that fails in any step is logged and skipped. The cursor
        is closed even when iteration fails.
        """
        # Fetch the data
        mongo_data_list = self.m_client.search_from_mongodb(
            collection=self.collection,
            field_name="DEALTIME_",
            field_value={"$gt": "1555136656.0579224"},
            data_id="5cb65fac9bb3df61a09c6625")

        count = 0
        try:
            while True:
                # Take one document
                try:
                    data = next(mongo_data_list)
                except StopIteration:
                    break
                except pymongo.errors.ServerSelectionTimeoutError:
                    time.sleep(3)
                    try:
                        data = next(mongo_data_list)
                    except StopIteration:
                        # The retry may find the cursor exhausted.
                        break

                # Clean
                try:
                    self._normalize_fields(data)
                except Exception as e:
                    self.logger.exception(
                        f"数据清洗出错, _id: {data['_id']}, error {e}")
                    continue

                # Look up latitude/longitude
                try:
                    if not self._attach_location(data):
                        continue
                except Exception as e:
                    self.logger.exception(
                        f"_id: {data['_id']} 获取经纬度失败, error: {e}")
                    continue

                # upsert to HBase
                try:
                    re_data = self.__check_lat(data=data)
                    self.p_client.upsert_to_phoenix_by_one(
                        connection=self.connection, data=re_data)
                    count += 1
                    if count % 100 == 0:
                        self.logger.info(
                            f"HBase 插入成功, _id: {data['_id']}, 成功条数 {count}")
                except Exception as e:
                    self.logger.exception(
                        f"HBase 插入失败, _id: {data['_id']}, error: {e}")
                    continue
        finally:
            # Close the MongoDB cursor
            mongo_data_list.close()

        self.logger.info(
            f"collection: {self.m_client.mongo_collection} 的数据清洗完毕, 成功条数共计: {count} 条"
        )

    def check_lat(self):
        """Copy every row of the Phoenix table FANSILE into CHA_BRANCH_MAPBAR.

        ``self.count`` is increased for every copied row. Afterwards
        ``self.p_client.table_name`` is ``"CHA_BRANCH_MAPBAR"`` regardless of
        the table name given to the constructor.
        """
        # Table definition kept for reference:
        # table_sql = (f'create table "CHA_BRANCH_MAPBAR" ("ID_" varchar primary key,'
        #              '"C"."BTYPE_" varchar, "C"."TYPE_" varchar, "C"."NAME_" varchar, "C"."UPDATETIME_" varchar,'
        #              '"C"."ADDRESS_" varchar, "C"."POINAME_" varchar, "C"."PHONE_" varchar, "C"."BUSSTOP_" varchar,'
        #              '"C"."BUS_" varchar, "C"."URL_" varchar, "C"."DEALTIME_" varchar, "C"."DATETIME_" varchar,'
        #              '"C"."ENTITY_NAME_" varchar, "C"."ENTITY_CODE_" varchar, "C"."LAT_" varchar, "C"."LNG_" varchar,'
        #              '"C"."CHECK_LAT_" varchar, "C"."CHECK_LNG_" varchar'
        #              ') IMMUTABLE_ROWS = true')
        # self.p_client.create_new_table_phoenix(connection=self.connection, sql=table_sql)

        # Read from FANSILE, then point the client back at the target table.
        self.p_client.table_name = "FANSILE"
        data_cursor = self.p_client.search_all_from_phoenix(
            connection=self.connection, dict_status=True)
        self.p_client.table_name = "CHA_BRANCH_MAPBAR"

        while True:
            try:
                data = next(data_cursor)
            except StopIteration:
                break

            self.p_client.upsert_to_phoenix_by_one(
                connection=self.connection, data=data)
            self.count += 1
            if self.count % 100 == 0:
                # Prefer "ID_" (the key the disabled range checks here read),
                # falling back to "_id", so a missing key cannot abort the copy.
                row_id = data.get("ID_", data.get("_id"))
                self.logger.info(
                    f"HBase 插入成功, _id: {row_id}, 成功条数 {self.count} 条")

    def __check_lat(self, data):
        """Blank out coordinates that fall outside the accepted window.

        Records without ``LAT_`` are returned untouched. When the latitude or
        longitude is out of range, the original values are moved to
        ``CHECK_LAT_`` / ``CHECK_LNG_`` and ``LAT_`` / ``LNG_`` are set to
        empty strings. ``data`` is modified in place and returned.

        :raises ValueError: if a coordinate cannot be parsed as a float
        """
        if "LAT_" not in data:
            return data

        lat_low, lat_high = self.LAT_BOUNDS
        lng_low, lng_high = self.LNG_BOUNDS
        # Longitude is only parsed when the latitude is already in range.
        if (lat_low < float(data["LAT_"]) < lat_high
                and lng_low < float(data["LNG_"]) < lng_high):
            return data

        self.logger.warning(
            f"错误 _id: {data['_id']}, 经纬度: {data['LAT_']},{data['LNG_']}")
        data["CHECK_LAT_"] = data["LAT_"]
        data["CHECK_LNG_"] = data["LNG_"]
        data["LAT_"] = ""
        data["LNG_"] = ""
        return data