Exemplo n.º 1
0
    def match_from_mongo(self, collection, match, output):
        """
        Return the ``output`` field of the first document matching ``match``.

        Runs a ``$match`` + ``$project`` aggregation on ``collection`` and
        returns the requested field of the first document it produces.
        The client is always closed afterwards via ``self.client_close()``.

        :param collection: collection object exposing ``aggregate``
        :param match: filter document used for the ``$match`` stage
        :param output: name of the field to project and return
        :return: value of ``output`` in the first document, or ``None`` when
            the aggregation produces no document
        :raises Exception: when the lookup raises ``TypeError``
        """
        mon_logger = Logger().logger
        try:
            mon_logger.info("开始查取数据")
            result = collection.aggregate([{
                "$match": match
            }, {
                "$project": {
                    "budgetPrice": 1,
                    "_id": 0,
                    output: 1
                }
            }])
            # Only the first document matters. An exhausted cursor is the
            # real "no data" case, so it is detected and logged explicitly
            # instead of relying on a None item that a cursor never yields.
            first = next((doc for doc in result if doc is not None), None)
            if first is None:
                mon_logger.error("WEIBO_CODE_ 查取数据为空")
                return None
            mon_logger.info("数据查取成功")
            return first[output]

        except TypeError as e:
            mon_logger.error(
                "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))
            # Chain the original error so the traceback keeps its cause.
            raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e)) from e

        finally:
            self.client_close()
Exemplo n.º 2
0
    def __init__(self):
        """
        Load the dictionary items from MySQL and reset the bookkeeping state.

        Six ``sys_dict_item`` dictionaries are read through
        ``MysqlClient.search_area_code`` and stored on the instance. The
        MySQL connection is closed even when one of the queries fails.
        """
        self.logger = Logger().logger
        self.remove_id_list = list()
        self.copy_mongo_data_list = list()

        # Create the MySQL client
        mysql_config = {
            "host": MYSQL_HOST_25,
            "port": MYSQL_PORT_25,
            "database": MYSQL_DATABASE_25,
            "user": MYSQL_USER_25,
            "password": MYSQL_PASSWORD_25,
            "table": MYSQL_TABLE_25
        }
        mysql_client = MysqlClient(**mysql_config)
        mysql_connection = mysql_client.client_to_mysql()

        # One query per dictionary code; only the code differs.
        dict_sql = ("select DICT_CODE_,ITEM_LABEL_,ITEM_VALUE_ "
                    "from sys_dict_item where DICT_CODE_='{}'")
        # (attribute name, DICT_CODE_) pairs. The attribute spellings are
        # part of the existing interface and are kept as they are.
        dict_attributes = (
            ("sales_status", "SALES_STATUS"),
            ("produc_category", "PRODUC_CATEGORY"),
            ("revenue_type", "REVENUE_TYPE"),
            ("operaton_pattern", "OPERATION_PATTERN"),
            ("purchase_amount", "PURCHASE_AMOUNT"),
            ("duration_type", "DURATION_TYPE"),
        )
        try:
            for attr_name, dict_code in dict_attributes:
                setattr(
                    self, attr_name,
                    mysql_client.search_area_code(
                        sql=dict_sql.format(dict_code),
                        connection=mysql_connection))
        finally:
            # Release the connection even if a query raised.
            mysql_client.close_client(connection=mysql_connection)

        self.find_count = 0
        self.success_count = 0
        self.remove_count = 0
        self.old_count = 0
        self.bad_count = 0
        self.verify_list = [
            "ID_", "ENTITY_CODE_", "AREA_CODE_", "BANK_CODE_", "BANK_NAME_",
            "UNIT_CODE_", "PERIOD_CODE_", "CONTENT_", "REMARK_",
            "CREATE_TIME_", "UPDATE_TIME_", "CODE_", "NAME_", "TIME_LIMIT_",
            "YIELD_RATE_", "BREAKEVEN_", "START_FUNDS_", "INVEST_PERIOD_",
            "SALE_START_", "SALE_END_", "RISK_LEVEL_", "REDEMING_MODE_",
            "PRIVATE_BANK_", "URL_", "DEALTIME_", "DATETIME_", "ENTITY_NAME_",
            "STATUS_", "SALE_DISTRICT_", "CURRENCY_TYPE_", "INCREASE_UNIT_",
            "YIELD_START_DATE_", "YIELD_END_DATE_", "YIELD_TYPE_", "TARGET_",
            "PRODUCT_TYPE_", "YIELD_STATMENT_", "INVEST_RANGE_", "PRE_STOP_",
            "RASE_PLAN_", "PURCHASE_"
        ]
Exemplo n.º 3
0
    def http_client(self, url, param=None, method='GET', code="utf-8"):
        """
        Request ``url`` through a proxy obtained from ``self.wandou()``.

        :param url: URL passed to ``HTTPConnection.request``
        :param param: optional dict of extra request headers, merged over
            the ``Proxy-Authorization`` header
        :param method: HTTP method, ``'GET'`` by default
        :param code: encoding used to decode the response body (undecodable
            bytes are ignored)
        :return: decoded response text, or ``None`` when no proxy address is
            available or the request fails
        """
        log = Logger().logger
        username = "******"  # your user name
        password = "******"  # your password

        ip = self.wandou()
        if not ip:
            # wandou() reports failure with a falsy value; there is nothing
            # to split, so give up the same way a failed request does.
            log.error("no proxy address available, request skipped")
            return None
        ips = ip.split(':')
        proxy_ip = str(ips[0])  # proxy IP
        proxy_port = str(ips[1])  # proxy port
        print(proxy_ip, proxy_port)
        headers = {
            'Proxy-Authorization':
            'Basic %s' % (self.base_code(username, password))
        }

        if param:
            headers = dict(headers, **param)

        con = None
        try:
            con = http.client.HTTPConnection(proxy_ip,
                                             port=proxy_port,
                                             timeout=10)
            con.request(method, url, headers=headers)
            resu = con.getresponse()
            return resu.read().decode(code, errors="ignore")
        except Exception as e:
            log.error(e.args)
            return None
        finally:
            # The body has been read (or the request failed) by now, so the
            # socket can be released.
            if con is not None:
                con.close()
Exemplo n.º 4
0
    def __init__(self):
        """Set the entity codes, the logger, the counters and verify_list."""
        # Original note on "CNINFONEWS": pdf too long
        self.code_list = [
            "CAIJINGNEWS", "CNINFONEWS", "CSFINACIAL", "CSFINACIALNEWS",
            "CSNEWS", "CSNOTICE", "FINAQQNEWS", "XLCJYHMKNEWS", "XLCJNEWS",
            "XLCJGSNEWS", "WYCJNEWS", "WYCJGSNEWS", "NEWS163DOM",
            "NEWS10JQKA2", "NEWS10JQKA", "HOUSEQQNEWS"
        ]

        self.logger = Logger().logger

        # All statistics counters start at zero.
        for counter_name in ("find_count", "success_count", "remove_count",
                             "old_count", "bad_count"):
            setattr(self, counter_name, 0)

        # Currently disabled attributes:
        #   data list inserted into spider_data_old
        #     self.copy_mongo_data_list = list()
        #   _id list of the data deleted from spider_data
        #     self.remove_id_list = list()
        #     self.branch_code_list = list()

        # "ENTITY_NAME_" appears twice, exactly as in the original list.
        self.verify_list = [
            "ENTITY_CODE_", "ENTITY_NAME_", "URL_", "PERIOD_CODE_", "STATUS_",
            "REMARK_", "CREATE_TIME_", "UPDATE_TIME_", "BANK_NAME_",
            "BANK_CODE_", "CONTENT_", "DATA_SOURCE_", "KEYWORDS_",
            "ENTITY_NAME_", "ID_",
        ]
Exemplo n.º 5
0
    def __init__(self):
        """Set the entity codes, the logger, the counters and work lists."""
        # Codes commented out of the list below:
        #   "ABCORGANIZE", "BOCOMORGANIZE", "BOCORGANIZE", "CBHBORGANIZE",
        #   "CCBORGANIZE", "CEBORGANIZE", "CGBORGANIZE", "CIBORGANIZE",
        #   "CMBCORGANIZE", "CMBORGANIZE", "CZBORGANIZE", "EBCLORGANIZE"
        active_prefixes = ("ECITIC", "HXB", "ICBC", "PAB", "PSBC", "SPDB")
        self.code_list = [prefix + "ORGANIZE" for prefix in active_prefixes]

        self.logger = Logger().logger

        # All statistics counters start at zero.
        for counter_name in ("find_count", "success_count", "remove_count",
                             "old_count", "bad_count"):
            setattr(self, counter_name, 0)

        # Each attribute gets its own empty list.
        self.copy_mongo_data_list = []
        self.remove_id_list = []
        self.branch_code_list = []

        self.verify_list = [
            "ID_", "BANK_CODE_", "BANK_NAME_", "CREATE_TIME_", "AREA_CODE_",
            "UNIT_CODE_", "ADDR_", "PROVINCE_NAME_", "PROVINCE_CODE_", "CITY_",
            "CITY_CODE_", "DISTRICT_NAME_", "DISTRICT_CODE_", "LAT_", "LNG_",
            "NAME_", "ENTITY_CODE_", "DEALTIME_", "URL_", "TEL_", "CODE_",
            "BUSINESS_HOURS_", "STATUS_1",
        ]
Exemplo n.º 6
0
    def __init__(self):
        """Set the fund entity codes, the logger, counters and work lists."""
        fund_prefixes = ("STCN", "ABC", "CCB", "CITIC", "ICBC")
        self.code_list = [prefix + "FUND" for prefix in fund_prefixes]
        self.logger = Logger().logger

        # All statistics counters start at zero.
        for counter_name in ("find_count", "success_count", "remove_count",
                             "old_count", "bad_count"):
            setattr(self, counter_name, 0)

        # Each attribute gets its own empty list.
        self.copy_mongo_data_list = []
        self.remove_id_list = []
        self.branch_code_list = []

        # Funds
        self.verify_list = [
            "ENTITY_CODE_", "ENTITY_NAME_", "URL_", "PERIOD_CODE_", "STATUS_",
            "REMARK_", "CREATE_TIME_", "UPDATE_TIME_", "CODE_", "NAME_",
            "FUND_NEW_VALUE_", "TOTAL_NEW_VALUE_", "FUND_OLD_VALUE_",
            "TOTAL_OLD_VALUE_", "DAILY_RATE_", "YEAR_REWARD_", "SUBS_STATUS_",
            "ATONEM_STATUS_", "TYPE_", "ID_", "NEWEST_VALUE_", "TOTAL_VALUE_",
            "POPULARITY_", "RATING_", "OLD_VALUE_", "UNIT_VALUE_", "SCALE_",
            "ESTABLISH_DATE_", "RISK_LEVEL_", "BASE_INFO_", "YIELD_",
            "INVEST_", "MONTH_RATE_", "QUARTER_RATE_", "HALF_YEAR_RATE_",
            "HISTORY_RATE_", "FUND_STATUS_", "COMPANY_", "SUBS_STATUS_CODE_",
            "TYPE_CODE_",
        ]
Exemplo n.º 7
0
    def search_by_status(self, collection, data_id=None):
        """
        Find this entity's documents that have no ``d`` field.

        A first ``find_one`` locates the earliest candidate (optionally
        starting at ``data_id``). The returned cursor then covers every
        matching document whose ``_id`` is greater than or equal to that
        candidate's ``_id``. ``self.client_close()`` is always called before
        returning.

        :param collection: collection object exposing ``find_one``/``find``
        :param data_id: optional ``_id`` value (converted with ``ObjectId``)
            from which the search starts
        :return: cursor over the matching documents, or ``None`` when
            nothing matches or a ``TypeError`` was raised and logged
        """
        mon_logger = Logger().logger

        def build_filter(min_id=None):
            # Shared filter: this entity, no "d" field, optional lower bound
            # on _id.
            clauses = [{"ENTITY_CODE_": self.mongo_entity_code}]
            if min_id is not None:
                clauses.append({"_id": {"$gte": min_id}})
            clauses.append({"d": {"$exists": False}})
            return {"$and": clauses}

        try:
            mon_logger.info("开始查取数据")
            start_id = ObjectId(data_id) if data_id else None
            result_one = collection.find_one(build_filter(start_id))
            if result_one is None:
                mon_logger.info("ENTITY: {} 数据查取为空".format(
                    self.mongo_entity_code))
                return None

            result = collection.find(build_filter(result_one["_id"]),
                                     no_cursor_timeout=True)
            # The message has two placeholders; the entity code used to be
            # missing, which raised IndexError on every successful lookup.
            # NOTE(review): Cursor.count() is kept as in the original;
            # confirm it exists in the deployed PyMongo version.
            mon_logger.info("ENTITY: {} 数据查取成功共 {}条".format(
                self.mongo_entity_code, result.count()))
            return result
        except TypeError as e:
            mon_logger.error(
                "MongoDB数据查取失败,错误信息为{}, 请检查 ENTITY_CODE_ 是否正确:{}".format(
                    e, self.mongo_entity_code))
        finally:
            self.client_close()
Exemplo n.º 8
0
 def get_check_collection(self, collection_list):
     """
     Return ``self.db[self.mongo_collection]`` if the collection is listed.

     :param collection_list: container of known collection names
     :return: the collection object, or ``None`` (after logging an error
         and closing the client) when the name is not in the list
     """
     mon_logger = Logger().logger
     if self.mongo_collection not in collection_list:
         mon_logger.error("MongoDB没有该集合,请检查")
         self.client_close()
         return None
     return self.db[self.mongo_collection]
Exemplo n.º 9
0
 def get_check_collection(self, db, collection_list):
     """
     Return ``db[self.mongo_collection]`` if the collection is listed.

     :param db: database object indexed by collection name
     :param collection_list: container of known collection names
     :return: the collection object, or ``None`` (after logging an error)
         when the name is not in the list
     """
     mon_logger = Logger().logger
     if self.mongo_collection not in collection_list:
         mon_logger.error(
             f"MongoDB {self.mongo_db} 没有 {self.mongo_collection} 集合,请检查")
         return None
     return db[self.mongo_collection]
Exemplo n.º 10
0
 def __init__(self):
     """Set the ``*ORGANIZE`` entity codes, the logger and the counter."""
     bank_prefixes = (
         "ABC", "BOCOM", "BOC", "CBHB", "CCB", "CEB", "CGB", "CIB", "CMBC",
         "CMB", "CZB", "EBCL", "ECITIC", "HXB", "ICBC", "PAB", "PSBC",
         "SPDB",
     )
     self.code_list = [prefix + "ORGANIZE" for prefix in bank_prefixes]
     self.logger = Logger().logger
     self.count = 0
Exemplo n.º 11
0
 def __init__(self, param):
     """
     Parse ``param`` and remember the requested entity type.

     :param param: string holding a Python literal (evaluated with
         ``eval``); when the result is truthy it must provide the key
         ``"entityType"``
     :raises Exception: when ``"entityType"`` cannot be read from the
         evaluated parameter
     """
     self.logger = Logger().logger
     self.invoke_type = "BRANCH"
     self.base_dir = os.path.dirname(os.getcwd())
     # SECURITY NOTE(review): eval() executes arbitrary code. If ``param``
     # can come from outside the process, switch to ast.literal_eval or
     # json.loads after confirming the format callers actually send.
     self.param_dict = eval(param)
     self.param = "\"" + param + "\""
     if self.param_dict:
         try:
             self.entity_type = self.param_dict["entityType"]
         except Exception as e:
             # Same exception type as before, now with a message and the
             # original cause attached.
             raise Exception(
                 "cannot read \"entityType\" from param: {}".format(
                     param)) from e
Exemplo n.º 12
0
    def __init__(self):
        """Connect to MongoDB and Phoenix, then reset counters and tables."""
        # Create the MongoDB client and resolve the working collection
        self.m_client = MongoClient(mongo_collection="BAIDU_SEARCH")
        mongo_db, known_collections = self.m_client.client_to_mongodb()
        self.collection = self.m_client.get_check_collection(
            db=mongo_db, collection_list=known_collections)

        # Create the Phoenix client and connect
        self.p_client = PhoenixHbase(table_name="BAIDU_SEARCH")
        self.connection = self.p_client.connect_to_phoenix()

        self.logger = Logger().logger

        # All statistics counters start at zero.
        for counter_name in ("find_count", "success_count", "remove_count",
                             "old_count", "bad_count", "error_count"):
            setattr(self, counter_name, 0)
        self.data_id = ""

        # BANK_NAME_ dictionary. Bank of Communications: BOCOM changed to
        # COMM; CITIC Bank: ECITIC changed to CITIC; added Ping An Bank,
        # Bank of Beijing and Bank of Shanghai.
        self.name_dict = {
            "ICBC": "中国工商银行", "ABC": "中国农业银行",
            "BOC": "中国银行", "CCB": "中国建设银行",
            "COMM": "交通银行", "PSBC": "中国邮政储蓄银行",
            "CZB": "浙商银行", "CBHB": "渤海银行",
            "CITIC": "中信银行", "CEB": "中国光大银行",
            "HXB": "华夏银行", "CMBC": "中国民生银行",
            "CMB": "招商银行", "CIB": "兴业银行",
            "CGB": "广发银行", "PAB": "平安银行",
            "SPDB": "浦发银行", "EBCL": "恒丰银行",
            "PINGAN": "平安银行", "LTD": "中国光大银行",
            "BEIJING": "北京银行", "BOSC": "上海银行",
        }

        # TYPE_ list
        self.type_list = [
            "Market", "Activity", "GoodStart", "MidSeason", "PrivateBank",
            "Recommendation",
        ]
Exemplo n.º 13
0
    def insert_to_mysql(self, connection, data):
        """
        Insert new rows into ``self.mysql_table``.

        The statement is committed through ``self.cs_commit``; any exception
        raised while committing is logged and swallowed.

        :param connection: connection handed to ``self.cs_commit``
        :param data: one row as a dict, or a list/tuple of row dicts (the
            column names are taken from the first row)
        :return: None
        :raises Exception: when ``data`` is neither a dict nor a list/tuple
        """
        mysql_logger = Logger().logger

        def render_values(row):
            # The repr of the value tuple doubles as the SQL value list.
            # A one-element tuple renders as "(x,)", so its trailing comma
            # is dropped.
            # NOTE(review): values are embedded by repr, not bound as
            # parameters, so quoting and escaping are not handled here.
            rendered = str(tuple(row.values()))
            if rendered[-2] == ",":
                rendered = rendered[:-2] + ")"
            return rendered

        if isinstance(data, dict):
            columns = list(data.keys())
            values_sql = render_values(data)
            sql = f"INSERT INTO {self.mysql_table} ({','.join(columns)}) VALUES{values_sql}"
        elif isinstance(data, (list, tuple)):
            columns = list(data[0].keys())
            rows_sql = [render_values(row) for row in data]
            sql = f"INSERT INTO {self.mysql_table} ({','.join(columns)}) VALUES"
            sql += ",".join(rows_sql)
        else:
            raise Exception("not format type of data")

        try:
            mysql_logger.info(f"网络声量sql==>{sql}")
            count = self.cs_commit(connection=connection, sql=sql)
            mysql_logger.info(f"MySQL 插入成功 {count} 条")
        except Exception as e:
            mysql_logger.exception(f"网络声量  插入失败,ERROR: {e}")
Exemplo n.º 14
0
    def match_from_mongo(self, collection, match, output):
        """
        Run a ``$match`` + ``$project`` aggregation and return its cursor.

        ``self.client_close()`` is always called before returning.

        :param collection: collection object exposing ``aggregate``
        :param match: match condition, e.g. {"ENTITY_CODE_": "XXXXXXXXX"}
        :param output: field name, or an iterable of field names, to project
        :return: the aggregation result, or ``None`` when a ``TypeError``
            was raised and logged
        """
        mon_logger = Logger().logger
        # A single field name is treated as a one-element list.
        fields = [output] if isinstance(output, str) else output
        try:
            mon_logger.info("MongoDB 开始查取数据")
            projection = {field: 1 for field in fields}
            pipeline = [{"$match": match}, {"$project": projection}]
            cursor = collection.aggregate(pipeline)
            mon_logger.info("MongoDB 数据查取成功")
            return cursor

        except TypeError as e:
            mon_logger.error(
                "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))

        finally:
            self.client_close()
Exemplo n.º 15
0
    def __new__(cls, table_name, collection_name, param, verify_field=None):
        """
        Create the shared instance on first use and return it afterwards.

        The first call also opens the Phoenix, MongoDB and MySQL connections
        and stores them as class attributes. If that setup fails, the cached
        instance is discarded so the next call retries instead of returning
        a half-initialised object.

        :param table_name: HBase table name
        :param collection_name: MongoDB collection name
        :param param: not used by this method
        :param verify_field: not used by this method
        :return: the shared instance
        """
        # Reuse the instance once it exists.
        if hasattr(cls, "instance"):
            return cls.instance

        cls.instance = super(GenericScript, cls).__new__(cls)
        try:
            # Phoenix connection
            cls.p_client = PhoenixHbase(table_name=table_name)
            cls.connection = cls.p_client.connect_to_phoenix()
            # MongoDB connection
            cls.m_client = MongoClient(mongo_collection=collection_name)
            cls.db, cls.collection_list = cls.m_client.client_to_mongodb()
            # MongoDB "old" connection
            cls.old_client = MongoClient(mongo_collection=collection_name)
            cls.old_client.mongo_db = "spider_data_old"
            cls.old_db, cls.old_collection_list = (
                cls.old_client.client_to_mongodb())
            # MySQL connection
            cls.mysql_client, cls.mysql_connection = cls.mysql_connect(
                dev=True)
            cls.bank_list = cls.data_from_mysql()
            # Log
            cls.logger = Logger().logger
            # Statistics
            cls.count_all = 0
        except Exception:
            # Forget the partially configured singleton before re-raising.
            del cls.instance
            raise

        return cls.instance
Exemplo n.º 16
0
 def __new__(cls, table_name, collection_name, param, verify_field=None):
     """
     Create the shared instance on first use and return it afterwards.

     The first call also opens the Phoenix, MongoDB and MySQL connections,
     loads the area/bank data and stores everything as class attributes.
     If that setup fails, the cached instance is discarded so the next call
     retries instead of returning a half-initialised object.

     :param table_name: HBase table name
     :param collection_name: MongoDB collection name
     :param param: not used by this method
     :param verify_field: not used by this method
     :return: the shared instance
     """
     if hasattr(cls, "instance"):
         return cls.instance

     cls.instance = super(GenericScript, cls).__new__(cls)
     try:
         # Phoenix connection
         cls.p_client = PhoenixHbase(table_name=table_name)
         cls.connection = cls.p_client.connect_to_phoenix()
         # MongoDB connection
         cls.m_client = MongoClient(mongo_collection=collection_name)
         cls.db, cls.collection_list = cls.m_client.client_to_mongodb()
         # MongoDB "old" connection: the spider_data_old collections are
         # found by iterating over query results, so this connection has to
         # be set up by hand.
         cls.old_client = MongoClient(mongo_collection=collection_name)
         cls.old_client.mongo_db = "spider_data_old"
         cls.old_db, cls.old_collection_list = (
             cls.old_client.client_to_mongodb())
         # MySQL connection
         cls.mysql_client, cls.mysql_connection = cls.mysql_connect()
         (cls.province_list, cls.city_list, cls.area_list,
          cls.dir_area_list, cls.bank_list) = cls.data_from_mysql()
         # Log
         cls.logger = Logger().logger
         # Statistics
         cls.count_all = 0
         # Chinese numeral / date character -> Arabic replacement
         cls.number_dict = {
             "〇": "0",
             "○": "0",
             "零": "0",
             "一": "1",
             "二": "2",
             "三": "3",
             "四": "4",
             "五": "5",
             "六": "6",
             "七": "7",
             "八": "8",
             "九": "9",
             "十": "10",
             "年": "-",
             "月": "-",
             "日": ""
         }
     except Exception:
         # Forget the partially configured singleton before re-raising.
         del cls.instance
         raise
     return cls.instance
Exemplo n.º 17
0
 def __init__(self, entity_type="WEIBOBASICINFO"):
     """
     Store the entity type and reset the bookkeeping state.

     :param entity_type: entity type name, "WEIBOBASICINFO" by default
     """
     self.entity_type = entity_type
     self.logger = Logger().logger
     self.verify_list = [
         "ID_", "BANK_CODE_", "BANK_NAME_", "PERIOD_TIME_", "AREA_CODE_",
         "CREATE_TIME_", "WEIBO_CODE_", "MAIN_URL_", "NAME_", "FOCUS_",
         "FANS_", "COMPANY_URL_", "COMPANY_", "DETAILED_URL_", "VIRIFIED_",
         "BIREF_", "ENTITY_NAME_", "ENTITY_CODE_", "DEALTIME_",
         "PROVINCE_NAME_", "PROVINCE_CODE_", "STATUS_1",
     ]

     # Each attribute gets its own empty list.
     self.remove_id_list = []
     self.copy_mongo_data_list = []
     self.branch_code_list = []

     # All statistics counters start at zero.
     for counter_name in ("find_count", "bad_count", "success_count",
                          "remove_count", "old_count"):
         setattr(self, counter_name, 0)
Exemplo n.º 18
0
    def __init__(self):
        """
        Connect to MongoDB, load the ``TYPE`` dictionary from MySQL, connect
        to Phoenix and reset the bookkeeping state.

        The MySQL connection is closed even when the dictionary query fails.
        """
        # Create the MongoDB client and resolve the working collection
        self.m_client = MongoClient(mongo_collection="JSINSURANCE_CCBDATA")
        db, collection_list = self.m_client.client_to_mongodb()
        self.collection = self.m_client.get_check_collection(
            db=db, collection_list=collection_list)

        # Create the MySQL client
        mysql_config = {
            "host": MYSQL_HOST_25,
            "port": MYSQL_PORT_25,
            "database": MYSQL_DATABASE_25,
            "user": MYSQL_USER_25,
            "password": MYSQL_PASSWORD_25,
            "table": MYSQL_TABLE_25
        }

        mysql_client = MysqlClient(**mysql_config)
        mysql_connection = mysql_client.client_to_mysql()
        try:
            self.type = mysql_client.search_area_code(
                sql=
                "select DICT_CODE_,ITEM_LABEL_,ITEM_VALUE_ from sys_dict_item where DICT_CODE_=\'TYPE\'",
                connection=mysql_connection)
        finally:
            # Release the connection even if the query raised.
            mysql_client.close_client(connection=mysql_connection)

        # Create the Phoenix client
        self.p_client = PhoenixHbase(table_name="INSURANCE")
        # Connect to Phoenix
        self.connection = self.p_client.connect_to_phoenix()

        self.logger = Logger().logger

        self.find_count = 0
        self.success_count = 0
        self.remove_count = 0
        self.old_count = 0
        self.bad_count = 0
        self.error_count = 0
        self.data_id = ""
        self.a = list()
Exemplo n.º 19
0
 def wandou(self):
     """
     Fetch one proxy address from the Wandou proxy service.

     :return: ``"ip:port"`` string, or ``False`` when the service call
         fails or its answer carries no usable entry
     """
     log = Logger().logger
     url_wandou = r'http://h.wandouip.com/get/ip-list?pack=853&num=1&xy=1&type=2&lb=\r\n&mr=1&'
     try:
         time.sleep(random.randint(1, 5))
         # Named "payload" so the local no longer shadows the re module.
         payload = requests.get(url=url_wandou).json()
         print(payload)
         # NOTE(review): this 100-second pause looks like a debugging
         # leftover; kept unchanged until its purpose is confirmed.
         time.sleep(100)
     except Exception:
         # "except Exception" instead of a bare except, so Ctrl-C during
         # the sleeps is no longer swallowed.
         print(2)
         log.error('豌豆代理外部接口获取ip异常!')
         return False

     try:
         entry = payload.get('data')[0]
         ip = '{ip}:{port}'.format(ip=entry.get('ip'),
                                   port=entry.get('port'))
     except (AttributeError, TypeError, IndexError, KeyError):
         # The answer has no "data" list or the list is empty. Report it
         # like any other failure instead of raising.
         log.error('豌豆代理外部接口获取ip异常!')
         return False
     print(ip)
     return ip
Exemplo n.º 20
0
    def __init__(self):
        """Connect to MongoDB and Phoenix, then reset the counters."""
        # Create the MongoDB client and resolve the working collection
        self.m_client = MongoClient(mongo_collection="TREND")
        mongo_db, known_collections = self.m_client.client_to_mongodb()
        self.collection = self.m_client.get_check_collection(
            db=mongo_db, collection_list=known_collections)

        # Create the Phoenix client and connect
        self.p_client = PhoenixHbase(table_name="CHA_BRANCH_MARKET_ACT")
        self.connection = self.p_client.connect_to_phoenix()

        self.logger = Logger().logger

        # All statistics counters start at zero.
        for counter_name in ("find_count", "success_count", "remove_count",
                             "old_count", "bad_count", "error_count"):
            setattr(self, counter_name, 0)
        self.data_id = ""
Exemplo n.º 21
0
    def search_from_mysql(self,
                          connection,
                          output=None,
                          where_condition=None,
                          limit_num=None,
                          offset_num=None):
        """
        Run a SELECT against ``self.mysql_table``.

        :param connection: connection object exposing ``cursor``
        :param output: output column(s): a string, or a tuple/list of
            column names; all columns when empty
        :param where_condition: WHERE condition, with or without the
            leading ``WHERE`` keyword
        :param limit_num: maximum number of rows to return
        :param offset_num: number of rows to skip
        :return: the fetched rows, or ``None`` when no row matched or a
            ``TypeError`` was raised and logged
        :raises Exception: when ``output`` has an unsupported type
        """
        mysql_logger = Logger().logger
        if output:
            if isinstance(output, str):
                columns = output
            elif isinstance(output, (tuple, list)):
                columns = ",".join(output)
            else:
                raise Exception("not format type of \"output\"")
        else:
            columns = "*"
        sql = f"SELECT {columns} FROM {self.mysql_table}"

        if where_condition:
            # The condition is appended verbatim if it already mentions
            # WHERE anywhere in its text (substring test).
            if "where" in where_condition or "WHERE" in where_condition:
                sql = sql + " " + where_condition
            else:
                sql = sql + f" WHERE {where_condition}"

        if limit_num:
            sql = sql + f" LIMIT {limit_num}"

        if offset_num:
            sql = sql + f" OFFSET {offset_num}"

        # Pre-set so the finally block stays valid when cursor creation
        # itself fails (it used to raise UnboundLocalError there).
        cs = None
        try:
            cs = connection.cursor(pymysql.cursors.DictCursor)
            count = cs.execute(sql)
            result = cs.fetchall()
            if count:
                mysql_logger.info(f"Mysql 查取成功 {count} 条")
                return result
            else:
                mysql_logger.info("数据库查取数为0")
        except TypeError:
            mysql_logger.error("MySQL查取失败,请检查")
        finally:
            if cs is not None:
                cs.close()
Exemplo n.º 22
0
 def __init__(self,
              table_name="CHA_BRANCH_MAPBAR",
              collection_name="mapbar"):
     """
     Connect to Phoenix and to a dedicated MongoDB server.

     :param table_name: table name handed to ``PhoenixHbase``
     :param collection_name: collection name handed to ``MongoClient``
     """
     mongo_host = "172.22.69.35"
     mongo_port = 20000

     # Phoenix connection
     self.p_client = PhoenixHbase(table_name=table_name)
     self.connection = self.p_client.connect_to_phoenix()

     # MongoDB connection, pointed at the dedicated host/port
     self.m_client = MongoClient(mongo_collection=collection_name,
                                 entity_code="MAPBAR_DEATAIL_BJ")
     self.m_client.mongo_host = mongo_host
     self.m_client.mongo_port = mongo_port
     # NOTE(review): both timeouts are given in milliseconds (60 ms);
     # confirm that is the intended value.
     self.m_client.client = pymongo.MongoClient(
         host=mongo_host,
         port=mongo_port,
         serverSelectionTimeoutMS=60,
         connectTimeoutMS=60,
         connect=False)
     self.db, self.collection_list = self.m_client.client_to_mongodb()
     self.collection = self.m_client.get_check_collection(
         db=self.db, collection_list=self.collection_list)

     # Log
     self.logger = Logger().logger
     # Count
     self.count = 0
Exemplo n.º 23
0
 def __init__(self):
     """Collect the code list, then reset counters, lists and verify_list."""
     self.file_list = []
     # NOTE(review): get_code_list() runs before self.logger is assigned;
     # confirm it does not use the logger.
     self.get_code_list()
     self.logger = Logger().logger

     # All statistics counters start at zero.
     for counter_name in ("find_count", "success_count", "remove_count",
                          "old_count"):
         setattr(self, counter_name, 0)

     self.copy_mongo_data_list = []
     self.remove_id_list = []

     # Field verification list
     self.verify_list = [
         "ID_", "CONTENT_", "NOTICE_TIME_", "TITLE_", "PROJECT_NAME_",
         "BID_CONTENT_", "SIGN_START_TIME_", "SIGN_END_TIME_",
         "OPEN_BID_TIME_", "OPEN_BID_PLACE_", "BID_AGENCY_",
         "APPLY_CONDITION_", "SIGN_QUALIFICATION_", "PROJECT_ID_",
         "WIN_CANDIDATE_", "CANDIDATE_RANK_", "BID_", "URL_", "DEALTIME_",
         "CREATE_TIME_", "ENTITY_NAME_", "ENTITY_CODE_", "ENTITY_STATUS_",
         "SIGN_MATERIAL_", "BID_TYPE_", "DATETIME_", "BUDGET_PRICE_",
         "PASS_REASON_", "PRESALE_CONTENT_", "PRESALE_WAY_",
         "PRESALE_START_TIME_", "PRESALE_END_TIME_", "PRESALE_ADDR_",
         "PRESALE_PREPARE_", "IMAGE_",
     ]
Exemplo n.º 24
0
    def __init__(self, entity_type="FOR_TEST_WECHAT"):
        """
        Initialise the parameters and connect to Phoenix.

        :param entity_type: entity type, also used as the Phoenix table name
        """
        self.entity_type = entity_type
        self.logger = Logger().logger

        # Create the Phoenix client and connect
        self.p_client = PhoenixHbase(table_name=self.entity_type)
        self.connection = self.p_client.connect_to_phoenix()

        self.remove_id_list = []
        self.copy_mongo_data_list = []

        # All statistics counters start at zero.
        for counter_name in ("find_count", "success_count", "remove_count",
                             "old_count", "bad_count", "error_count",
                             "row_key_count"):
            setattr(self, counter_name, 0)
        self.data_id = ""
Exemplo n.º 25
0
 def __init__(self, table_name, collection_name, param):
     """
     Reset the bookkeeping state, then delegate to the parent initialiser.

     :param table_name: forwarded to the parent ``__init__``
     :param collection_name: forwarded to the parent ``__init__``
     :param param: forwarded to the parent ``__init__``
     """
     self.logger = Logger().logger

     # Each attribute gets its own empty list.
     self.remove_id_list = []
     self.copy_mongo_data_list = []
     self.branch_code_list = []

     # All statistics counters start at zero.
     for counter_name in ("find_count", "bad_count", "success_count",
                          "remove_count", "old_count"):
         setattr(self, counter_name, 0)

     # Bank name (full or abbreviated) -> bank code; values are kept
     # exactly as they were.
     self.name_dict = {
         '工行': 'ICBC', '工商银行': 'ICBC',
         '农行': 'ABC', '农业银行': 'ABC',
         '中行': 'BOC', '中银': 'BOC',
         '建行': 'CCB', '邮政储蓄银行': 'PSBC',
         '建信': 'CCB', '建设银行': 'CCB',
         '交行': 'BCM', '交通银行': 'BCM',
         '邮储银行': 'PSBC', '浙商银行': 'CZB',
         '渤海银行': 'CBHB', '中信银行': 'ECITIC',
         '光大银行': 'CEB', '华夏银行': 'HB',
         '招行': 'CMB', '招商银行': 'CMB',
         '兴业银行': 'CIB', '广发银行': 'CGB',
         '平安银行': 'PAB', '浦发银行': 'SPDB',
         '恒丰银行': 'EBCL', '浦东发展银行': 'SPDB',
         '民生银行': 'CMBC', '汇丰银行': 'HSBC',
         '渣打银行': 'SC', '南海农商银行': 'NRC ',
         '顺德农村商业银行': 'sdebank',
     }

     parent_kwargs = {
         "table_name": table_name,
         "collection_name": collection_name,
         "param": param,
         "verify_field": {"WEIBO_CODE_": "WEIBO_CODE_"},
     }
     super(WeiboBasicInfoScript, self).__init__(**parent_kwargs)
Exemplo n.º 26
0
    def __new__(cls, table_name, collection_name):
        """
        Create the shared instance on first use and return it afterwards.

        The first call also opens the Phoenix and MongoDB connections, loads
        the area data and stores everything as class attributes. If that
        setup fails, the cached instance is discarded so the next call
        retries instead of returning a half-initialised object.

        :param table_name: HBase table name
        :param collection_name: MongoDB collection name
        :return: the shared instance
        """
        if hasattr(cls, "instance"):
            return cls.instance

        cls.instance = super(GenericScript, cls).__new__(cls)
        try:
            # Phoenix connection
            cls.p_client = PhoenixHbase(table_name=table_name)
            cls.connection = cls.p_client.connect_to_phoenix()
            # MongoDB connection
            cls.m_client = MongoClient(mongo_collection=collection_name)
            cls.db, cls.collection_list = cls.m_client.client_to_mongodb()
            # Area data loaded through area_from_mysql()
            (cls.province_list, cls.city_list, cls.area_list,
             cls.dir_area_list) = cls.area_from_mysql()
            # Log
            cls.logger = Logger().logger
            # Bank name -> bank code dictionary
            cls.bank_dict = {
                '中国工商银行': 'ICBC',
                '中国农业银行': 'ABC',
                '中国银行': 'BOC',
                '中国建设银行': 'CCB',
                '交通银行': 'BOCOM',
                '中国邮政储蓄银行': 'PSBC',
                '浙商银行': 'CZB',
                '渤海银行': 'CBHB',
                '中信银行': 'ECITIC',
                '中国光大银行': 'CEB',
                '华夏银行': 'HXB',
                '中国民生银行': 'CMBC',
                '招商银行': 'CMB',
                '兴业银行': 'CIB',
                '广发银行': 'CGB',
                '平安银行': 'PAB',
                '浦发银行': 'SPDB',
                '恒丰银行': 'EBCL'
            }
        except Exception:
            # Forget the partially configured singleton before re-raising.
            del cls.instance
            raise

        return cls.instance
Exemplo n.º 27
0
 def __new__(cls, *args, **kwargs):
     """
     Create the shared instance on first use and return it afterwards.

     The MySQL and Phoenix connections are opened once, together with the
     instance. Previously they were re-opened on every call and the older
     connections were dropped without being closed. If the setup fails,
     the cached instance is discarded so the next call retries.

     :return: the shared instance
     """
     if hasattr(cls, "instance"):
         return cls.instance

     cls.instance = super(DataTransfer, cls).__new__(cls)
     try:
         config = {
             # "host": "192.168.1.103",
             "host": "172.22.69.43",
             "port": 3306,
             "table": "sch_job_inst",
             "database": "ijep",
             "user": "******",
             "password": "******",
             "charset": "utf8"
         }
         cls.mysql_client = MysqlClient(**config)
         cls.mysql_connection = cls.mysql_client.client_to_mysql()
         cls.hbase_client = PhoenixHbase("SCH_JOB_INST")
         cls.hbase_connection = cls.hbase_client.connect_to_phoenix()
         cls.logger = Logger().logger
     except Exception:
         # Forget the partially configured singleton before re-raising.
         del cls.instance
         raise
     return cls.instance
Exemplo n.º 28
0
 def __init__(self,
              table_name="CHA_BRANCH_WEIBO_BASIC",
              collection_name="WEIBOBASICINFO"):
     """
     Connect to Phoenix and to a dedicated MongoDB server.

     :param table_name: table name handed to ``PhoenixHbase``
     :param collection_name: collection name handed to ``MongoClient``
     """
     # Phoenix connection
     self.p_client = PhoenixHbase(table_name=table_name)
     self.connection = self.p_client.connect_to_phoenix()
     # Mongo connection
     self.m_client = MongoClient(entity_code="CMBCMICROBLOG",
                                 mongo_collection=collection_name)
     # Kept on the instance for backward compatibility.
     self.mongo_host = "172.22.69.35"
     self.mongo_port = 20000
     # The Mongo client wrapper is the object that connects, so it is
     # given the same host and port as the pymongo client created below.
     self.m_client.mongo_host = self.mongo_host
     self.m_client.mongo_port = self.mongo_port
     self.m_client.client = pymongo.MongoClient(host=self.mongo_host,
                                                port=self.mongo_port,
                                                serverSelectionTimeoutMS=60,
                                                connectTimeoutMS=60,
                                                connect=False)
     self.db, self.collection_list = self.m_client.client_to_mongodb()
     self.collection = self.m_client.get_check_collection(
         db=self.db, collection_list=self.collection_list)
     # Log
     self.logger = Logger().logger
Exemplo n.º 29
0
    def get_mongo_column_dict(self, collection, column1, column2):
        """
        Project two columns of every document in ``collection``.

        ``self.m_client.client`` is always closed before returning.

        :param collection: collection object exposing ``aggregate``
        :param column1: first field name to project
        :param column2: second field name to project
        :return: the aggregation result
        :raises Exception: when the aggregation raises ``TypeError``
        """
        mon_logger = Logger().logger
        try:
            mon_logger.info("开始查取数据")
            projection = {"_id": 0, column1: 1, column2: 1}
            pipeline = [{"$project": projection}]
            return collection.aggregate(pipeline)
        except TypeError as e:
            mon_logger.error(
                "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确".format(e))
            raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e))

        finally:
            self.m_client.client.close()
Exemplo n.º 30
0
    def delete_from_mysql(self, connection, where_condition):
        """
        Delete rows from ``self.mysql_table``.

        The statement is committed through ``self.cs_commit``; any exception
        raised while committing is logged and swallowed.

        :param connection: connection handed to ``self.cs_commit``
        :param where_condition: WHERE condition, with or without the
            ``WHERE`` keyword
        :return: None
        """
        mysql_logger = Logger().logger

        # The condition is used verbatim if it already mentions WHERE
        # anywhere in its text (substring test).
        mentions_where = ("where" in where_condition
                          or "WHERE" in where_condition)
        clause = (where_condition
                  if mentions_where else f"WHERE {where_condition}")
        sql = f"DELETE FROM {self.mysql_table} {clause}"

        try:
            count = self.cs_commit(connection=connection, sql=sql)
            mysql_logger.info(f"MySQL 删除成功 {count} 条")
        except Exception as e:
            mysql_logger.exception(f"MySQL 删除失败,ERROR: {e}")