def match_from_mongo(self, collection, match, output):
    """Return the ``output`` field of the first document matching ``match``.

    Runs a two-stage aggregation (``$match`` then ``$project``) on
    ``collection``. The projection keeps ``budgetPrice`` and ``output`` and
    drops ``_id``. The client is always closed through ``self.client_close()``
    before this method returns or raises.

    :param collection: collection object exposing ``aggregate``
    :param match: ``$match`` condition, e.g. {"WEIBO_CODE_": "XXXX"}
    :param output: name of the field whose value is returned
    :return: value of ``output`` in the first matching document, or None when
        the aggregation yields no document
    :raises Exception: when the aggregation raises ``TypeError``; the original
        error is attached as ``__cause__``
    """
    mon_logger = Logger().logger
    try:
        mon_logger.info("开始查取数据")
        # Built inside the try so an unhashable ``output`` is reported through
        # the same TypeError handler as a bad ``match``.
        pipeline = [{
            "$match": match
        }, {
            "$project": {
                "budgetPrice": 1,
                "_id": 0,
                output: 1
            }
        }]
        for document in collection.aggregate(pipeline):
            if document is not None:
                mon_logger.info("数据查取成功")
                return document[output]
        # Nothing usable came back: make the empty result visible in the log
        # instead of silently falling through.
        mon_logger.error("WEIBO_CODE_ 查取数据为空")
        return None
    except TypeError as e:
        mon_logger.error(
            "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))
        raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e)) from e
    finally:
        self.client_close()
def __init__(self):
    """Load the MySQL dictionary tables and reset the run state.

    Six dictionaries are read from ``sys_dict_item`` (one query per
    ``DICT_CODE_``) through ``MysqlClient.search_area_code`` and stored on the
    instance. The MySQL connection is closed even when one of the queries
    raises.
    """
    self.logger = Logger().logger
    self.remove_id_list = list()
    self.copy_mongo_data_list = list()

    # Create the MySQL client.
    mysql_config = {
        "host": MYSQL_HOST_25,
        "port": MYSQL_PORT_25,
        "database": MYSQL_DATABASE_25,
        "user": MYSQL_USER_25,
        "password": MYSQL_PASSWORD_25,
        "table": MYSQL_TABLE_25
    }
    mysql_client = MysqlClient(**mysql_config)
    mysql_connection = mysql_client.client_to_mysql()

    def load_dict_items(dict_code):
        # dict_code is always one of the literals below, never external input.
        return mysql_client.search_area_code(
            sql="select DICT_CODE_,ITEM_LABEL_,ITEM_VALUE_ from sys_dict_item "
            "where DICT_CODE_='{}'".format(dict_code),
            connection=mysql_connection)

    try:
        # Attribute names (including their historical spellings) are part of
        # the public surface of the instance and are kept as they were.
        self.sales_status = load_dict_items("SALES_STATUS")
        self.produc_category = load_dict_items("PRODUC_CATEGORY")
        self.revenue_type = load_dict_items("REVENUE_TYPE")
        self.operaton_pattern = load_dict_items("OPERATION_PATTERN")
        self.purchase_amount = load_dict_items("PURCHASE_AMOUNT")
        self.duration_type = load_dict_items("DURATION_TYPE")
    finally:
        # Release the connection even if a query failed.
        mysql_client.close_client(connection=mysql_connection)

    # Run counters.
    self.find_count = 0
    self.success_count = 0
    self.remove_count = 0
    self.old_count = 0
    self.bad_count = 0

    # Field names stored for verification (presumably the columns a record
    # may carry; confirm against the code that reads verify_list).
    self.verify_list = [
        "ID_", "ENTITY_CODE_", "AREA_CODE_", "BANK_CODE_", "BANK_NAME_",
        "UNIT_CODE_", "PERIOD_CODE_", "CONTENT_", "REMARK_", "CREATE_TIME_",
        "UPDATE_TIME_", "CODE_", "NAME_", "TIME_LIMIT_", "YIELD_RATE_",
        "BREAKEVEN_", "START_FUNDS_", "INVEST_PERIOD_", "SALE_START_",
        "SALE_END_", "RISK_LEVEL_", "REDEMING_MODE_", "PRIVATE_BANK_", "URL_",
        "DEALTIME_", "DATETIME_", "ENTITY_NAME_", "STATUS_", "SALE_DISTRICT_",
        "CURRENCY_TYPE_", "INCREASE_UNIT_", "YIELD_START_DATE_",
        "YIELD_END_DATE_", "YIELD_TYPE_", "TARGET_", "PRODUCT_TYPE_",
        "YIELD_STATMENT_", "INVEST_RANGE_", "PRE_STOP_", "RASE_PLAN_",
        "PURCHASE_"
    ]
def http_client(self, url, param=None, method='GET', code="utf-8"):
    """Request ``url`` through a proxy obtained from ``self.wandou()``.

    :param url: URL passed to ``HTTPConnection.request``
    :param param: optional dict of extra headers merged over the proxy
        authorization header
    :param method: HTTP method, ``GET`` by default
    :param code: encoding used to decode the response body (undecodable
        bytes are ignored)
    :return: decoded response body, or None when no proxy is available or
        the request fails
    """
    log = Logger().logger
    username = "******"  # proxy account user name
    password = "******"  # proxy account password

    ip = self.wandou()
    if not ip:
        # wandou() returns False when the proxy API call failed; without this
        # guard the split below would raise AttributeError.
        log.error("no proxy address available, request aborted")
        return None

    ips = ip.split(':')
    proxy_ip = str(ips[0])  # proxy host
    proxy_port = str(ips[1])  # proxy port
    print(proxy_ip, proxy_port)

    # base_code presumably builds the Basic-auth token from the credentials;
    # confirm against its definition.
    headers = {
        'Proxy-Authorization':
        'Basic %s' % (self.base_code(username, password))
    }
    if param:
        headers = dict(headers, **param)

    con = None
    try:
        con = http.client.HTTPConnection(proxy_ip,
                                         port=proxy_port,
                                         timeout=10)
        con.request(method, url, headers=headers)
        resu = con.getresponse()
        return resu.read().decode(code, errors="ignore")
    except Exception as e:
        # Best effort by design: any request failure is logged and reported
        # to the caller as None.
        log.error(e.args)
        return None
    finally:
        # Always release the socket; the original never closed it.
        if con is not None:
            con.close()
def __init__(self):
    """Initialise entity codes, logger, counters and the field-name list."""
    # NOTE: an earlier remark said "CNINFONEWS" PDFs are too long; the code is
    # nevertheless still part of the list.
    self.code_list = [
        "CAIJINGNEWS",
        "CNINFONEWS",
        "CSFINACIAL",
        "CSFINACIALNEWS",
        "CSNEWS",
        "CSNOTICE",
        "FINAQQNEWS",
        "XLCJYHMKNEWS",
        "XLCJNEWS",
        "XLCJGSNEWS",
        "WYCJNEWS",
        "WYCJGSNEWS",
        "NEWS163DOM",
        "NEWS10JQKA2",
        "NEWS10JQKA",
        "HOUSEQQNEWS",
    ]
    self.logger = Logger().logger

    # All run counters start at zero.
    self.find_count = self.success_count = self.remove_count = 0
    self.old_count = self.bad_count = 0

    # Field names kept for verification; "ENTITY_NAME_" appears twice, exactly
    # as in the original list.
    self.verify_list = [
        "ENTITY_CODE_",
        "ENTITY_NAME_",
        "URL_",
        "PERIOD_CODE_",
        "STATUS_",
        "REMARK_",
        "CREATE_TIME_",
        "UPDATE_TIME_",
        "BANK_NAME_",
        "BANK_CODE_",
        "CONTENT_",
        "DATA_SOURCE_",
        "KEYWORDS_",
        "ENTITY_NAME_",
        "ID_",
    ]
def __init__(self):
    """Initialise entity codes, logger, counters, buffers and field names.

    Only a subset of the ``*ORGANIZE`` codes is listed here; the codes from
    "ABCORGANIZE" through "EBCLORGANIZE" were commented out in the original.
    """
    self.code_list = [
        "ECITICORGANIZE",
        "HXBORGANIZE",
        "ICBCORGANIZE",
        "PABORGANIZE",
        "PSBCORGANIZE",
        "SPDBORGANIZE",
    ]
    self.logger = Logger().logger

    # Run counters.
    self.find_count = self.success_count = self.remove_count = 0
    self.old_count = self.bad_count = 0

    # Working buffers, each starting empty.
    self.copy_mongo_data_list = []
    self.remove_id_list = []
    self.branch_code_list = []

    # Field names kept for verification.
    self.verify_list = [
        "ID_",
        "BANK_CODE_",
        "BANK_NAME_",
        "CREATE_TIME_",
        "AREA_CODE_",
        "UNIT_CODE_",
        "ADDR_",
        "PROVINCE_NAME_",
        "PROVINCE_CODE_",
        "CITY_",
        "CITY_CODE_",
        "DISTRICT_NAME_",
        "DISTRICT_CODE_",
        "LAT_",
        "LNG_",
        "NAME_",
        "ENTITY_CODE_",
        "DEALTIME_",
        "URL_",
        "TEL_",
        "CODE_",
        "BUSINESS_HOURS_",
        "STATUS_1",
    ]
def __init__(self):
    """Initialise fund entity codes, logger, counters, buffers and fields."""
    self.code_list = [
        "STCNFUND",
        "ABCFUND",
        "CCBFUND",
        "CITICFUND",
        "ICBCFUND",
    ]
    self.logger = Logger().logger

    # Run counters.
    self.find_count = self.success_count = self.remove_count = 0
    self.old_count = self.bad_count = 0

    # Working buffers, each starting empty.
    self.copy_mongo_data_list = []
    self.remove_id_list = []
    self.branch_code_list = []

    # Fund field names kept for verification.
    self.verify_list = [
        "ENTITY_CODE_",
        "ENTITY_NAME_",
        "URL_",
        "PERIOD_CODE_",
        "STATUS_",
        "REMARK_",
        "CREATE_TIME_",
        "UPDATE_TIME_",
        "CODE_",
        "NAME_",
        "FUND_NEW_VALUE_",
        "TOTAL_NEW_VALUE_",
        "FUND_OLD_VALUE_",
        "TOTAL_OLD_VALUE_",
        "DAILY_RATE_",
        "YEAR_REWARD_",
        "SUBS_STATUS_",
        "ATONEM_STATUS_",
        "TYPE_",
        "ID_",
        "NEWEST_VALUE_",
        "TOTAL_VALUE_",
        "POPULARITY_",
        "RATING_",
        "OLD_VALUE_",
        "UNIT_VALUE_",
        "SCALE_",
        "ESTABLISH_DATE_",
        "RISK_LEVEL_",
        "BASE_INFO_",
        "YIELD_",
        "INVEST_",
        "MONTH_RATE_",
        "QUARTER_RATE_",
        "HALF_YEAR_RATE_",
        "HISTORY_RATE_",
        "FUND_STATUS_",
        "COMPANY_",
        "SUBS_STATUS_CODE_",
        "TYPE_CODE_",
    ]
def search_by_status(self, collection, data_id=None):
    """Return a cursor over unprocessed documents of the current entity.

    A document qualifies when its ``ENTITY_CODE_`` equals
    ``self.mongo_entity_code`` and it has no ``d`` field. The first such
    document is located with ``find_one`` (optionally starting at
    ``data_id``); the returned cursor then covers every qualifying document
    whose ``_id`` is greater than or equal to that first one.

    :param collection: collection object exposing ``find_one`` and ``find``
    :param data_id: optional ``_id`` (anything ``ObjectId`` accepts) to
        resume from
    :return: cursor opened with ``no_cursor_timeout=True``, or None when
        nothing matches or a ``TypeError`` occurred
    """
    mon_logger = Logger().logger

    def build_filter(min_id=None):
        # Same three-part $and filter for every query; the _id bound is
        # optional.
        conditions = [{"ENTITY_CODE_": self.mongo_entity_code}]
        if min_id is not None:
            conditions.append({"_id": {"$gte": min_id}})
        conditions.append({"d": {"$exists": False}})
        return {"$and": conditions}

    try:
        mon_logger.info("开始查取数据")
        if data_id:
            result_one = collection.find_one(build_filter(ObjectId(data_id)))
        else:
            result_one = collection.find_one(build_filter())

        if result_one is None:
            mon_logger.info("ENTITY: {} 数据查取为空".format(
                self.mongo_entity_code))
            return None

        result = collection.find(build_filter(result_one["_id"]),
                                 no_cursor_timeout=True)
        # The message has two placeholders; the original supplied only the
        # count and therefore raised IndexError on every successful lookup.
        mon_logger.info("ENTITY: {} 数据查取成功共 {}条".format(
            self.mongo_entity_code, result.count()))
        return result
    except TypeError as e:
        mon_logger.error(
            "MongoDB数据查取失败,错误信息为{}, 请检查 ENTITY_CODE_ 是否正确:{}".format(
                e, self.mongo_entity_code))
    finally:
        # NOTE(review): the client is closed even when a cursor is returned;
        # confirm client_close() leaves the cursor usable for the caller.
        self.client_close()
def get_check_collection(self, collection_list):
    """Return ``self.db[self.mongo_collection]`` when that collection exists.

    :param collection_list: names of the collections available in the database
    :return: the collection object, or None after logging an error and
        closing the client when the name is not in ``collection_list``
    """
    mon_logger = Logger().logger
    if self.mongo_collection not in collection_list:
        mon_logger.error("MongoDB没有该集合,请检查")
        self.client_close()
        return None
    return self.db[self.mongo_collection]
def get_check_collection(self, db, collection_list):
    """Return ``db[self.mongo_collection]`` when that collection exists.

    :param db: database object indexed by collection name
    :param collection_list: names of the collections available in ``db``
    :return: the collection object, or None after logging an error when the
        name is not in ``collection_list``
    """
    mon_logger = Logger().logger
    if self.mongo_collection not in collection_list:
        mon_logger.error(
            f"MongoDB {self.mongo_db} 没有 {self.mongo_collection} 集合,请检查")
        return None
    return db[self.mongo_collection]
def __init__(self):
    """Initialise the ``*ORGANIZE`` entity codes, the logger and a counter."""
    # Every entity code is a bank prefix followed by "ORGANIZE".
    bank_prefixes = (
        "ABC", "BOCOM", "BOC", "CBHB", "CCB", "CEB",
        "CGB", "CIB", "CMBC", "CMB", "CZB", "EBCL",
        "ECITIC", "HXB", "ICBC", "PAB", "PSBC", "SPDB",
    )
    self.code_list = [prefix + "ORGANIZE" for prefix in bank_prefixes]
    self.logger = Logger().logger
    self.count = 0
def __init__(self, param):
    """Parse the invocation parameter string and record the entity type.

    :param param: string holding a Python expression, evaluated into
        ``self.param_dict``; a copy wrapped in double quotes is kept in
        ``self.param``
    :raises Exception: when ``param_dict`` is truthy but ``"entityType"``
        cannot be read from it; the original error is attached as
        ``__cause__``

    ``self.entity_type`` is only set when ``param_dict`` is truthy.
    """
    self.logger = Logger().logger
    self.invoke_type = "BRANCH"
    self.base_dir = os.path.dirname(os.getcwd())
    # SECURITY NOTE(review): eval() executes arbitrary code. If ``param`` can
    # come from outside the process, switch to ast.literal_eval or json.loads
    # once the accepted input format is confirmed. Left as-is to avoid
    # rejecting inputs the callers currently rely on.
    self.param_dict = eval(param)
    self.param = "\"" + param + "\""
    if self.param_dict:
        try:
            self.entity_type = self.param_dict["entityType"]
        except Exception as e:
            # Same exception type as before, but with a message and the
            # original cause preserved.
            raise Exception(
                "param is missing a readable \"entityType\" entry: {}".format(
                    param)) from e
def __init__(self):
    """Connect to MongoDB and Phoenix for BAIDU_SEARCH and reset run state."""
    # MongoDB: client, then the checked collection handle.
    self.m_client = MongoClient(mongo_collection="BAIDU_SEARCH")
    mongo_db, collection_names = self.m_client.client_to_mongodb()
    self.collection = self.m_client.get_check_collection(
        db=mongo_db, collection_list=collection_names)

    # Phoenix: client, then the connection.
    self.p_client = PhoenixHbase(table_name="BAIDU_SEARCH")
    self.connection = self.p_client.connect_to_phoenix()

    self.logger = Logger().logger

    # Run counters and the id of the record being processed.
    self.find_count = self.success_count = self.remove_count = 0
    self.old_count = self.bad_count = self.error_count = 0
    self.data_id = ""

    # BANK_NAME_ lookup: bank code -> Chinese bank name. Per the original
    # remark, BOCOM was renamed COMM, ECITIC was renamed CITIC, and Ping An
    # Bank, Bank of Beijing and Bank of Shanghai were added.
    self.name_dict = dict(
        ICBC="中国工商银行",
        ABC="中国农业银行",
        BOC="中国银行",
        CCB="中国建设银行",
        COMM="交通银行",
        PSBC="中国邮政储蓄银行",
        CZB="浙商银行",
        CBHB="渤海银行",
        CITIC="中信银行",
        CEB="中国光大银行",
        HXB="华夏银行",
        CMBC="中国民生银行",
        CMB="招商银行",
        CIB="兴业银行",
        CGB="广发银行",
        PAB="平安银行",
        SPDB="浦发银行",
        EBCL="恒丰银行",
        PINGAN="平安银行",
        LTD="中国光大银行",
        BEIJING="北京银行",
        BOSC="上海银行",
    )

    # TYPE_ values.
    self.type_list = [
        "Market",
        "Activity",
        "GoodStart",
        "MidSeason",
        "PrivateBank",
        "Recommendation",
    ]
def insert_to_mysql(self, connection, data):
    """Insert one row (dict) or several rows (list/tuple of dicts).

    The statement is assembled as text: column names come from the dict
    keys (of the first row for a sequence) and each row's values are
    rendered with ``str(tuple(...))``. Failures of the commit are logged,
    not raised.

    NOTE(review): values are embedded in the SQL text rather than bound as
    parameters, so quoting relies on Python's repr and is not injection
    safe; parameterising needs a change in ``cs_commit``.

    :param connection: MySQL connection handed to ``self.cs_commit``
    :param data: dict, or list/tuple of dicts
    :raises Exception: when ``data`` is neither a dict nor a list/tuple
    """
    mysql_logger = Logger().logger

    def render_values(row):
        # str() of a one-element tuple ends with ",)"; drop that comma so the
        # text is a valid SQL value list.
        rendered = str(tuple(row.values()))
        if rendered[-2] == ",":
            rendered = rendered[:-2] + ")"
        return rendered

    if isinstance(data, dict):
        columns = list(data)
        values_sql = render_values(data)
        sql = f"INSERT INTO {self.mysql_table} ({','.join(columns)}) VALUES{values_sql}"
    elif isinstance(data, (list, tuple)):
        columns = list(data[0])
        rendered_rows = [render_values(row) for row in data]
        sql = f"INSERT INTO {self.mysql_table} ({','.join(columns)}) VALUES"
        sql += ",".join(rendered_rows)
    else:
        raise Exception("not format type of data")

    try:
        mysql_logger.info(f"网络声量sql==>{sql}")
        count = self.cs_commit(connection=connection, sql=sql)
        mysql_logger.info(f"MySQL 插入成功 {count} 条")
    except Exception as e:
        mysql_logger.exception(f"网络声量 插入失败,ERROR: {e}")
def match_from_mongo(self, collection, match, output):
    """Run a ``$match`` + ``$project`` aggregation and return its cursor.

    :param collection: collection object exposing ``aggregate``
    :param match: match condition, e.g. {"ENTITY_CODE_": "XXXXXXXXX"}
    :param output: field name, or iterable of field names, to project
    :return: the aggregation result, or None when a ``TypeError`` was
        raised (it is logged, not re-raised)

    The client is closed through ``self.client_close()`` in every case.
    """
    mon_logger = Logger().logger
    fields = [output] if isinstance(output, str) else output
    try:
        mon_logger.info("MongoDB 开始查取数据")
        projection = {field: 1 for field in fields}
        pipeline = [{"$match": match}, {"$project": projection}]
        cursor = collection.aggregate(pipeline)
        mon_logger.info("MongoDB 数据查取成功")
        return cursor
    except TypeError as e:
        mon_logger.error(
            "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))
    finally:
        self.client_close()
def __new__(cls, table_name, collection_name, param, verify_field=None):
    """Create the shared instance once and attach class-level connections.

    The instance and every connection are stored on the class; later calls
    return the stored instance without reconnecting.

    :param table_name: HBase table name
    :param collection_name: MongoDB collection name
    :param param: not used in this method
    :param verify_field: not used in this method
    :return: the shared instance
    """
    # Already built: hand back the stored instance.
    if hasattr(cls, "instance"):
        return cls.instance

    cls.instance = super(GenericScript, cls).__new__(cls)

    # Phoenix connection.
    cls.p_client = PhoenixHbase(table_name=table_name)
    cls.connection = cls.p_client.connect_to_phoenix()

    # MongoDB connection.
    cls.m_client = MongoClient(mongo_collection=collection_name)
    cls.db, cls.collection_list = cls.m_client.client_to_mongodb()

    # MongoDB connection to the "spider_data_old" database.
    cls.old_client = MongoClient(mongo_collection=collection_name)
    cls.old_client.mongo_db = "spider_data_old"
    cls.old_db, cls.old_collection_list = (
        cls.old_client.client_to_mongodb())

    # MySQL connection and the bank list read from it.
    cls.mysql_client, cls.mysql_connection = cls.mysql_connect(dev=True)
    cls.bank_list = cls.data_from_mysql()

    # Logging.
    cls.logger = Logger().logger

    # Statistics.
    cls.count_all = 0
    return cls.instance
def __new__(cls, table_name, collection_name, param, verify_field=None):
    """Create the shared instance once and attach class-level resources.

    The instance, the connections and the lookup data are stored on the
    class; later calls return the stored instance without reconnecting.

    :param table_name: HBase table name
    :param collection_name: MongoDB collection name
    :param param: not used in this method
    :param verify_field: not used in this method
    :return: the shared instance
    """
    # Already built: hand back the stored instance.
    if hasattr(cls, "instance"):
        return cls.instance

    cls.instance = super(GenericScript, cls).__new__(cls)

    # Phoenix connection.
    cls.p_client = PhoenixHbase(table_name=table_name)
    cls.connection = cls.p_client.connect_to_phoenix()

    # MongoDB connection.
    cls.m_client = MongoClient(mongo_collection=collection_name)
    cls.db, cls.collection_list = cls.m_client.client_to_mongodb()

    # MongoDB connection to "spider_data_old". Per the original remark, the
    # table connections of that database are discovered by enumeration, so
    # this client has to be set up by hand.
    cls.old_client = MongoClient(mongo_collection=collection_name)
    cls.old_client.mongo_db = "spider_data_old"
    cls.old_db, cls.old_collection_list = (
        cls.old_client.client_to_mongodb())

    # MySQL connection and the lookup lists read from it.
    cls.mysql_client, cls.mysql_connection = cls.mysql_connect()
    (cls.province_list, cls.city_list, cls.area_list, cls.dir_area_list,
     cls.bank_list) = cls.data_from_mysql()

    # Logging.
    cls.logger = Logger().logger

    # Statistics.
    cls.count_all = 0

    # Chinese numeral / date character -> Arabic digit or separator.
    cls.number_dict = {
        "〇": "0",
        "○": "0",
        "零": "0",
        "一": "1",
        "二": "2",
        "三": "3",
        "四": "4",
        "五": "5",
        "六": "6",
        "七": "7",
        "八": "8",
        "九": "9",
        "十": "10",
        "年": "-",
        "月": "-",
        "日": "",
    }
    return cls.instance
def __init__(self, entity_type="WEIBOBASICINFO"):
    """Store the entity type and reset field names, buffers and counters.

    :param entity_type: entity type name, "WEIBOBASICINFO" by default
    """
    self.entity_type = entity_type
    self.logger = Logger().logger

    # Field names kept for verification.
    self.verify_list = [
        "ID_",
        "BANK_CODE_",
        "BANK_NAME_",
        "PERIOD_TIME_",
        "AREA_CODE_",
        "CREATE_TIME_",
        "WEIBO_CODE_",
        "MAIN_URL_",
        "NAME_",
        "FOCUS_",
        "FANS_",
        "COMPANY_URL_",
        "COMPANY_",
        "DETAILED_URL_",
        "VIRIFIED_",
        "BIREF_",
        "ENTITY_NAME_",
        "ENTITY_CODE_",
        "DEALTIME_",
        "PROVINCE_NAME_",
        "PROVINCE_CODE_",
        "STATUS_1",
    ]

    # Working buffers, each starting empty.
    self.remove_id_list = []
    self.copy_mongo_data_list = []
    self.branch_code_list = []

    # Run counters.
    self.find_count = self.bad_count = self.success_count = 0
    self.remove_count = self.old_count = 0
def __init__(self):
    """Connect to MongoDB, MySQL and Phoenix for the insurance data.

    The ``TYPE`` dictionary is read from ``sys_dict_item`` and stored in
    ``self.type``. The MySQL connection is closed even when that query
    raises.
    """
    # Create the MongoDB client and the checked collection handle.
    self.m_client = MongoClient(mongo_collection="JSINSURANCE_CCBDATA")
    db, collection_list = self.m_client.client_to_mongodb()
    self.collection = self.m_client.get_check_collection(
        db=db, collection_list=collection_list)

    # Create the MySQL client.
    mysql_config = {
        "host": MYSQL_HOST_25,
        "port": MYSQL_PORT_25,
        "database": MYSQL_DATABASE_25,
        "user": MYSQL_USER_25,
        "password": MYSQL_PASSWORD_25,
        "table": MYSQL_TABLE_25
    }
    mysql_client = MysqlClient(**mysql_config)
    mysql_connection = mysql_client.client_to_mysql()
    try:
        self.type = mysql_client.search_area_code(
            sql="select DICT_CODE_,ITEM_LABEL_,ITEM_VALUE_ from sys_dict_item "
            "where DICT_CODE_='TYPE'",
            connection=mysql_connection)
    finally:
        # Release the connection even if the query failed.
        mysql_client.close_client(connection=mysql_connection)

    # Create the Phoenix client and connect.
    self.p_client = PhoenixHbase(table_name="INSURANCE")
    self.connection = self.p_client.connect_to_phoenix()

    self.logger = Logger().logger

    # Run counters and the id of the record being processed.
    self.find_count = 0
    self.success_count = 0
    self.remove_count = 0
    self.old_count = 0
    self.bad_count = 0
    self.error_count = 0
    self.data_id = ""
    self.a = list()
def wandou(self):
    """Fetch one proxy address from the Wandou proxy API.

    :return: ``"ip:port"`` string, or False when the request fails or the
        response carries no usable entry under ``data``
    """
    log = Logger().logger
    url_wandou = r'http://h.wandouip.com/get/ip-list?pack=853&num=1&xy=1&type=2&lb=\r\n&mr=1&'
    try:
        time.sleep(random.randint(1, 5))
        # Named ``payload`` rather than ``re`` to avoid shadowing the
        # standard-library module name.
        payload = requests.get(url=url_wandou).json()
        print(payload)
        # NOTE(review): 100-second pause after every successful call; kept
        # because callers may depend on the pacing. Confirm it is intended.
        time.sleep(100)
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C / SystemExit during the
        # sleeps are no longer swallowed.
        print(2)
        log.error('豌豆代理外部接口获取ip异常!')
        return False

    try:
        entry = payload.get('data')[0]
        ip = '{ip}:{port}'.format(ip=entry.get('ip'), port=entry.get('port'))
    except (AttributeError, IndexError, KeyError, TypeError):
        # Missing, empty or malformed "data": report failure the same way
        # as a failed request instead of crashing.
        log.error('豌豆代理外部接口获取ip异常!')
        return False

    print(ip)
    return ip
def __init__(self):
    """Connect to MongoDB ("TREND") and Phoenix, then reset run state."""
    # MongoDB: client, then the checked collection handle.
    self.m_client = MongoClient(mongo_collection="TREND")
    mongo_db, collection_names = self.m_client.client_to_mongodb()
    self.collection = self.m_client.get_check_collection(
        db=mongo_db, collection_list=collection_names)

    # Phoenix: client, then the connection.
    self.p_client = PhoenixHbase(table_name="CHA_BRANCH_MARKET_ACT")
    self.connection = self.p_client.connect_to_phoenix()

    self.logger = Logger().logger

    # Run counters and the id of the record being processed.
    self.find_count = self.success_count = self.remove_count = 0
    self.old_count = self.bad_count = self.error_count = 0
    self.data_id = ""
def search_from_mysql(self,
                      connection,
                      output=None,
                      where_condition=None,
                      limit_num=None,
                      offset_num=None):
    """Run a SELECT against ``self.mysql_table`` and return the rows.

    :param connection: MySQL connection providing ``cursor``
    :param output: column name, or list/tuple of column names; all columns
        when falsy
    :param where_condition: WHERE clause; the keyword is prepended unless
        the text already contains "where" or "WHERE" anywhere
    :param limit_num: maximum number of rows (skipped when falsy)
    :param offset_num: number of rows to skip (skipped when falsy)
    :return: rows from ``fetchall`` when ``execute`` reports a non-zero
        count, otherwise None (also None after a logged ``TypeError``)
    :raises Exception: when ``output`` is truthy but neither a str nor a
        list/tuple

    NOTE(review): the statement is assembled as text from the arguments;
    callers must not pass untrusted input.
    """
    mysql_logger = Logger().logger

    if output:
        if isinstance(output, str):
            sql = f"SELECT {output} FROM {self.mysql_table}"
        elif isinstance(output, (tuple, list)):
            sql = f"SELECT {','.join(output)} FROM {self.mysql_table}"
        else:
            raise Exception("not format type of \"output\"")
    else:
        sql = f"SELECT * FROM {self.mysql_table}"

    if where_condition:
        if "where" in where_condition or "WHERE" in where_condition:
            sql = sql + " " + where_condition
        else:
            sql = sql + f" WHERE {where_condition}"
    if limit_num:
        sql = sql + f" LIMIT {limit_num}"
    if offset_num:
        sql = sql + f" OFFSET {offset_num}"

    # Pre-bind the cursor so the ``finally`` clause cannot hit an unbound
    # name (and mask the real error) when ``connection.cursor`` itself fails.
    cs = None
    try:
        cs = connection.cursor(pymysql.cursors.DictCursor)
        count = cs.execute(sql)
        result = cs.fetchall()
        if count:
            mysql_logger.info(f"Mysql 查取成功 {count} 条")
            return result
        mysql_logger.info("数据库查取数为0")
    except TypeError:
        mysql_logger.error("MySQL查取失败,请检查")
    finally:
        if cs is not None:
            cs.close()
def __init__(self,
             table_name="CHA_BRANCH_MAPBAR",
             collection_name="mapbar"):
    """Connect to Phoenix and to a fixed MongoDB host for mapbar data.

    :param table_name: Phoenix/HBase table name
    :param collection_name: MongoDB collection name
    """
    # Phoenix connection.
    self.p_client = PhoenixHbase(table_name=table_name)
    self.connection = self.p_client.connect_to_phoenix()

    # MongoDB connection: the wrapper is pointed at a fixed host/port and
    # given a pymongo client for that same address.
    mongo_host, mongo_port = "172.22.69.35", 20000
    self.m_client = MongoClient(mongo_collection=collection_name,
                                entity_code="MAPBAR_DEATAIL_BJ")
    self.m_client.mongo_host = mongo_host
    self.m_client.mongo_port = mongo_port
    # NOTE(review): pymongo timeouts are in milliseconds, so 60 is very
    # short; confirm the value is intended.
    self.m_client.client = pymongo.MongoClient(host="172.22.69.35",
                                               port=20000,
                                               serverSelectionTimeoutMS=60,
                                               connectTimeoutMS=60,
                                               connect=False)
    self.db, self.collection_list = self.m_client.client_to_mongodb()
    self.collection = self.m_client.get_check_collection(
        db=self.db, collection_list=self.collection_list)

    # Logging.
    self.logger = Logger().logger

    # Counter.
    self.count = 0
def __init__(self):
    """Initialise the file list, code list, logger, counters and fields.

    ``self.get_code_list()`` runs right after ``file_list`` is created and
    before the logger is assigned; that order is kept.
    """
    self.file_list = []
    self.get_code_list()
    self.logger = Logger().logger

    # Run counters.
    self.find_count = self.success_count = 0
    self.remove_count = self.old_count = 0

    # Working buffers, each starting empty.
    self.copy_mongo_data_list = []
    self.remove_id_list = []

    # Field names used for verification.
    self.verify_list = [
        "ID_",
        "CONTENT_",
        "NOTICE_TIME_",
        "TITLE_",
        "PROJECT_NAME_",
        "BID_CONTENT_",
        "SIGN_START_TIME_",
        "SIGN_END_TIME_",
        "OPEN_BID_TIME_",
        "OPEN_BID_PLACE_",
        "BID_AGENCY_",
        "APPLY_CONDITION_",
        "SIGN_QUALIFICATION_",
        "PROJECT_ID_",
        "WIN_CANDIDATE_",
        "CANDIDATE_RANK_",
        "BID_",
        "URL_",
        "DEALTIME_",
        "CREATE_TIME_",
        "ENTITY_NAME_",
        "ENTITY_CODE_",
        "ENTITY_STATUS_",
        "SIGN_MATERIAL_",
        "BID_TYPE_",
        "DATETIME_",
        "BUDGET_PRICE_",
        "PASS_REASON_",
        "PRESALE_CONTENT_",
        "PRESALE_WAY_",
        "PRESALE_START_TIME_",
        "PRESALE_END_TIME_",
        "PRESALE_ADDR_",
        "PRESALE_PREPARE_",
        "IMAGE_",
    ]
def __init__(self, entity_type="FOR_TEST_WECHAT"):
    """Connect to the Phoenix table named by ``entity_type`` and reset state.

    :param entity_type: entity type, also used as the Phoenix table name
    """
    self.entity_type = entity_type
    self.logger = Logger().logger

    # Phoenix: client for the entity's table, then the connection.
    self.p_client = PhoenixHbase(table_name=self.entity_type)
    self.connection = self.p_client.connect_to_phoenix()

    # Working buffers, each starting empty.
    self.remove_id_list = []
    self.copy_mongo_data_list = []

    # Run counters and the id of the record being processed.
    self.find_count = self.success_count = self.remove_count = 0
    self.old_count = self.bad_count = self.error_count = 0
    self.data_id = ""
    self.row_key_count = 0
def __init__(self, table_name, collection_name, param):
    """Reset run state, build the bank-name lookup, then run the base init.

    :param table_name: forwarded to the parent initialiser
    :param collection_name: forwarded to the parent initialiser
    :param param: forwarded to the parent initialiser

    The parent is called last with
    ``verify_field={"WEIBO_CODE_": "WEIBO_CODE_"}``.
    """
    self.logger = Logger().logger

    # Working buffers, each starting empty.
    self.remove_id_list = []
    self.copy_mongo_data_list = []
    self.branch_code_list = []

    # Run counters.
    self.find_count = self.bad_count = self.success_count = 0
    self.remove_count = self.old_count = 0

    # Chinese bank name or abbreviation -> bank code. Values are reproduced
    # exactly, including the trailing space in 'NRC '.
    self.name_dict = {
        '工行': 'ICBC',
        '工商银行': 'ICBC',
        '农行': 'ABC',
        '农业银行': 'ABC',
        '中行': 'BOC',
        '中银': 'BOC',
        '建行': 'CCB',
        '邮政储蓄银行': 'PSBC',
        '建信': 'CCB',
        '建设银行': 'CCB',
        '交行': 'BCM',
        '交通银行': 'BCM',
        '邮储银行': 'PSBC',
        '浙商银行': 'CZB',
        '渤海银行': 'CBHB',
        '中信银行': 'ECITIC',
        '光大银行': 'CEB',
        '华夏银行': 'HB',
        '招行': 'CMB',
        '招商银行': 'CMB',
        '兴业银行': 'CIB',
        '广发银行': 'CGB',
        '平安银行': 'PAB',
        '浦发银行': 'SPDB',
        '恒丰银行': 'EBCL',
        '浦东发展银行': 'SPDB',
        '民生银行': 'CMBC',
        '汇丰银行': 'HSBC',
        '渣打银行': 'SC',
        '南海农商银行': 'NRC ',
        '顺德农村商业银行': 'sdebank',
    }

    verify_field = {"WEIBO_CODE_": "WEIBO_CODE_"}
    super(WeiboBasicInfoScript, self).__init__(table_name=table_name,
                                               collection_name=collection_name,
                                               param=param,
                                               verify_field=verify_field)
def __new__(cls, table_name, collection_name):
    """Create the shared instance once and attach class-level resources.

    The instance, the connections and the lookup data are stored on the
    class; later calls return the stored instance without reconnecting.

    :param table_name: HBase table name
    :param collection_name: MongoDB collection name
    :return: the shared instance
    """
    # Already built: hand back the stored instance.
    if hasattr(cls, "instance"):
        return cls.instance

    cls.instance = super(GenericScript, cls).__new__(cls)

    # Phoenix connection.
    cls.p_client = PhoenixHbase(table_name=table_name)
    cls.connection = cls.p_client.connect_to_phoenix()

    # MongoDB connection.
    cls.m_client = MongoClient(mongo_collection=collection_name)
    cls.db, cls.collection_list = cls.m_client.client_to_mongodb()

    # Area lookup lists returned by area_from_mysql().
    (cls.province_list, cls.city_list, cls.area_list,
     cls.dir_area_list) = cls.area_from_mysql()

    # Logging.
    cls.logger = Logger().logger

    # Chinese bank name -> bank code.
    cls.bank_dict = {
        '中国工商银行': 'ICBC',
        '中国农业银行': 'ABC',
        '中国银行': 'BOC',
        '中国建设银行': 'CCB',
        '交通银行': 'BOCOM',
        '中国邮政储蓄银行': 'PSBC',
        '浙商银行': 'CZB',
        '渤海银行': 'CBHB',
        '中信银行': 'ECITIC',
        '中国光大银行': 'CEB',
        '华夏银行': 'HXB',
        '中国民生银行': 'CMBC',
        '招商银行': 'CMB',
        '兴业银行': 'CIB',
        '广发银行': 'CGB',
        '平安银行': 'PAB',
        '浦发银行': 'SPDB',
        '恒丰银行': 'EBCL',
    }
    return cls.instance
def __new__(cls, *args, **kwargs):
    """Create the shared instance once with its MySQL and Phoenix handles.

    The instance and both connections are stored on the class; later calls
    return the stored instance without reconnecting. ``args`` and ``kwargs``
    are accepted but not used here.

    :return: the shared instance
    """
    # Already built: hand back the stored instance.
    if hasattr(cls, "instance"):
        return cls.instance

    cls.instance = super(DataTransfer, cls).__new__(cls)

    # MySQL settings for the "sch_job_inst" table.
    config = dict(
        host="172.22.69.43",
        port=3306,
        table="sch_job_inst",
        database="ijep",
        user="******",
        password="******",
        charset="utf8",
    )
    cls.mysql_client = MysqlClient(**config)
    cls.mysql_connection = cls.mysql_client.client_to_mysql()

    # Phoenix client for the "SCH_JOB_INST" table.
    cls.hbase_client = PhoenixHbase("SCH_JOB_INST")
    cls.hbase_connection = cls.hbase_client.connect_to_phoenix()

    cls.logger = Logger().logger
    return cls.instance
def __init__(self,
             table_name="CHA_BRANCH_WEIBO_BASIC",
             collection_name="WEIBOBASICINFO"):
    """Connect to Phoenix and to a fixed MongoDB host for Weibo basic info.

    :param table_name: Phoenix/HBase table name
    :param collection_name: MongoDB collection name
    """
    # Phoenix connection.
    self.p_client = PhoenixHbase(table_name=table_name)
    self.connection = self.p_client.connect_to_phoenix()

    # MongoDB connection.
    self.m_client = MongoClient(entity_code="CMBCMICROBLOG",
                                mongo_collection=collection_name)
    # NOTE(review): host and port are stored on this object, not on
    # self.m_client; confirm whether the wrapper was meant to receive them.
    self.mongo_host, self.mongo_port = "172.22.69.35", 20000
    # NOTE(review): pymongo timeouts are in milliseconds, so 60 is very
    # short; confirm the value is intended.
    self.m_client.client = pymongo.MongoClient(host="172.22.69.35",
                                               port=20000,
                                               serverSelectionTimeoutMS=60,
                                               connectTimeoutMS=60,
                                               connect=False)
    self.db, self.collection_list = self.m_client.client_to_mongodb()
    self.collection = self.m_client.get_check_collection(
        db=self.db, collection_list=self.collection_list)

    # Logging.
    self.logger = Logger().logger
def get_mongo_column_dict(self, collection, column1, column2):
    """Project two columns (without ``_id``) from every document.

    :param collection: collection object exposing ``aggregate``
    :param column1: first field name to keep
    :param column2: second field name to keep
    :return: the aggregation result
    :raises Exception: when the aggregation raises ``TypeError``

    ``self.m_client.client`` is closed in every case.
    """
    mon_logger = Logger().logger
    try:
        mon_logger.info("开始查取数据")
        projection = {"_id": 0, column1: 1, column2: 1}
        pipeline = [{"$project": projection}]
        return collection.aggregate(pipeline)
    except TypeError as e:
        mon_logger.error(
            "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确".format(e))
        raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e))
    finally:
        self.m_client.client.close()
def delete_from_mysql(self, connection, where_condition):
    """Delete rows from ``self.mysql_table`` that match ``where_condition``.

    The keyword WHERE is prepended unless the condition text already
    contains "where" or "WHERE" anywhere. Failures are logged, not raised.

    :param connection: MySQL connection handed to ``self.cs_commit``
    :param where_condition: WHERE clause, with or without the keyword
    """
    mysql_logger = Logger().logger
    has_keyword = ("where" in where_condition
                   or "WHERE" in where_condition)
    clause = where_condition if has_keyword else f"WHERE {where_condition}"
    sql = f"DELETE FROM {self.mysql_table} {clause}"
    try:
        count = self.cs_commit(connection=connection, sql=sql)
        mysql_logger.info(f"MySQL 删除成功 {count} 条")
    except Exception as e:
        mysql_logger.exception(f"MySQL 删除失败,ERROR: {e}")