def match_from_mongo(self, collection, match, output):
    """
    Return the ``output`` field of the first document matching ``match``.

    Runs a ``$match`` + ``$project`` aggregation on ``collection`` and returns
    the requested field of the first document the cursor yields.
    ``self.client_close()`` is always called before returning or raising.

    :param collection: collection object exposing ``aggregate``
    :param match: filter document used for the ``$match`` stage
    :param output: name of the single field to project and return
    :return: value of ``output`` in the first document, or ``None`` when the
        aggregation yields nothing
    :raises Exception: when the aggregation raises ``TypeError``
    :raises KeyError: when the first document has no ``output`` key
    """
    mon_logger = Logger().logger
    try:
        mon_logger.info("开始查取数据")
        pipeline = [
            {"$match": match},
            {"$project": {"budgetPrice": 1, "_id": 0, output: 1}},
        ]
        result = collection.aggregate(pipeline)
        for document in result:
            # Only the first document is of interest.
            mon_logger.info("数据查取成功")
            return document[output]
        # Reached only when the cursor was empty. Previously this message
        # lived in an ``else`` inside the loop and could never be emitted
        # for an empty result.
        mon_logger.error("WEIBO_CODE_ 查取数据为空")
        return None
    except TypeError as e:
        mon_logger.error(
            "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))
        raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e)) from e
    finally:
        self.client_close()
def client_to_mongodb(self):
    """
    List the collection names of ``self.db``, retrying on timeout.

    The first attempt only handles ``ServerSelectionTimeoutError``; after
    that, attempts 2 through 5 are made and any exception counts as a failed
    attempt. When every attempt fails an error is logged and
    ``self.client_close()`` is called.

    :return: list of collection names, or ``None`` when all attempts failed
    """
    log = Logger().logger
    host, port, database = (self.mongo_host, self.mongo_port,
                            self.mongo_database)
    log.info("开始连接MongoDB({}:{}),database={}".format(host, port, database))

    def list_collections():
        # One attempt: fetch the names, then log the success.
        names = self.db.collection_names()
        log.info("MongoDB({}:{})连接成功".format(host, port))
        return names

    try:
        return list_collections()
    except pymongo.errors.ServerSelectionTimeoutError as first_error:
        log.warning("MongoDB({}:{})连接失败".format(host, port))
        for attempt in range(2, 6):
            try:
                return list_collections()
            except Exception:
                log.warning("MongoDB({}:{})第{}次连接失败".format(
                    host, port, attempt))
        else:
            # No attempt returned: report the original timeout and close.
            log.error(
                "MongoDB连接失败,错误信息为: {}, 请检查各项参数是否正确host={}, port={},database={}"
                .format(first_error, host, port, database))
            self.client_close()
def match_from_mongo(self, collection, match, output):
    """
    Run a ``$match`` + ``$project`` aggregation and return its result.

    :param collection: collection object exposing ``aggregate``
    :param match: match condition as a dict, e.g. {"ENTITY_CODE_": "XXXXXXXXX"}
    :param output: a field name (str) or an iterable of field names to project
    :return: whatever ``collection.aggregate`` returns, or ``None`` when a
        ``TypeError`` was caught and logged
    """
    log = Logger().logger
    fields = [output] if isinstance(output, str) else output
    try:
        log.info("MongoDB 开始查取数据")
        # Include every requested field in the projection.
        projection = {field: 1 for field in fields}
        pipeline = [{"$match": match}, {"$project": projection}]
        cursor = collection.aggregate(pipeline)
        log.info("MongoDB 数据查取成功")
        return cursor
    except TypeError as error:
        log.error(
            "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(
                error, match))
    finally:
        # Runs on every exit path, including the successful return above.
        self.client_close()
def client_to_mysql(self):
    """
    Open a MySQL connection from ``self.mysql_config``, retrying on failure.

    The first attempt only handles ``pymysql.err.OperationalError``; it is
    followed by retries numbered 2 through 6, each of which treats any
    exception as a failed attempt.

    :return: the MySQL connection, or ``None`` when every attempt failed
    """
    log = Logger().logger
    final_attempt = 6
    try:
        log.info("正在连接MySQL({}@{}:{})".format(
            self.mysql_user, self.mysql_host, self.mysql_port))
        conn = pymysql.connect(**self.mysql_config)
        log.info("Mysql连接成功({}@{}:{})".format(
            self.mysql_user, self.mysql_host, self.mysql_port))
        return conn
    except pymysql.err.OperationalError:
        for attempt in range(2, final_attempt + 1):
            try:
                log.warning("MySQL连接失败,正在重试第{}次连接".format(attempt))
                conn = pymysql.connect(**self.mysql_config)
                log.info("Mysql连接成功")
                return conn
            except Exception as retry_error:
                log.warning("第{}次连接MySQL失败".format(attempt))
                if attempt == final_attempt:
                    # Out of retries: report the last failure.
                    log.error("MySQL连接失败,错误信息为{}".format(retry_error))
def http_client(self, url, param=None, method='GET', code="utf-8"):
    """
    Request ``url`` through a proxy obtained from ``self.wandou()``.

    :param url: URL passed to ``HTTPConnection.request``
    :param param: optional dict of extra headers merged over the
        ``Proxy-Authorization`` header
    :param method: HTTP method, ``'GET'`` by default
    :param code: encoding used to decode the response body
    :return: decoded response text, or ``None`` when no proxy is available
        or the request fails
    """
    log = Logger().logger
    username = "******"  # proxy account user name
    password = "******"  # proxy account password

    ip = self.wandou()
    if not ip:
        # wandou() returns False when it cannot obtain a proxy; calling
        # .split() on that value would raise AttributeError.
        log.error("未获取到可用代理ip, 放弃请求: {}".format(url))
        return None
    ips = ip.split(':')
    if len(ips) < 2:
        log.error("代理地址格式错误: {}".format(ip))
        return None
    proxy_ip = str(ips[0])  # proxy host
    proxy_port = str(ips[1])  # proxy port
    print(proxy_ip, proxy_port)

    headers = {
        'Proxy-Authorization':
        'Basic %s' % (self.base_code(username, password))
    }
    if param:
        headers = dict(headers, **param)

    con = None
    try:
        con = http.client.HTTPConnection(proxy_ip,
                                         port=proxy_port,
                                         timeout=10)
        con.request(method, url, headers=headers)
        resu = con.getresponse()
        return resu.read().decode(code, errors="ignore")
    except Exception as e:
        log.error(e.args)
        return None
    finally:
        # Release the socket on both the success and the failure path.
        if con is not None:
            con.close()
def search_by_status(self, collection, data_id=None):
    """
    Find documents of ``self.mongo_entity_code`` that have no ``d`` field.

    A first ``find_one`` locates the starting document (optionally at or
    after ``data_id``); the returned cursor then covers every matching
    document whose ``_id`` is greater than or equal to that document's
    ``_id``. ``self.client_close()`` is always called before returning.

    :param collection: collection object exposing ``find_one`` and ``find``
    :param data_id: optional id; when given, only documents with
        ``_id >= ObjectId(data_id)`` are considered for the starting document
    :return: cursor over the matching documents, or ``None`` when nothing
        matches or a ``TypeError`` was caught and logged
    """
    mon_logger = Logger().logger
    try:
        mon_logger.info("开始查取数据")
        entity_filter = {"ENTITY_CODE_": self.mongo_entity_code}
        d_absent = {"d": {"$exists": False}}

        first_conditions = [entity_filter]
        if data_id:
            first_conditions.append({"_id": {"$gte": ObjectId(data_id)}})
        first_conditions.append(d_absent)
        result_one = collection.find_one({"$and": first_conditions})

        if result_one is None:
            mon_logger.info("ENTITY: {} 数据查取为空".format(
                self.mongo_entity_code))
            return None

        result = collection.find(
            {
                "$and": [
                    entity_filter,
                    {"_id": {"$gte": result_one["_id"]}},
                    d_absent,
                ]
            },
            no_cursor_timeout=True)
        # The template has two placeholders; supplying only the count raised
        # IndexError on every successful query.
        mon_logger.info("ENTITY: {} 数据查取成功共 {}条".format(
            self.mongo_entity_code, result.count()))
        return result
    except TypeError as e:
        mon_logger.error(
            "MongoDB数据查取失败,错误信息为{}, 请检查 ENTITY_CODE_ 是否正确:{}".format(
                e, self.mongo_entity_code))
    finally:
        self.client_close()
def get_check_collection(self, collection_list):
    """
    Return ``self.db[self.mongo_collection]`` if that collection is listed.

    :param collection_list: container of known collection names
    :return: the collection object, or ``None`` after logging an error and
        calling ``self.client_close()`` when the name is not listed
    """
    log = Logger().logger
    if self.mongo_collection not in collection_list:
        log.error("MongoDB没有该集合,请检查")
        self.client_close()
        return None
    return self.db[self.mongo_collection]
def get_check_collection(self, db, collection_list):
    """
    Return ``db[self.mongo_collection]`` if that collection is listed.

    :param db: database object indexable by collection name
    :param collection_list: container of known collection names
    :return: the collection object, or ``None`` after logging an error when
        the name is not listed
    """
    log = Logger().logger
    wanted = self.mongo_collection
    if wanted in collection_list:
        return db[wanted]
    log.error(
        f"MongoDB {self.mongo_db} 没有 {self.mongo_collection} 集合,请检查")
    return None
def search_from_mysql(self,
                      connection,
                      output=None,
                      where_condition=None,
                      limit_num=None,
                      offset_num=None):
    """
    Build and run a SELECT against ``self.mysql_table``.

    NOTE: the statement is assembled by string interpolation, so every
    argument must come from a trusted source.

    :param connection: MySQL connection exposing ``cursor``
    :param output: column(s) to select: a str, a tuple/list of str, or a
        falsy value for ``*``
    :param where_condition: WHERE condition; ``WHERE`` is prepended unless
        the text already contains ``where``/``WHERE``
    :param limit_num: number of rows to return (LIMIT), skipped when falsy
    :param offset_num: number of rows to skip (OFFSET), skipped when falsy
    :return: rows from ``fetchall`` when at least one row matched, otherwise
        ``None``
    :raises Exception: when ``output`` is truthy but neither str nor
        tuple/list
    """
    mysql_logger = Logger().logger

    if not output:
        columns = "*"
    elif isinstance(output, str):
        columns = output
    elif isinstance(output, (tuple, list)):
        columns = ",".join(output)
    else:
        raise Exception("not format type of \"output\"")
    sql = f"SELECT {columns} FROM {self.mysql_table}"

    if where_condition:
        if "where" in where_condition or "WHERE" in where_condition:
            sql = sql + " " + where_condition
        else:
            sql = sql + f" WHERE {where_condition}"
    if limit_num:
        sql = sql + f" LIMIT {limit_num}"
    if offset_num:
        sql = sql + f" OFFSET {offset_num}"

    # Pre-bind so the ``finally`` clause cannot hit an unbound name when
    # creating the cursor itself fails.
    cs = None
    try:
        cs = connection.cursor(pymysql.cursors.DictCursor)
        count = cs.execute(sql)
        result = cs.fetchall()
        if count:
            mysql_logger.info(f"Mysql 查取成功 {count} 条")
            return result
        mysql_logger.info("数据库查取数为0")
        return None
    except TypeError:
        mysql_logger.error("MySQL查取失败,请检查")
        return None
    finally:
        if cs is not None:
            cs.close()
def get_mongo_column_dict(self, collection, column1, column2):
    """
    Project two columns (without ``_id``) from every document.

    ``self.m_client.client.close()`` is always called before returning or
    raising.

    :param collection: collection object exposing ``aggregate``
    :param column1: first field name to include
    :param column2: second field name to include
    :return: whatever ``collection.aggregate`` returns
    :raises Exception: when the aggregation raises ``TypeError``
    """
    log = Logger().logger
    try:
        log.info("开始查取数据")
        projection = {"_id": 0, column1: 1, column2: 1}
        return collection.aggregate([{"$project": projection}])
    except TypeError as error:
        log.error(
            "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确".format(error))
        raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(error))
    finally:
        self.m_client.client.close()
def wandou(self):
    """
    Obtain one proxy address from the Wandou proxy API.

    :return: ``"ip:port"`` string on success, ``False`` when the request
        fails or the response does not contain a usable entry
    """
    log = Logger().logger
    url_wandou = r'http://h.wandouip.com/get/ip-list?pack=853&num=1&xy=1&type=2&lb=\r\n&mr=1&'
    try:
        time.sleep(random.randint(1, 5))
        # Named ``payload`` rather than ``re`` to avoid shadowing the
        # standard-library module name.
        payload = requests.get(url=url_wandou).json()
        print(payload)
        # NOTE(review): a 100 s pause after every successful fetch looks
        # like a debugging leftover; kept because callers may rely on the
        # pacing. Confirm and remove if unintended.
        time.sleep(100)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        print(2)
        log.error('豌豆代理外部接口获取ip异常!')
        return False

    try:
        first = payload.get('data')[0]
        ip = '{ip}:{port}'.format(ip=first.get('ip'), port=first.get('port'))
    except (AttributeError, TypeError, IndexError, KeyError):
        # Missing/empty ``data`` or an unexpected payload shape.
        log.error('豌豆代理返回数据格式异常: {}'.format(payload))
        return False
    print(ip)
    return ip
def get_data_from_mongodb(self,
                          collection,
                          entity_code=None,
                          exclude_code=None,
                          limit_number=None,
                          data_id=None,
                          find_query=None):
    """
    Fetch documents from MongoDB.

    All supplied conditions are combined with ``$and``. The query is run
    once and, if it raises ``ServerSelectionTimeoutError``, run one more
    time.

    :param collection: collection object exposing ``find_one`` and ``find``
    :param entity_code: entity code (str) or codes (list/tuple) to fetch
    :param exclude_code: entity code (str) or codes (list/tuple) to exclude
    :param limit_number: maximum number of documents to fetch
    :param data_id: fetch documents with ``_id >= ObjectId(data_id)``
    :param find_query: extra condition (dict) or conditions (list)
    :return: cursor over the matching documents, or ``None`` when nothing
        matches or a ``TypeError`` was caught and logged
    """
    mon_logger = Logger().logger

    query_list = list()
    if isinstance(find_query, dict):
        query_list.append(find_query)
    elif isinstance(find_query, list):
        query_list.extend(find_query)

    if isinstance(entity_code, str):
        query_list.append({"ENTITY_CODE_": entity_code})
    elif isinstance(entity_code, (list, tuple)):
        query_list.append({"ENTITY_CODE_": {"$in": list(entity_code)}})

    if isinstance(exclude_code, str):
        query_list.append({"ENTITY_CODE_": {"$ne": exclude_code}})
    elif isinstance(exclude_code, (list, tuple)):
        query_list.append({"ENTITY_CODE_": {"$nin": list(exclude_code)}})

    if data_id:
        query_list.append({"_id": {"$gte": ObjectId(data_id)}})

    query = {"$and": query_list} if query_list else {}

    def fetch():
        # Run the query once and log the outcome; shared by the first
        # attempt and the retry after a timeout.
        if not collection.find_one(query):
            if entity_code:
                # Report the code that was actually queried.
                mon_logger.info("ENTITY: {} 数据查取为空".format(entity_code))
            else:
                mon_logger.info("数据查取为空")
            return None

        if limit_number:
            result = collection.find(query, no_cursor_timeout=True).limit(
                int(limit_number))
            if entity_code:
                mon_logger.info(
                    f"ENTITY: {entity_code} 数据查取成功共 {limit_number} 条")
            else:
                mon_logger.info("数据查取成功共 {}条".format(limit_number))
        else:
            result = collection.find(query, no_cursor_timeout=True)
            if entity_code:
                mon_logger.info(
                    f"ENTITY: {entity_code} 数据查取成功共 {result.count()}条")
            else:
                mon_logger.info("数据查取成功共 {}条".format(result.count()))
        return result

    try:
        mon_logger.info("MongoDB 开始查取数据")
        return fetch()
    except TypeError as e:
        mon_logger.error("MongoDB数据查取失败,错误信息为{}, 请检查 {}".format(
            e, self.mongo_entity_code))
        return None
    except pymongo.errors.ServerSelectionTimeoutError as e:
        mon_logger.info("MongoDB 连接超时 {}, 正在重新连接...".format(e))
        # Single retry; a second failure propagates to the caller.
        return fetch()
def all_from_mongodb(self, collection, data_id=None, d=False):
    """
    Return a cursor over documents selected by ``data_id`` / ``d``.

    Query selection:

    * ``data_id`` given: ``_id >= ObjectId(data_id)`` and no ``ORDER_ID``
      field;
    * otherwise, ``d`` truthy: every document;
    * otherwise: documents without a ``d`` field.

    The query is run once and, if it raises
    ``ServerSelectionTimeoutError``, run one more time with the same filter.

    :param collection: collection object exposing ``find_one`` and ``find``
    :param data_id: optional lower bound for ``_id``
    :param d: when truthy (and ``data_id`` is not given) no filter is applied
    :return: cursor over the matching documents, or ``None`` when nothing
        matches or a ``TypeError`` was caught and logged
    """
    mon_logger = Logger().logger

    if data_id:
        query = {
            "$and": [
                {"_id": {"$gte": ObjectId(data_id)}},
                {"ORDER_ID": {"$exists": False}},
            ]
        }
    elif d:
        query = {}
    else:
        query = {"d": {"$exists": False}}

    def fetch():
        # Probe with find_one first so an empty match returns None rather
        # than an empty cursor.
        if collection.find_one(query) is None:
            mon_logger.info("MongoDB 查取数据为空")
            return None
        result = collection.find(query, no_cursor_timeout=True)
        mon_logger.info("数据查取成功, 共 {} 条".format(result.count()))
        return result

    try:
        mon_logger.info("开始查取数据")
        return fetch()
    except TypeError as e:
        mon_logger.error("MongoDB数据查取失败,错误信息为{}, 请检查 {}".format(
            e, self.mongo_entity_code))
        return None
    except pymongo.errors.ServerSelectionTimeoutError as e:
        mon_logger.info("MongoDB 连接超时 {}, 正在重新连接...".format(e))
        # Retry with the SAME filter. The default branch used to retry
        # without the ``d`` filter and returned every document.
        return fetch()
def get_data_and_update(self,
                        collection,
                        entity_code,
                        exclude_code,
                        update_dict,
                        data_id=None,
                        other_query=None,
                        sort_query=None):
    """
    Find one document and update it.

    All supplied conditions are combined with ``$and``. The operation is run
    once and, if it raises ``ServerSelectionTimeoutError``, run one more
    time with the same arguments.

    :param collection: collection object exposing ``find_one_and_update``
    :param entity_code: entity code (str) or codes (list/tuple) to fetch
    :param exclude_code: entity code (str) or codes (list/tuple) to exclude
    :param update_dict: update document (fields and values to change)
    :param data_id: only consider documents with ``_id >= ObjectId(data_id)``
    :param other_query: extra filter condition (dict) or conditions (list)
    :param sort_query: sort specification forwarded as ``sort=``
    :return: the value returned by ``find_one_and_update`` when it is truthy,
        otherwise ``None`` (also ``None`` when a ``TypeError`` was logged)
    """
    mon_logger = Logger().logger

    query_list = list()
    if isinstance(entity_code, str):
        query_list.append({"ENTITY_CODE_": entity_code})
    elif isinstance(entity_code, (list, tuple)):
        query_list.append({"ENTITY_CODE_": {"$in": list(entity_code)}})

    if isinstance(exclude_code, str):
        query_list.append({"ENTITY_CODE_": {"$ne": exclude_code}})
    elif isinstance(exclude_code, (list, tuple)):
        query_list.append({"ENTITY_CODE_": {"$nin": list(exclude_code)}})

    if isinstance(other_query, dict):
        query_list.append(other_query)
    elif isinstance(other_query, list):
        query_list.extend(other_query)

    if data_id:
        query_list.append({"_id": {"$gte": ObjectId(data_id)}})

    query = {"$and": query_list} if query_list else {}

    def find_and_update():
        # Shared by the first attempt and the retry so both honour
        # ``sort_query``; the retry used to drop it.
        result_one = collection.find_one_and_update(query,
                                                    update_dict,
                                                    sort=sort_query)
        if result_one:
            mon_logger.info("MongoDB--数据查取并更新成功")
            return result_one
        mon_logger.info("MongoDB 数据查取为空")
        return None

    try:
        mon_logger.info(f"query={query}")
        return find_and_update()
    except TypeError as e:
        mon_logger.error("MongoDB数据查取失败,错误信息为{}, 请检查 {}".format(
            e, self.mongo_entity_code))
        return None
    except pymongo.errors.ServerSelectionTimeoutError as e:
        mon_logger.info("MongoDB 连接超时 {}, 正在重新连接...".format(e))
        # Single retry; a second failure propagates to the caller.
        return find_and_update()