Пример #1
0
    def match_from_mongo(self, collection, match, output):
        """
        Return the ``output`` field of the first document that satisfies ``match``.

        Runs a ``$match`` + ``$project`` aggregation on ``collection`` and reads
        ``output`` from the first document of the result. ``self.client_close()``
        is called whether or not the lookup succeeds.

        :param collection: object exposing ``aggregate(pipeline)``
        :param match: filter document passed to the ``$match`` stage
        :param output: name of the field to project and return
        :return: value of ``output`` in the first document, or ``None`` when
            the aggregation yields no document
        :raises Exception: when the aggregation raises ``TypeError``; the
            original error is attached as the cause
        """
        mon_logger = Logger().logger
        try:
            mon_logger.info("开始查取数据")
            result = collection.aggregate([
                {"$match": match},
                {"$project": {"budgetPrice": 1, "_id": 0, output: 1}},
            ])
            # A cursor never yields None, so an empty result can only be
            # detected by the absence of a first document.
            first_doc = next(iter(result), None)
            if first_doc is None:
                mon_logger.error("WEIBO_CODE_ 查取数据为空")
                return None
            mon_logger.info("数据查取成功")
            return first_doc[output]

        except TypeError as e:
            mon_logger.error(
                "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))
            raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e)) from e

        finally:
            self.client_close()
Пример #2
0
 def client_to_mongodb(self):
     """
     List the collections of ``self.db``, retrying when the server is unreachable.

     The first attempt only tolerates ``ServerSelectionTimeoutError``. It is
     followed by up to four immediate retries (numbered 2 to 5 in the log),
     each of which tolerates any exception.

     :return: result of ``self.db.collection_names()``; ``None`` once every
         attempt has failed (``self.client_close()`` is called in that case)
     """
     mon_logger = Logger().logger
     mon_logger.info("开始连接MongoDB({}:{}),database={}".format(
         self.mongo_host, self.mongo_port, self.mongo_database))

     def list_names():
         # One attempt: list the collections and log the success.
         names = self.db.collection_names()
         mon_logger.info("MongoDB({}:{})连接成功".format(
             self.mongo_host, self.mongo_port))
         return names

     try:
         return list_names()
     except pymongo.errors.ServerSelectionTimeoutError as first_error:
         mon_logger.warning("MongoDB({}:{})连接失败".format(
             self.mongo_host, self.mongo_port))
         final_attempt = 5
         attempt = 2
         while attempt <= final_attempt:
             try:
                 return list_names()
             except Exception:
                 mon_logger.warning("MongoDB({}:{})第{}次连接失败".format(
                     self.mongo_host, self.mongo_port, attempt))
             if attempt == final_attempt:
                 # Out of retries: report the error of the very first attempt.
                 mon_logger.error(
                     "MongoDB连接失败,错误信息为: {}, 请检查各项参数是否正确host={}, port={},database={}"
                     .format(first_error, self.mongo_host, self.mongo_port,
                             self.mongo_database))
                 self.client_close()
             attempt += 1
Пример #3
0
    def match_from_mongo(self, collection, match, output):
        """
        Run a ``$match`` + ``$project`` aggregation and return its result.

        :param collection: object exposing ``aggregate(pipeline)``
        :param match: match condition as a dict, e.g. {"ENTITY_CODE_": "XXXXXXXXX"}
        :param output: a single field name, or an iterable of field names
            (list, or dict whose keys are used) to include in the projection
        :return: whatever ``collection.aggregate`` returns; ``None`` when a
            ``TypeError`` occurred (it is logged, not re-raised)
        """
        mon_logger = Logger().logger
        fields = [output] if isinstance(output, str) else output
        try:
            mon_logger.info("MongoDB 开始查取数据")
            projection = {field: 1 for field in fields}
            pipeline = [{"$match": match}, {"$project": projection}]
            cursor = collection.aggregate(pipeline)
            mon_logger.info("MongoDB 数据查取成功")
            return cursor

        except TypeError as e:
            mon_logger.error(
                "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确:{}".format(e, match))

        finally:
            # NOTE(review): the client is closed before the caller consumes
            # the returned cursor; confirm that is intended.
            self.client_close()
Пример #4
0
 def client_to_mysql(self):
     '''
     Open a MySQL connection built from ``self.mysql_config``.

     When the first attempt raises ``OperationalError`` the connection is
     retried up to five more times (numbered 2 to 6 in the log).

     :return: the pymysql connection; ``None`` when every retry failed
     '''
     mysql_logger = Logger().logger
     try:
         mysql_logger.info("正在连接MySQL({}@{}:{})".format(
             self.mysql_user, self.mysql_host, self.mysql_port))
         conn = pymysql.connect(**self.mysql_config)
         mysql_logger.info("Mysql连接成功({}@{}:{})".format(
             self.mysql_user, self.mysql_host, self.mysql_port))
         return conn
     except pymysql.err.OperationalError:
         last_retry = 6
         retry_count = 2
         while retry_count <= last_retry:
             try:
                 mysql_logger.warning(
                     "MySQL连接失败,正在重试第{}次连接".format(retry_count))
                 conn = pymysql.connect(**self.mysql_config)
                 mysql_logger.info("Mysql连接成功")
                 return conn
             except Exception as retry_error:
                 mysql_logger.warning("第{}次连接MySQL失败".format(retry_count))
                 if retry_count == last_retry:
                     # Only the error of the final retry is reported.
                     mysql_logger.error(
                         "MySQL连接失败,错误信息为{}".format(retry_error))
             retry_count += 1
Пример #5
0
    def http_client(self, url, param=None, method='GET', code="utf-8"):
        """
        Request ``url`` through a proxy obtained from ``self.wandou()``.

        :param url: request target handed to ``HTTPConnection.request``
        :param param: optional mapping of extra request headers; its entries
            are merged over the ``Proxy-Authorization`` header
        :param method: HTTP method, ``'GET'`` by default
        :param code: encoding used to decode the response body (undecodable
            bytes are ignored)
        :return: decoded response body, or ``None`` when no proxy could be
            obtained or the request failed
        """
        log = Logger().logger
        username = "******"  # your user name
        password = "******"  # your password

        ip = self.wandou()
        if not ip:
            # wandou() reports failure with a falsy value; splitting it would
            # raise outside the try block below.
            log.error("未获取到代理ip, 请求取消: {}".format(url))
            return None
        ips = ip.split(':')
        proxy_ip = str(ips[0])  # proxy ip
        proxy_port = str(ips[1])  # proxy port
        print(proxy_ip, proxy_port)
        headers = {
            'Proxy-Authorization':
            'Basic %s' % (self.base_code(username, password))
        }

        if param:
            headers = dict(headers, **param)

        con = None
        try:
            con = http.client.HTTPConnection(proxy_ip,
                                             port=proxy_port,
                                             timeout=10)
            con.request(method, url, headers=headers)
            resu = con.getresponse()
            return resu.read().decode(code, errors="ignore")
        except Exception as e:
            log.error(e.args)
            return None
        finally:
            # Release the socket on every path; it was previously leaked.
            if con is not None:
                con.close()
Пример #6
0
    def search_by_status(self, collection, data_id=None):
        """
        Find the documents of ``self.mongo_entity_code`` that have no ``d`` field.

        A ``find_one`` probe locates the first matching document; the returned
        cursor then covers every matching document whose ``_id`` is greater
        than or equal to that document's ``_id``. ``self.client_close()`` is
        called on every path.

        :param collection: object exposing ``find_one`` and ``find``
        :param data_id: optional id; when given, the probe only considers
            documents whose ``_id`` is >= ``ObjectId(data_id)``
        :return: cursor from ``collection.find`` (``no_cursor_timeout=True``),
            or ``None`` when nothing matches or a ``TypeError`` was logged
        """
        mon_logger = Logger().logger

        def build_query(min_id=None):
            # Same condition order as before: entity, optional _id bound, "d".
            conditions = [{"ENTITY_CODE_": self.mongo_entity_code}]
            if min_id is not None:
                conditions.append({"_id": {"$gte": min_id}})
            conditions.append({"d": {"$exists": False}})
            return {"$and": conditions}

        try:
            mon_logger.info("开始查取数据")
            start_id = ObjectId(data_id) if data_id else None
            result_one = collection.find_one(build_query(start_id))
            if result_one is None:
                mon_logger.info("ENTITY: {} 数据查取为空".format(
                    self.mongo_entity_code))
                return None

            result = collection.find(build_query(result_one["_id"]),
                                     no_cursor_timeout=True)
            # The message has two placeholders; supplying only the count used
            # to raise IndexError on every successful lookup.
            mon_logger.info("ENTITY: {} 数据查取成功共 {}条".format(
                self.mongo_entity_code, result.count()))
            return result
        except TypeError as e:
            mon_logger.error(
                "MongoDB数据查取失败,错误信息为{}, 请检查 ENTITY_CODE_ 是否正确:{}".format(
                    e, self.mongo_entity_code))
            return None
        finally:
            self.client_close()
Пример #7
0
 def get_check_collection(self, collection_list):
     """
     Return the configured collection if its name is in ``collection_list``.

     :param collection_list: container of collection names to check against
     :return: ``self.db[self.mongo_collection]``; ``None`` when the name is
         missing (an error is logged and ``self.client_close()`` is called)
     """
     mon_logger = Logger().logger
     if self.mongo_collection not in collection_list:
         mon_logger.error("MongoDB没有该集合,请检查")
         self.client_close()
         return None
     return self.db[self.mongo_collection]
Пример #8
0
 def get_check_collection(self, db, collection_list):
     """
     Return ``db[self.mongo_collection]`` if that name is in ``collection_list``.

     :param db: object indexed by collection name
     :param collection_list: container of collection names to check against
     :return: the collection, or ``None`` (after logging an error) when the
         name is not listed
     """
     mon_logger = Logger().logger
     if self.mongo_collection not in collection_list:
         mon_logger.error(
             f"MongoDB {self.mongo_db} 没有 {self.mongo_collection} 集合,请检查")
         return None
     return db[self.mongo_collection]
Пример #9
0
    def search_from_mysql(self,
                          connection,
                          output=None,
                          where_condition=None,
                          limit_num=None,
                          offset_num=None):
        """
        Run a SELECT against ``self.mysql_table`` and return the rows.

        NOTE(review): the statement is assembled by string formatting, so
        none of the arguments may come from untrusted input.

        :param connection: object exposing ``cursor(cursor_class)``
        :param output: column(s) to select; a string is used verbatim, a
            list/tuple is comma-joined, a falsy value selects ``*``
        :param where_condition: WHERE clause; used as-is when it contains
            "where" or "WHERE", otherwise prefixed with ``WHERE``
        :param limit_num: number of rows to return (skipped when falsy)
        :param offset_num: number of rows to skip (skipped when falsy)
        :return: result of ``fetchall()``; ``None`` when no row matched or a
            ``TypeError`` was logged
        :raises Exception: when ``output`` is neither str, tuple nor list
        """
        mysql_logger = Logger().logger
        if not output:
            columns = "*"
        elif isinstance(output, str):
            columns = output
        elif isinstance(output, (tuple, list)):
            columns = ",".join(output)
        else:
            raise Exception("not format type of \"output\"")
        sql = f"SELECT {columns} FROM {self.mysql_table}"

        if where_condition:
            # NOTE(review): this is a substring test, so a condition that
            # merely contains "where" (e.g. inside a value) is appended
            # without the WHERE keyword.
            if "where" in where_condition or "WHERE" in where_condition:
                sql = sql + " " + where_condition
            else:
                sql = sql + f" WHERE {where_condition}"

        if limit_num:
            sql += f" LIMIT {limit_num}"
        if offset_num:
            sql += f" OFFSET {offset_num}"

        # Create the cursor before entering the try block: if this call fails
        # there is nothing to close, and the real error must not be masked by
        # an UnboundLocalError raised from the finally clause.
        cs = connection.cursor(pymysql.cursors.DictCursor)
        try:
            count = cs.execute(sql)
            result = cs.fetchall()
            if count:
                mysql_logger.info(f"Mysql 查取成功 {count} 条")
                return result
            mysql_logger.info("数据库查取数为0")
            return None
        except TypeError:
            mysql_logger.error("MySQL查取失败,请检查")
            return None
        finally:
            cs.close()
Пример #10
0
    def get_mongo_column_dict(self, collection, column1, column2):
        """
        Project two columns of every document in ``collection``.

        ``self.m_client.client.close()`` is called on every path.

        :param collection: object exposing ``aggregate(pipeline)``
        :param column1: first field name to include
        :param column2: second field name to include
        :return: whatever ``collection.aggregate`` returns (``_id`` excluded)
        :raises Exception: when a ``TypeError`` occurs while querying
        """
        mon_logger = Logger().logger
        try:
            mon_logger.info("开始查取数据")
            projection = {"_id": 0, column1: 1, column2: 1}
            project_stage = {"$project": projection}
            return collection.aggregate([project_stage])
        except TypeError as e:
            mon_logger.error(
                "WEIBO_CODE_ 数据查取失败,错误信息为{}, 请检查匹配规则是否正确".format(e))
            raise Exception("WEIBO_CODE_ 查取失败, 错误信息为{}".format(e))

        finally:
            self.m_client.client.close()
Пример #11
0
 def wandou(self):
     """
     Fetch one proxy address from the wandouip HTTP API.

     :return: ``"ip:port"`` string on success; ``False`` when the request
         fails or the response carries no ``data`` entry
     """
     log = Logger().logger
     url_wandou = r'http://h.wandouip.com/get/ip-list?pack=853&num=1&xy=1&type=2&lb=\r\n&mr=1&'
     try:
         time.sleep(random.randint(1, 5))
         # A timeout keeps a stalled proxy API from blocking the caller forever.
         payload = requests.get(url=url_wandou, timeout=10).json()
         print(payload)
         # NOTE(review): a 100 s pause after a successful fetch looks like a
         # debugging leftover; kept because callers may rely on the pacing.
         time.sleep(100)
     except Exception:
         # Exception instead of a bare except, so KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         print(2)
         log.error('豌豆代理外部接口获取ip异常!')
         return False

     entries = payload.get('data') if isinstance(payload, dict) else None
     if not entries:
         # Missing or empty "data" used to crash with TypeError/IndexError.
         log.error('豌豆代理外部接口获取ip异常!')
         return False
     first = entries[0]
     ip = '{ip}:{port}'.format(ip=first.get('ip'), port=first.get('port'))
     print(ip)
     return ip
Пример #12
0
    def get_data_from_mongodb(self,
                              collection,
                              entity_code=None,
                              exclude_code=None,
                              limit_number=None,
                              data_id=None,
                              find_query=None):
        """
        Fetch documents from MongoDB.

        Every supplied filter becomes one condition of an ``$and`` query; with
        no filter at all the query is ``{}``. The query is retried once when
        the first attempt raises ``ServerSelectionTimeoutError``.

        :param collection: object exposing ``find_one`` and ``find``
        :param entity_code: ENTITY_CODE_ value (str) or values (list/tuple)
            to select
        :param exclude_code: ENTITY_CODE_ value (str) or values (list/tuple)
            to exclude
        :param limit_number: maximum number of documents the cursor returns
        :param data_id: only fetch documents whose ``_id`` is >=
            ``ObjectId(data_id)``
        :param find_query: extra filter; a dict is added as one condition, a
            list contributes each of its items
        :return: cursor from ``collection.find`` (``no_cursor_timeout=True``),
            or ``None`` when nothing matches or a ``TypeError`` was logged
        """
        mon_logger = Logger().logger
        query_list = list()
        if isinstance(find_query, dict):
            query_list.append(find_query)
        elif isinstance(find_query, list):
            query_list.extend(find_query)

        if isinstance(entity_code, str):
            query_list.append({"ENTITY_CODE_": entity_code})
        elif isinstance(entity_code, (list, tuple)):
            query_list.append({"ENTITY_CODE_": {"$in": list(entity_code)}})

        if isinstance(exclude_code, str):
            query_list.append({"ENTITY_CODE_": {"$ne": exclude_code}})
        elif isinstance(exclude_code, (list, tuple)):
            query_list.append({"ENTITY_CODE_": {"$nin": list(exclude_code)}})

        if data_id:
            query_list.append({"_id": {"$gte": ObjectId(data_id)}})

        query = {"$and": query_list} if query_list else {}

        def run_query():
            # One attempt: probe for a match, open the cursor, log the outcome.
            if not collection.find_one(query):
                # Report the entity code that was actually queried.
                if entity_code:
                    mon_logger.info(
                        "ENTITY: {} 数据查取为空".format(entity_code))
                else:
                    mon_logger.info("数据查取为空")
                return None

            cursor = collection.find(query, no_cursor_timeout=True)
            if limit_number:
                cursor = cursor.limit(int(limit_number))
                # With a limit the log shows the requested limit, not the
                # number of documents actually available.
                if entity_code:
                    mon_logger.info(
                        f"ENTITY: {entity_code} 数据查取成功共 {limit_number} 条")
                else:
                    mon_logger.info("数据查取成功共 {}条".format(limit_number))
            elif entity_code:
                mon_logger.info(
                    f"ENTITY: {entity_code} 数据查取成功共 {cursor.count()}条")
            else:
                mon_logger.info("数据查取成功共 {}条".format(cursor.count()))
            return cursor

        try:
            mon_logger.info("MongoDB 开始查取数据")
            return run_query()
        except TypeError as e:
            mon_logger.error("MongoDB数据查取失败,错误信息为{}, 请检查 {}".format(
                e, entity_code))
            return None
        except pymongo.errors.ServerSelectionTimeoutError as e:
            mon_logger.info("MongoDB 连接超时 {}, 正在重新连接...".format(e))
            # Single retry; an error raised here propagates to the caller.
            return run_query()
Пример #13
0
    def all_from_mongodb(self, collection, data_id=None, d=False):
        """
        Return a cursor over the documents selected by ``data_id`` / ``d``.

        Filter used:

        * ``data_id`` given: ``_id`` >= ``ObjectId(data_id)`` and no
          ``ORDER_ID`` field (``d`` is ignored);
        * no ``data_id`` and ``d`` truthy: every document;
        * no ``data_id`` and ``d`` falsy: documents without a ``d`` field.

        The query is retried once, with the same filter, when the first
        attempt raises ``ServerSelectionTimeoutError``.

        :param collection: object exposing ``find_one`` and ``find``
        :param data_id: optional lower bound for ``_id``
        :param d: when truthy (and ``data_id`` is not given) do not filter on
            the ``d`` field
        :return: cursor from ``collection.find`` (``no_cursor_timeout=True``),
            or ``None`` when nothing matches or a ``TypeError`` was logged
        """
        mon_logger = Logger().logger
        if data_id:
            find_id = ObjectId(data_id)
            query = {
                "$and": [{
                    "_id": {
                        "$gte": find_id
                    }
                }, {
                    "ORDER_ID": {
                        "$exists": False
                    }
                }]
            }
        elif d:
            query = {}
        else:
            query = {"d": {"$exists": False}}

        def fetch():
            # One attempt: probe for a match, then open the cursor.
            if collection.find_one(query) is None:
                mon_logger.info("MongoDB 查取数据为空")
                return None
            cursor = collection.find(query, no_cursor_timeout=True)
            mon_logger.info("数据查取成功, 共 {} 条".format(cursor.count()))
            return cursor

        try:
            mon_logger.info("开始查取数据")
            return fetch()
        except TypeError as e:
            mon_logger.error("MongoDB数据查取失败,错误信息为{}, 请检查 {}".format(
                e, self.mongo_entity_code))
            return None
        except pymongo.errors.ServerSelectionTimeoutError as e:
            mon_logger.info("MongoDB 连接超时 {}, 正在重新连接...".format(e))
            # Retry with the SAME filter; the retry used to drop the "d"
            # filter and return unfiltered data.
            return fetch()
Пример #14
0
    def get_data_and_update(self,
                            collection,
                            entity_code,
                            exclude_code,
                            update_dict,
                            data_id=None,
                            other_query=None,
                            sort_query=None):
        """
        Find one document and update it.

        Every supplied filter becomes one condition of an ``$and`` query; with
        no filter at all the query is ``{}``. The operation is retried once
        when the first attempt raises ``ServerSelectionTimeoutError``.

        :param collection: object exposing ``find_one_and_update``
        :param entity_code: ENTITY_CODE_ value (str) or values (list/tuple)
            to select
        :param exclude_code: ENTITY_CODE_ value (str) or values (list/tuple)
            to exclude
        :param update_dict: update document passed to ``find_one_and_update``
        :param data_id: only consider documents whose ``_id`` is >=
            ``ObjectId(data_id)``
        :param other_query: extra filter; a dict is added as one condition, a
            list contributes each of its items
        :param sort_query: passed as ``sort`` to ``find_one_and_update``
        :return: the value returned by ``find_one_and_update`` when truthy,
            otherwise ``None`` (also ``None`` when a ``TypeError`` was logged)
        """
        mon_logger = Logger().logger
        query_list = list()
        if isinstance(entity_code, str):
            query_list.append({"ENTITY_CODE_": entity_code})
        elif isinstance(entity_code, (list, tuple)):
            query_list.append({"ENTITY_CODE_": {"$in": list(entity_code)}})

        if isinstance(exclude_code, str):
            query_list.append({"ENTITY_CODE_": {"$ne": exclude_code}})
        elif isinstance(exclude_code, (list, tuple)):
            query_list.append({"ENTITY_CODE_": {"$nin": list(exclude_code)}})

        if isinstance(other_query, dict):
            query_list.append(other_query)
        elif isinstance(other_query, list):
            query_list.extend(other_query)

        if data_id:
            query_list.append({"_id": {"$gte": ObjectId(data_id)}})

        query = {"$and": query_list} if query_list else {}

        def find_and_update():
            # One attempt, always with the caller's sort order.
            updated = collection.find_one_and_update(query,
                                                     update_dict,
                                                     sort=sort_query)
            if updated:
                mon_logger.info("MongoDB--数据查取并更新成功")
                return updated
            mon_logger.info("MongoDB 数据查取为空")
            return None

        try:
            mon_logger.info(f"query={query}")
            return find_and_update()
        except TypeError as e:
            mon_logger.error("MongoDB数据查取失败,错误信息为{}, 请检查 {}".format(
                e, entity_code))
            return None
        except pymongo.errors.ServerSelectionTimeoutError as e:
            mon_logger.info("MongoDB 连接超时 {}, 正在重新连接...".format(e))
            # The retry used to omit sort=sort_query, so it could update a
            # different document than the first attempt would have.
            return find_and_update()