Пример #1
0
    def login(self):
        """
        Log this account in and store the cookies produced by ``create_cookie``.

        When ``is_time_to_login()`` is false nothing is attempted: the call
        logs the fact, sleeps 5 seconds and returns 0 without recording a
        status. Any exception raised during the attempt (including the ones
        raised here for missing or non-dict cookies) is logged, reported
        through ``send_msg`` and recorded as a failed login.

        @return: 1 on success, 0 on failure
        """
        try:
            # Pre-check: only proceed when this account is due for a login.
            if self.is_time_to_login():
                cookies = self.create_cookie()

                # Validate what create_cookie() returned before storing it.
                if not cookies:
                    raise Exception("登陆失败 未获取到合法cookie")
                elif not isinstance(cookies, dict):
                    raise Exception("cookie 必须为字典格式")

                # Save the login time and the cookies, then record success.
                self.set_login_time()
                self.set_cookies(cookies)
                success_text = "登录成功 {}".format(self.username)
                log.info(success_text)
                self.record_user_status(LimitTimesUserStatus.LOGIN_SUCCESS)
                return 1

            not_due_text = "此账号尚未到登陆时间: {}".format(self.username)
            log.info(not_due_text)
            time.sleep(5)
            return 0

        except Exception as e:
            log.exception(e)
            send_msg(
                msg=f"{self.SITE_NAME} {self.username} 账号登陆异常 exception: {str(e)}",
                level="error",
                message_prefix=f"{self.SITE_NAME} {self.username} 账号登陆异常",
            )

        # Only reached after an exception was handled above.
        failure_text = "登录失败 {}".format(self.username)
        log.info(failure_text)
        self.record_user_status(LimitTimesUserStatus.LOGIN_FALIED)
        return 0
Пример #2
0
 def send_msg(self, msg, level="debug", message_prefix=""):
     """
     Forward an alert message to ``tools.send_msg``.

     @param msg: message text, passed through unchanged
     @param level: level passed through unchanged; defaults to "debug"
     @param message_prefix: prefix passed through unchanged; defaults to ""
     @return: None (the result of ``tools.send_msg`` is not returned)
     """
     # (disabled) debug log of the alert's level and msg before sending
     tools.send_msg(msg=msg, level=level, message_prefix=message_prefix)
Пример #3
0
    def run(self, username=None):
        """
        Log in the users of the pool that currently have no cookies.

        On each pass the lock ``RedisLock(key=self._tab_user_pool)`` is
        requested with ``wait_timeout=0``; the pass does work only when
        ``_lock.locked`` is true. Users that already have cookies are counted
        as online, users for which ``is_time_to_login()`` is false are
        skipped, and the rest are passed to ``self.login``. The loop repeats
        every 10 seconds while ``self._keep_alive`` is set; otherwise it stops
        after the first pass that raised no exception. A pass that raises is
        logged and retried after 1 second.

        @param username: if given, only the user with this username is handled
        @return: None
        """
        while True:
            try:
                with RedisLock(key=self._tab_user_pool,
                               lock_timeout=3600,
                               wait_timeout=0) as _lock:
                    if _lock.locked:
                        self.__sycn_users_info()
                        online_user = 0
                        for user in self.users:
                            if username and username != user.username:
                                continue

                            try:
                                if user.cookies:
                                    online_user += 1
                                    continue

                                # Pre-check
                                if not user.is_time_to_login():
                                    log.info("账号{}与上次登录时间间隔过短,暂不登录: 将在{}登录使用".
                                             format(user.username,
                                                    user.next_login_time()))
                                    continue

                                # Rebind ``user`` only when login() returned
                                # an object. If it returned None, keeping the
                                # original user lets the failure be recorded
                                # below instead of raising inside the except
                                # handler on ``user.username``.
                                logged_in = self.login(user)
                                if logged_in is not None:
                                    user = logged_in

                                if user.cookies:
                                    # Save the login time and the user
                                    user.set_login_time()
                                    self.add_user(user)
                                    self.record_user_status(
                                        user.user_id,
                                        GoldUserStatus.LOGIN_SUCCESS)
                                    log.debug("登录成功 {}".format(user.username))
                                    online_user += 1
                                else:
                                    log.info("登录失败 {}".format(user.username))
                                    self.record_user_status(
                                        user.user_id,
                                        GoldUserStatus.LOGIN_FALIED)
                            except NotImplementedError:
                                log.error(
                                    f"{self.__class__.__name__} must be implementation login method!"
                                )
                                # Terminates the whole process immediately.
                                os._exit(0)
                            except Exception as e:
                                log.exception(e)
                                msg = f"{user.username} 账号登陆失败 exception: {str(e)}"
                                log.info(msg)
                                self.record_user_status(
                                    user.user_id, GoldUserStatus.LOGIN_FALIED)

                                send_msg(
                                    msg=msg,
                                    level="error",
                                    message_prefix=f"{user.username} 账号登陆失败",
                                )

                        log.info("当前在线user数为 {}".format(online_user))

                if self._keep_alive:
                    time.sleep(10)
                else:
                    break

            except Exception as e:
                log.exception(e)
                time.sleep(1)
Пример #4
0
    def __add_item_to_db(self, items, update_items, requests, callbacks,
                         items_fingerprints):
        """
        Export a batch of items to the database and settle the related tasks.

        Steps, in order: optional de-duplication of ``items`` (when
        ``setting.ITEM_FILTER_ENABLE`` is set), grouping by table, batch
        insert, batch update. If every export call succeeded, the callbacks
        are run, ``requests`` are removed from ``self._table_request`` and the
        fingerprints are added to the dedup filter. Otherwise the failure is
        logged and counted; once ``export_retry_times`` exceeds
        ``setting.EXPORT_DATA_MAX_RETRY_TIMES`` the failed items are stored in
        ``self._table_failed_items`` and the requests are removed (both
        skipped when ``self._redis_key`` is ``"air_spider"``).

        ``self._is_adding_to_db`` is True for the duration of the call and is
        reset to False even if an exception propagates.

        @param items: items to insert
        @param update_items: items to update
        @param requests: finished requests to remove from the request table
        @param callbacks: callables run after a successful export; the list
            is drained in place
        @param items_fingerprints: fingerprints of ``items`` for the dedup filter
        @return: None
        """
        export_success = True
        self._is_adding_to_db = True

        try:
            # De-duplicate
            if setting.ITEM_FILTER_ENABLE:
                items, items_fingerprints = self.__dedup_items(
                    items, items_fingerprints)

            # Group the items by table
            items_dict = self.__pick_items(items)
            update_items_dict = self.__pick_items(update_items,
                                                  is_update_item=True)

            # Batch insert
            failed_items = {"add": [], "update": [], "requests": []}
            while items_dict:
                table, datas = items_dict.popitem()

                log.debug("""
                -------------- item 批量入库 --------------
                表名: %s
                datas: %s
                    """ % (table, tools.dumps_json(datas, indent=16)))

                if not self.__export_to_db(table, datas):
                    export_success = False
                    failed_items["add"].append({
                        "table": table,
                        "datas": datas
                    })

            # Batch update
            while update_items_dict:
                table, datas = update_items_dict.popitem()

                log.debug("""
                -------------- item 批量更新 --------------
                表名: %s
                datas: %s
                    """ % (table, tools.dumps_json(datas, indent=16)))

                update_keys = self._item_update_keys.get(table)
                if not self.__export_to_db(
                        table, datas, is_update=True,
                        update_keys=update_keys):
                    export_success = False
                    failed_items["update"].append({
                        "table": table,
                        "datas": datas
                    })

            if export_success:
                # Run the callbacks; a failing callback does not stop the rest
                while callbacks:
                    try:
                        callback = callbacks.pop(0)
                        callback()
                    except Exception as e:
                        log.exception(e)

                # Remove the finished requests
                if requests:
                    self.redis_db.zrem(self._table_request, requests)

                # Add the fingerprints to the dedup filter
                if setting.ITEM_FILTER_ENABLE:
                    if items_fingerprints:
                        self.__class__.dedup.add(items_fingerprints,
                                                 skip_check=True)
            else:
                failed_items["requests"] = requests

                if self.export_retry_times > setting.EXPORT_DATA_MAX_RETRY_TIMES:
                    if self._redis_key != "air_spider":
                        # Record the failed items in redis
                        self.redis_db.sadd(self._table_failed_items,
                                           failed_items)

                        # Remove the finished requests
                        if requests:
                            self.redis_db.zrem(self._table_request, requests)

                        log.error("入库超过最大重试次数,不再重试,数据记录到redis,items:\n {}".format(
                            tools.dumps_json(failed_items)))
                    self.export_retry_times = 0

                else:
                    tip = ["入库不成功"]
                    if callbacks:
                        tip.append("不执行回调")
                    if requests:
                        tip.append("不删除任务")
                        exists = self.redis_db.zexists(self._table_request,
                                                       requests)
                        # Re-add only the requests that still exist, in one
                        # call. Passing the whole list would also re-add the
                        # ones reported as missing.
                        # NOTE(review): 300 is assumed to be the score/priority
                        # argument of zadd -- confirm.
                        existing_requests = [
                            request
                            for exist, request in zip(exists, requests)
                            if exist
                        ]
                        if existing_requests:
                            self.redis_db.zadd(self._table_request,
                                               existing_requests, 300)

                    if setting.ITEM_FILTER_ENABLE:
                        tip.append("数据不入去重库")

                    if self._redis_key != "air_spider":
                        tip.append("将自动重试")

                    tip.append("失败items:\n {}".format(
                        tools.dumps_json(failed_items)))
                    log.error(",".join(tip))

                    self.export_falied_times += 1

                    if self._redis_key != "air_spider":
                        self.export_retry_times += 1

                if self.export_falied_times > setting.EXPORT_DATA_MAX_FAILED_TIMES:
                    # Alert
                    msg = "《{}》爬虫导出数据失败,失败次数:{},请检查爬虫是否正常".format(
                        self._redis_key, self.export_falied_times)
                    log.error(msg)
                    tools.send_msg(
                        msg=msg,
                        level="error",
                        message_prefix="《%s》爬虫导出数据失败" % (self._redis_key),
                    )
        finally:
            # Always clear the flag so an exception cannot leave it set.
            self._is_adding_to_db = False