def search_account_from_mongo(self, log):
     """
     Load login credentials from the Redis list ``tyc_account_lists``.

     Despite the method name, the account entry is popped from Redis
     (``redis_conn``), not read from MongoDB. Each attempt pops one
     JSON-encoded entry. When nothing usable is popped (empty queue, Redis
     error or undecodable payload) the attempt is counted, the method sleeps
     one second and retries, for at most 10 attempts.

     On success ``self.account_id``, ``self.account_num`` and
     ``self.password`` are set from the entry's ``_id``, ``account_name``
     and ``password`` fields.

     :param log: logger-like object providing ``info`` and ``error``
     :return: None
     :raises SystemExit: after ``self.driver.quit()`` when all attempts
         ended without an account
     """
     num = 10  # attempts left; only spent when no entry could be read
     while num:
         try:
             raw = redis_conn.rpop('tyc_account_lists')
             # An empty queue yields None (presumably a redis-py client);
             # test for it instead of letting ``None.decode()`` raise.
             account_info = json.loads(raw.decode()) if raw is not None else None
         except Exception as e:
             # Redis or JSON failure: report it through the logger and treat
             # this attempt as having found no account.
             log.error("[ERROR]: {}".format(e))
             log.error("[ERROR]: 数据库查询账号失败")
             account_info = None
         if not account_info:
             num -= 1
             sleep(1)
             continue
         try:
             # Read every field before touching ``self`` so a malformed
             # entry cannot leave the instance half-updated.
             account_id = account_info['_id']
             account_num = account_info['account_name']
             password = account_info['password']
         except (KeyError, TypeError) as e:
             # Malformed entry: drop it and pop the next one. As before,
             # this does not consume an attempt.
             log.error("[ERROR]: {}".format(e))
             log.error("[ERROR]: 数据库查询账号失败")
             continue
         self.account_id = account_id
         self.account_num = account_num
         self.password = password
         log.info("[INFO]: 正在使用账号{}".format(self.account_num))
         break
     if num == 0:
         log.info("数据库没有符合要求的账号")
         self.driver.quit()
         # ``quit()`` is injected by the ``site`` module for interactive use
         # and may be absent (e.g. ``python -S``); raise SystemExit directly.
         raise SystemExit
示例#2
0
 def get_name_from_redis(self):
     """
     Pop the next name to search from the Redis list ``tyc_name_lists``.

     Each queue entry is JSON with at least ``_id`` and ``name``. The
     matching document is looked up in ``name_results_coll``; entries whose
     document has a truthy ``flag`` are skipped as already crawled. Names
     longer than 3 characters are marked with ``flag = 3`` through
     ``self.update_database_status`` and skipped as well.

     When the queue is empty, the collection is checked for documents that
     still match the queue-filling query. If any exist the method waits
     60 seconds and retries; otherwise it calls ``self.driver.quit()`` and
     returns None.

     :return: name information (``name_id``, ``name``), or None when there
         is nothing left to crawl
     :rtype: dict or None
     """
     while True:
         # Take one entry from the name queue.
         try:
             raw = redis_conn.rpop('tyc_name_lists')
             # An empty queue yields None (presumably a redis-py client).
             # Handle it explicitly so the "queue empty" logic below is
             # reachable instead of failing on ``None.decode()``.
             name_info = json.loads(raw.decode()) if raw is not None else None
             if name_info:
                 # Look the entry up in the database.
                 res = name_results_coll.find_one({'_id': name_info['_id']})
                 if res is None:
                     # Same outcome as before (logged, 30 s pause, entry
                     # dropped) but with a message that names the cause.
                     raise LookupError(
                         "no document with _id {!r}".format(name_info['_id']))
                 # A truthy flag means the name was already crawled.
                 if res.get('flag'):
                     print('已爬取')
                     continue
         except Exception as e:
             self.log.error("数据库查询人名失败, ERROR:{}".format(e))
             time.sleep(30)
             continue
         # An entry was read: return its (_id, name).
         if name_info:
             self.log.info("姓名:{}".format(name_info['name']))
             if len(name_info['name']) > 3:
                 # Mark the record as not crawlable and move on.
                 collection = {'_id': name_info['_id']}
                 query = {'$set': {'flag': 3}}
                 self.update_database_status('name', collection, query)
                 continue
             return {
                 'name_id': name_info['_id'],
                 'name': name_info['name'],
             }
         # Queue is empty. This query is meant to mirror the one used to
         # fill the name queue.
         # NOTE(review): the key is spelled 'company_numm' (double "m");
         # confirm it matches the real field name and the producer's query.
         query = {
             'company_numm': {
                 '$lte': 100,
                 '$gt': 0
             },
             'flag': {
                 '$ne': 1
             }
         }
         one = name_results_coll.find_one(query)
         if one:
             # Matching documents remain, so the queue is presumably still
             # being refilled: announce the wait, then retry.
             self.log.info('缓存中暂时没有数据,60s后重试...')
             sleep(60)
             continue
         # Nothing left for this query: shut the driver down and stop
         # looping (previously the loop spun and re-ran quit() forever).
         self.log.info("数据库没有符合要求的人名")
         self.driver.quit()
         return None
 def search_name_from_mongo(self, log):
     """
     Fetch the next person name to search and store it on the instance.

     Despite the method name, entries are popped from the Redis list
     ``tyc_name_lists``; ``name_results_coll`` is only consulted to skip
     entries whose document has a truthy ``flag`` (already crawled).

     On success ``self._id``, ``self.man_name`` and ``self.last_name`` are
     set from the entry's ``_id``, ``name`` and ``last name`` fields. Names
     longer than 10 characters are handed to ``self.handle_long_name(log)``
     and the loop moves on to the next entry.

     When the queue is empty, the collection is checked for documents
     without a ``name_num`` field. If one exists the method waits 60
     seconds and retries; otherwise it calls ``self.driver.quit()`` and
     exits the process.

     Any other failure (Redis/Mongo error, bad JSON, missing field or
     document) is logged and retried after 5 seconds. It is no longer
     mistaken for an empty queue, which could previously terminate the
     process while entries remained.

     :param log: logger-like object providing ``info`` and ``error``
     :return: None
     :raises SystemExit: when the queue is empty and no document lacks
         ``name_num``
     """
     while True:
         try:
             raw = redis_conn.rpop('tyc_name_lists')
             if raw is None:
                 # Empty queue (presumably a redis-py client returning
                 # None): decide between waiting for a refill and stopping.
                 one = name_results_coll.find_one(
                     {"name_num": {
                         '$exists': False
                     }})
                 if one:
                     log.info('缓存中暂时没有数据,60s后重试...')
                     sleep(60)
                     continue
                 log.info("数据库没有符合要求的人名")
                 self.driver.quit()
                 # SystemExit is not an ``Exception`` subclass, so it passes
                 # through the handler below, like the former ``quit()``.
                 raise SystemExit
             name_info = json.loads(raw.decode())
             if not name_info:
                 raise ValueError("empty entry popped from tyc_name_lists")
             res = name_results_coll.find_one({'_id': name_info['_id']})
             if res is None:
                 raise LookupError(
                     "no document with _id {!r}".format(name_info['_id']))
             # A truthy flag means the name was already crawled.
             if res.get('flag'):
                 print('已爬取')
                 continue
             # _id
             self._id = name_info["_id"]
             # Person name
             self.man_name = name_info["name"]
             # NOTE(review): the key contains a space ("last name"); confirm
             # that is how the producer writes it.
             self.last_name = name_info["last name"]
             if len(self.man_name) > 10:
                 # Over-long name: delegate to the long-name handler and
                 # fetch another entry.
                 self.handle_long_name(log)
                 continue
             log.info("姓名:{}".format(self.man_name))
             break
         except Exception as e:
             log.error(e)
             log.error("数据库查询人名失败")
             time.sleep(5)
示例#4
0
 def get_account_from_redis(self):
     """
     Block until an account entry is read from ``tyc_account_lists``.

     One JSON-encoded entry is popped from the Redis list per attempt. Any
     failure while popping or decoding (including an empty queue) is logged
     through ``self.log.error``, followed by a 10 second pause and another
     attempt. A decoded but falsy entry is discarded and the next one is
     popped immediately.

     :return: account information with keys ``account_id``,
         ``account_name`` and ``password``
     :rtype: dict
     """
     # (output key, entry key) pairs, in the order the fields are read.
     wanted = (
         ('account_id', '_id'),
         ('account_name', 'account_name'),
         ('password', 'password'),
     )
     entry = None
     while not entry:
         try:
             payload = redis_conn.rpop('tyc_account_lists')
             entry = json.loads(payload.decode())
         except Exception as err:
             self.log.error(err)
             self.log.error("数据库没有符合要求的账号或者redis出现错误, ERROR:{}".format(err))
             sleep(10)
     return {out_key: entry[src_key] for out_key, src_key in wanted}