Exemplo n.º 1
0
 def search_account_from_mongo(self):
     """Pop one login account from the account queue and store it on ``self``.

     Despite the method name, the account is taken from the
     ``tyc_account_lists`` list of the client returned by
     ``C.redis_db(db=7)``; each entry is expected to be a JSON object with
     the keys ``_id``, ``account_name`` and ``password``.

     On success ``self.account_id``, ``self.account_num`` and
     ``self.password`` are set.  When no usable account can be read, the
     browser driver is closed and the process exits.  Any other failure
     (for example a missing key in the entry) is logged and the method
     returns without raising.

     :return: None
     :raises SystemExit: when the queue yields no usable account.
     """
     red_db = C.redis_db(db=7)
     try:
         account_info = None
         try:
             raw_account = red_db.rpop('tyc_account_lists')
             # Assuming a redis-py style client, rpop yields None for an
             # empty list; test for that explicitly instead of letting
             # json.loads(None) raise a misleading TypeError.
             if raw_account is not None:
                 account_info = json.loads(raw_account)
         except Exception as e:
             # Best effort: an unreadable entry is handled like "no account".
             log.error("[ERROR]: {}".format(e))
             account_info = None
         if account_info:
             self.account_id = account_info['_id']
             # Account name
             self.account_num = account_info['account_name']
             log.info("[INFO]: 正在使用账号{}".format(self.account_num))
             # Password
             self.password = account_info["password"]
         else:
             log.info("数据库没有符合要求的账号")
             self.driver.quit()
             # Same effect as the interactive helper quit(), without
             # depending on the site module having installed it.
             raise SystemExit
     except Exception as e:
         # SystemExit is not an Exception subclass, so the exit above is
         # not swallowed here.
         log.error("[ERROR]: {}".format(e))
         log.error("[ERROR]: 数据库查询账号失败")
Exemplo n.º 2
0
 def limit_condition_search(self):
     """Break the search for the current name into per-condition result entries.

     Fetches the next name via ``search_name_from_mongo`` and then walks a
     fixed list of search conditions.  For each condition the company count
     returned by ``get_name_to_search`` is recorded together with
     ``self.name_url``; a condition with more than 10000 companies is
     delegated to ``second_limit_condition`` instead.  As soon as fewer than
     10000 companies remain (``self.total`` minus the running sum), one
     final entry covering the remainder is appended and the loop stops.

     If a count lookup returns ``None`` the loop stops early; whatever was
     collected up to that point is still stored.

     When at least one entry was collected, the list is written as JSON to
     the cache under ``self.man_name`` and the matching ``name_results``
     document gets ``flag`` set to 1.

     :return: None
     """
     # Take the next name to process (sets self.man_name among others).
     self.search_name_from_mongo()
     log.info("[INFO]: 正在获取人名详情:{}".format(self.man_name))
     log.info('公司总数:{}'.format(self.total))
     lists = []
     first_conditions = [
         'or0100', 'or100200', 'or200500', 'or5001000', 'or1000'
     ]
     now_num = 0
     for index, condition in enumerate(first_conditions):
         first_num = self.get_name_to_search(condition)
         if first_num is None:
             log.error('获取公司数目失败')
             break
         current_num = int(first_num)
         now_num += current_num
         log.info('条件[{}]下当前公司数量:{}'.format(condition, first_num))
         surplus_num = self.total - now_num
         log.info('剩余公司数量:{}'.format(surplus_num))
         if current_num > 10000:
             # Too many results for one condition: apply the second-level
             # condition split.
             second_lists = self.second_limit_condition(current_num, condition)
             if second_lists:
                 lists.extend(second_lists)
             else:
                 log.error('第二种条件失败')
         else:
             # NOTE(review): self.name_url is presumably refreshed by
             # get_name_to_search for this condition - confirm.
             lists.append({'total': current_num, 'url': self.name_url})
         if surplus_num < 10000:
             # The remainder fits in one entry. Its URL is built from the
             # tail of the current condition string: the last 3 characters
             # for the first three conditions, the last 4 for the others.
             suffix = condition[-3:] if index <= 2 else condition[-4:]
             self.name_url = "https://www.tianyancha.com/search/or{}5000?key={}".format(
                 suffix, self.label_name)
             lists.append({'total': int(surplus_num), 'url': self.name_url})
             break
     print({'name': self.man_name, 'data': lists})
     if lists:
         # Write the collected entries to the cache.
         re_db = C.redis_db()
         # The original passed ``{lists}`` (a set literal holding a list),
         # which raises TypeError before anything is stored; serialise the
         # list itself instead.
         # NOTE(review): ``rset`` is not a standard redis-py method -
         # confirm that the object returned by C.redis_db() provides it.
         re_db.rset(self.man_name, json.dumps(lists, ensure_ascii=False))
         # Mark the name as processed.
         self.db.name_results.update_one({'name': self.man_name},
                                         {'$set': {
                                             "flag": 1
                                         }})
Exemplo n.º 3
0
 def search_name_from_mongo(self):
     """Pop the next person name to process and store it on ``self``.

     Entries are taken from the ``tyc_name_lists`` list of the client
     returned by ``C.redis_db(db=3)``; each entry is expected to be a JSON
     object with the keys ``_id``, ``name`` and ``last name``.  An entry is
     skipped when its ``name_results`` document has no ``name_num`` value.
     Names longer than 10 characters are passed to ``handle_long_name`` and
     the next entry is tried.

     On success ``self._id``, ``self.man_name`` and ``self.last_name`` are
     set and the method returns.  When no usable entry can be read, the
     browser driver is closed and the process exits.

     :return: None
     :raises SystemExit: when the queue yields no usable entry.
     """
     red_db = C.redis_db(db=3)
     while True:
         try:
             name_info = None
             try:
                 raw_name = red_db.rpop('tyc_name_lists')
                 # Assuming a redis-py style client, rpop yields None for
                 # an empty list; test for that explicitly instead of
                 # letting json.loads(None) raise a misleading TypeError.
                 if raw_name is not None:
                     name_info = json.loads(raw_name)
                     res = self.name_results.find_one({'_id': name_info['_id']})
                     if res.get('name_num') is None:
                         continue
             except Exception as e:
                 # Any failure here (including a missing document, where
                 # ``res`` is None) is handled like an exhausted queue.
                 log.error("[ERROR]: {}".format(e))
                 name_info = None
             if name_info:
                 # Document id
                 self._id = name_info["_id"]
                 # Person name
                 self.man_name = name_info["name"]
                 self.last_name = name_info["last name"]
                 if len(self.man_name) > 10:
                     # Over-long name: hand it off for splitting, then
                     # move on to the next entry.
                     self.handle_long_name()
                     continue
                 # Log before leaving the loop; the original placed this
                 # call after ``break`` where it could never run.
                 log.info("[INFO]: 姓名:{}".format(self.man_name))
                 break
             else:
                 log.info("数据库没有符合要求的人名")
                 self.driver.quit()
                 # Same effect as the interactive helper quit(), without
                 # depending on the site module having installed it.
                 raise SystemExit
         except Exception as e:
             log.error("[ERROR]: {}".format(e))
             log.error("[ERROR]: 数据库查询人名失败")
             # NOTE(review): the driver is shut down here but the loop then
             # retries, so a later success returns with a closed driver.
             # Confirm whether this path should exit or keep the driver.
             self.driver.quit()
 def search_name_from_mongo(self):
     """Pop the next not-yet-crawled person name and store it on ``self``.

     Entries are taken from the ``tyc_name_lists`` list of the client
     returned by ``C.redis_db(db=13)``; each entry is expected to be a JSON
     object with the keys ``_id``, ``name`` and ``last name``.  An entry is
     skipped when its ``name_results`` document already has a truthy
     ``name_num``.  Names longer than 10 characters are passed to
     ``handle_long_name`` and the next entry is tried.

     On success ``self._id``, ``self.man_name`` and ``self.last_name`` are
     set and the method returns.  When no usable entry can be read, the
     browser driver is closed and the process exits.  Other errors are
     logged and the lookup is retried after a 5 second pause.

     :return: None
     :raises SystemExit: when the queue yields no usable entry.
     """
     red_db = C.redis_db(db=13)
     while True:
         try:
             name_info = None
             try:
                 raw_name = red_db.rpop('tyc_name_lists')
                 # Assuming a redis-py style client, rpop yields None for
                 # an empty list; test for that explicitly instead of
                 # letting json.loads(None) raise a misleading TypeError.
                 if raw_name is not None:
                     name_info = json.loads(raw_name)
                     res = self.name_results.find_one({'_id': name_info['_id']})
                     if res.get('name_num'):
                         print('已爬取')
                         continue
             except Exception as e:
                 # Any failure here (including a missing document, where
                 # ``res`` is None) is handled like an exhausted queue.
                 log.error(e)
                 name_info = None
             if name_info:
                 # Document id
                 self._id = name_info["_id"]
                 # Person name
                 self.man_name = name_info["name"]
                 self.last_name = name_info["last name"]
                 if len(self.man_name) > 10:
                     # Over-long name: hand it off for splitting, then
                     # move on to the next entry.
                     self.handle_long_name()
                     continue
                 # Log before leaving the loop; the original placed this
                 # call after ``break`` where it could never run.
                 log.info("姓名:{}".format(self.man_name))
                 break
             else:
                 log.info("数据库没有符合要求的人名")
                 self.driver.quit()
                 # Same effect as the interactive helper quit(), without
                 # depending on the site module having installed it.
                 raise SystemExit
         except Exception as e:
             log.error(e)
             log.error("数据库查询人名失败")
             # Pause briefly before retrying with the next entry.
             time.sleep(5)