def search_account_from_mongo(self):
    """Take one login account off the Redis queue and store it on the instance.

    Calls ``rpop('tyc_account_lists')`` on the handle returned by
    ``C.redis_db(db=7)``, decodes the result as JSON and copies its ``_id``,
    ``account_name`` and ``password`` fields to ``self.account_id``,
    ``self.account_num`` and ``self.password``.

    If no usable entry is obtained (the pop or the decode raised, or the
    decoded value is falsy) the driver is shut down and the process exits
    through ``quit()``. Any other ``Exception`` raised along the way is
    logged and swallowed.

    :return: None
    """
    account_queue = C.redis_db(db=7)
    try:
        try:
            payload = account_queue.rpop('tyc_account_lists')
            account = json.loads(payload)
        except Exception as pop_error:
            # Covers a failed pop as well as an undecodable payload; both
            # are treated as "no account available".
            print(pop_error)
            account = None

        if not account:
            log.info("数据库没有符合要求的账号")
            self.driver.quit()
            quit()
        else:
            self.account_id = account['_id']
            # Account name used for login.
            self.account_num = account['account_name']
            log.info("[INFO]: 正在使用账号{}".format(self.account_num))
            # Password for that account.
            self.password = account["password"]
    except Exception as failure:
        log.info("[ERROR]: {}".format(failure))
        log.info("[ERROR]: 数据库查询账号失败")
def limit_condition_search(self):
    """Split one name's search into per-filter queries and cache the result.

    Fetches the next name through ``self.search_name_from_mongo()``, then
    walks a fixed list of filter codes. For each code it asks
    ``self.get_name_to_search`` for the number of matching companies and
    records a ``{'total': ..., 'url': ...}`` entry, delegating to
    ``self.second_limit_condition`` when a single filter still matches more
    than 10000 companies. As soon as fewer than 10000 companies remain
    unaccounted for, one final entry covering the remainder is added and the
    walk stops.

    When at least one entry was collected, the list is written as JSON under
    the name through ``C.redis_db()`` and the name's document in
    ``self.db.name_results`` is flagged with ``flag = 1``.

    Reads ``self.total``, ``self.label_name`` and ``self.name_url``, which
    are expected to be set by code outside this method.

    :return: None
    """
    # Fetch the next name to work on.
    self.search_name_from_mongo()
    log.info("[INFO]: 正在获取人名详情:{}".format(self.man_name))
    log.info('公司总数:{}'.format(self.total))

    lists = []
    # Filter codes embedded in the search URL, tried in this order.
    # NOTE(review): presumably value ranges (0-100, 100-200, ...) - confirm.
    first_conditions = [
        'or0100', 'or100200', 'or200500', 'or5001000', 'or1000'
    ]
    now_num = 0
    for index, condition_code in enumerate(first_conditions):
        first_num = self.get_name_to_search(condition_code)
        if first_num is None:
            log.error('获取公司数目失败')
            break

        count = int(first_num)
        now_num += count
        log.info('条件[{}]下当前公司数量:{}'.format(condition_code, first_num))
        surplus_num = self.total - now_num
        log.info('剩余公司数量:{}'.format(surplus_num))

        if count > 10000:
            # Too many hits for one query: narrow further with the
            # second-level condition.
            second_lists = self.second_limit_condition(count, condition_code)
            if second_lists:
                lists.extend(second_lists)
            else:
                log.error('第二种条件失败')
        else:
            lists.append({'total': count, 'url': self.name_url})

        if surplus_num < 10000:
            # The remainder fits in one query: build a URL that starts at
            # this filter's upper bound (last 3 digits for the first three
            # codes, last 4 for the others) and ends at 5000.
            # NOTE(review): surplus_num may be zero or negative here and an
            # entry is still appended - confirm that is intended.
            upper_bound = condition_code[-3:] if index <= 2 else condition_code[-4:]
            self.name_url = "https://www.tianyancha.com/search/or{}5000?key={}".format(
                upper_bound, self.label_name)
            lists.append({'total': int(surplus_num), 'url': self.name_url})
            break

    print({'name': self.man_name, 'data': lists})
    if lists:
        # Write the collected queries to the cache.
        # The original passed ``{lists}`` - a set literal holding a list -
        # which raises TypeError (lists are unhashable) before anything is
        # stored. Serialise the list itself instead.
        # NOTE(review): confirm consumers expect a bare JSON array here.
        re_db = C.redis_db()
        re_db.rset(self.man_name, json.dumps(lists, ensure_ascii=False))
        # Mark the name as processed.
        self.db.name_results.update_one({'name': self.man_name},
                                        {'$set': {"flag": 1}})
def search_name_from_mongo(self):
    """Pop names from the Redis queue until a usable one is found.

    Repeatedly calls ``rpop('tyc_name_lists')`` on the handle returned by
    ``C.redis_db(db=3)`` and decodes each entry as JSON. An entry is skipped
    when its document in ``self.name_results`` (looked up by ``_id``) has no
    ``name_num`` value, or when the name is longer than 10 characters (such
    names are handed to ``self.handle_long_name()`` first).

    On success ``self._id``, ``self.man_name`` and ``self.last_name`` are set
    from the entry's ``_id``, ``name`` and ``last name`` fields.

    When no usable entry is obtained (queue empty, or the pop / decode /
    lookup raised) the driver is shut down and the process exits through
    ``quit()``.

    :return: None
    """
    red_db = C.redis_db(db=3)
    while True:
        try:
            try:
                raw = red_db.rpop('tyc_name_lists')
                if raw is None:
                    # Queue exhausted: go straight to the shutdown branch
                    # instead of provoking a TypeError in json.loads.
                    name_info = None
                else:
                    name_info = json.loads(raw)
                    res = self.name_results.find_one({'_id': name_info['_id']})
                    # NOTE(review): if no document matches, ``res`` is None
                    # and the AttributeError below ends the whole run via
                    # the except branch - confirm that is intended.
                    if res.get('name_num') is None:
                        continue
            except Exception as e:
                print(e)
                name_info = None

            if name_info:
                self._id = name_info["_id"]
                # The person's name and surname.
                self.man_name = name_info["name"]
                self.last_name = name_info["last name"]
                if len(self.man_name) > 10:
                    # Over-long names are split by the helper and this
                    # entry is dropped.
                    self.handle_long_name()
                    continue
                # Log before leaving the loop; in the original this call sat
                # after the break and could never run.
                log.info("[INFO]: 姓名:{}".format(self.man_name))
                break
            else:
                log.info("数据库没有符合要求的人名")
                self.driver.quit()
                quit()
        except Exception as e:
            log.info("[ERROR]: {}".format(e))
            log.info("[ERROR]: 数据库查询人名失败")
            # NOTE(review): the driver is quit here but the loop keeps
            # running, so a later iteration can return with a dead driver.
            self.driver.quit()
def search_name_from_mongo(self):
    """Pop names from the Redis queue until an uncrawled, usable one is found.

    Repeatedly calls ``rpop('tyc_name_lists')`` on the handle returned by
    ``C.redis_db(db=13)`` and decodes each entry as JSON. An entry is skipped
    when its document in ``self.name_results`` (looked up by ``_id``) already
    has a truthy ``name_num``, or when the name is longer than 10 characters
    (such names are handed to ``self.handle_long_name()`` first).

    On success ``self._id``, ``self.man_name`` and ``self.last_name`` are set
    from the entry's ``_id``, ``name`` and ``last name`` fields.

    When no usable entry is obtained (queue empty, or the pop / decode /
    lookup raised) the driver is shut down and the process exits through
    ``quit()``. Any other ``Exception`` is logged, followed by a 5 second
    pause before the next attempt.

    :return: None
    """
    red_db = C.redis_db(db=13)
    while True:
        try:
            try:
                raw = red_db.rpop('tyc_name_lists')
                if raw is None:
                    # Queue exhausted: go straight to the shutdown branch
                    # instead of provoking a TypeError in json.loads.
                    name_info = None
                else:
                    name_info = json.loads(raw)
                    res = self.name_results.find_one({'_id': name_info['_id']})
                    # NOTE(review): if no document matches, ``res`` is None
                    # and the AttributeError below ends the whole run via
                    # the except branch - confirm that is intended.
                    if res.get('name_num'):
                        print('已爬取')
                        continue
            except Exception as e:
                print(e)
                name_info = None

            if name_info:
                self._id = name_info["_id"]
                # The person's name and surname.
                self.man_name = name_info["name"]
                self.last_name = name_info["last name"]
                if len(self.man_name) > 10:
                    # Over-long names are split by the helper and this
                    # entry is dropped.
                    self.handle_long_name()
                    continue
                # Log before leaving the loop; in the original this call sat
                # after the break and could never run.
                log.info("姓名:{}".format(self.man_name))
                break
            else:
                log.info("数据库没有符合要求的人名")
                self.driver.quit()
                quit()
        except Exception as e:
            log.error(e)
            log.error("数据库查询人名失败")
            # Back off briefly before trying the next entry.
            time.sleep(5)