Exemplo n.º 1
0
 def running(self):
     """Drive the shareholder collection until no company is left pending.

     Each round asks a fresh ``StockHolder`` how many companies remain,
     prints that number in a banner, and stops once it is no longer
     positive. Otherwise the next company is fetched, its shareholder count
     and page count are determined, and every result page is requested and
     parsed; the company's status row is updated after each page.
     """
     collector = StockHolder()
     banner = '\n{2}\n{1}剩余{0}家企业股东数据待采集!{1}\n{2}\n'
     pages_handled = 0  # running total of processed pages (not reported)

     while True:
         remaining = collector.verify_cond()
         print(banner.format(remaining, '*' * 20, '*' * 63))
         if not remaining > 0:
             break

         print('Loading......\n')
         time.sleep(3)
         print('开始新一轮采集')

         # get_com_id() yields an indexable record: id first, name second.
         record = collector.get_com_id()
         com_id = record[0]
         com_name = record[1]

         count_sh = collector.count_sh_judge(com_id)
         last_page = collector.sh_page_judge(count_sh)

         page = 1
         while page <= last_page:
             pages_handled += 1
             tree = collector.get_page_req(com_id, com_name, page)
             collector.parse_info(tree, com_id, com_name, page, last_page)
             gm().upd_status(com_id, 'status_stockholder',
                             'count_stockholder', count_sh)
             page += 1

     print('\n数据采集完成!')
Exemplo n.º 2
0
 def __init__(self):
     """Create the helper objects this instance relies on and set the base URL.

     The helper classes (``db``, ``dk``, ``gh``, ``tm``, ``gm``) are defined
     outside this excerpt; the notes on their roles below are based only on
     how they are used elsewhere in this file and should be confirmed.
     """
     self.db = db()  # presumably database access -- TODO confirm
     self.dk = dk()  # role not visible from here -- TODO confirm
     self.gh = gh()  # its header() result is passed as request headers elsewhere
     self.tm = tm()  # elsewhere get_localtime() is read from a tm instance
     self.gm = gm()  # exposes verify() and upd_status() as used elsewhere
     # Site root without a trailing slash; paths are appended as '/firm_<id>.html'.
     self.index_url = 'https://www.qcc.com'
Exemplo n.º 3
0
 def count_sh_judge(self, com_id):
     """Estimate a company's shareholder count from its home page.

     The number is read from the "shareholder info" navigation entry of the
     company page. It is only a rough figure (the page shows ``999+`` for
     large values, which is mapped to 999), so callers are expected to
     refine it in a second step.

     Args:
         com_id: Company identifier used to build the page URL, or ``None``
             when there is no company to look up.

     Returns:
         The shareholder count as an ``int``; 0 when ``com_id`` is ``None``
         or when the count cannot be extracted from the page.

     Side effects:
         Sleeps 3-5 seconds before the request and always records the
         result through ``gm().upd_status`` (also for the 0 cases).
     """
     # NOTE(review): a fresh StockHolder is built on every call instead of
     # using ``self``; kept as-is because the enclosing class is not visible
     # here -- confirm and switch to ``self`` if this is a StockHolder method.
     sh = StockHolder()
     header = sh.gh.header()
     if com_id is None:
         count_sh = 0
     else:
         com_url = f'{sh.index_url}/firm_{com_id}.html'
         time.sleep(random.randint(3, 5))  # randomised delay between requests
         # NOTE(review): no timeout is set, so a stalled server blocks forever.
         res = requests.get(com_url, headers=header).text
         tree = sh.gm.verify(res)
         try:
             # Two alternative page layouts are covered by the union below.
             count_sh = tree.xpath(
                 '//div[@class="company-nav-items"]/span[contains(text(),"股东信息")]/span/text()'
                 '|//div[@class="company-nav-items"]/a[@data-pos="partnerslist"]/span/text()'
             )[0]
             if count_sh == '999+':
                 count_sh = 999
             count_sh = int(count_sh)
         except Exception:
             # Best effort: a missing node or unparsable text counts as 0.
             # ``Exception`` (not a bare except) lets KeyboardInterrupt and
             # SystemExit propagate so the scraper can still be stopped.
             count_sh = 0
     status_column = 'status_stockholder'
     count_column = 'count_stockholder'
     gm().upd_status(com_id, status_column, count_column, count_sh)
     return count_sh
Exemplo n.º 4
0
 def running(self):
     """Entry point: run the whole main-member collection.

     Each round asks a fresh ``MainMember`` how many companies remain,
     prints that number in a banner, and stops once it is no longer
     positive. Otherwise the next company id is fetched, its member count
     and page tree are obtained, the tree is parsed, and the company's
     status row is updated.
     """
     worker = MainMember()
     banner = '\n{2}\n{1}剩余{0}家企业主要人员数据待采集!{1}\n{2}\n'

     while True:
         remaining = worker.verify_cond()
         print(banner.format(remaining, '*' * 20, '*' * 63))
         if not remaining > 0:
             break

         print('Loading......\n')
         time.sleep(3)
         print('开始新一轮采集')

         com_id = worker.get_com_id()[0]
         # count_cm_judge() yields an indexable pair: count first, tree second.
         judged = worker.count_cm_judge(com_id)
         count_mm = judged[0]
         tree = judged[1]
         worker.parse_info(com_id, tree)
         gm().upd_status(com_id, 'status_main_member',
                         'count_main_member', count_mm)

     print('\n数据采集完成!')
Exemplo n.º 5
0
 def get_count_rc(self, count_rc, key, count, com_id):
     """Turn the rough recruiting count into the exact one.

     When the rough ``count_rc`` is positive, the company's "run" tab is
     requested and the exact number of job postings is read from the
     job-list link text. Otherwise the count is set to 0 and no request is
     made. In both cases the result is stored through ``gm().upd_status``.

     Args:
         count_rc: Rough number of job postings.
         key: Company name, placed in the request URL.
         count: Caller's progress counter; only printed.
         com_id: Company identifier.

     Returns:
         A tuple ``(count_rc, res)`` where ``res`` is the response text of
         the detail request, or 0 when no request was made.
     """
     res = 0  # stays 0 when no detail page is fetched
     if count_rc > 0:
         detail_url = f'https://www.qichacha.com/company_getinfos?unique={com_id}&companyname={key}&tab=run'
         request_headers = self.gh.header()
         referer = f'https://www.qichacha.com/firm_{com_id}.html'
         request_headers.update({'Referer': referer})
         time.sleep(random.randint(3, 5))
         response = requests.get(detail_url, headers=request_headers)
         res = response.text
         page_tree = self.gm.verify(res)
         link_texts = page_tree.xpath('//a[contains(@onclick,"#joblist")]/text()')
         # The number is whatever follows '招聘' in the first link text.
         pieces = link_texts[0].split('招聘')
         count_rc = int(pieces[1].strip())
         print(tm().get_localtime())  # current time
         print(f'计数器:{count}\n公司ID:{com_id}\n招聘岗位数:{count_rc}')
     else:
         count_rc = 0
     # Table column names for the status flag and the stored count.
     gm().upd_status(com_id, 'status_recruit', 'count_recruit', count_rc)
     return count_rc, res
Exemplo n.º 6
0
 def __init__(self):
     """Create the helper objects this instance relies on and set the base URL.

     The helper classes are defined outside this excerpt; the notes on their
     roles below are assumptions to confirm against those definitions.
     """
     # NOTE(review): if the enclosing class is RecruitInfo itself, this line
     # would recurse without end -- the class header is not visible; confirm.
     self.rc = RecruitInfo()
     self.db = db()  # presumably database access -- TODO confirm
     self.gh = gh()  # its header() result is passed as request headers elsewhere
     self.gm = gm()  # exposes verify() and upd_status() as used elsewhere
     self.index_url = 'https://www.qichacha.com'  # site root, no trailing slash
Exemplo n.º 7
0
 def __init__(self):
     """Create the helper objects this instance relies on.

     The helper classes are defined outside this excerpt; the notes on their
     roles below are assumptions to confirm against those definitions.
     """
     self.db = db()  # presumably database access -- TODO confirm
     self.dk = dk()  # role not visible from here -- TODO confirm
     self.gh = gh()  # its header() result is passed as request headers elsewhere
     self.gm = gm()  # exposes verify() and upd_status() as used elsewhere
     self.tm = tm()  # elsewhere get_localtime() is read from a tm instance
Exemplo n.º 8
0
 def __init__(self):
     """Create the helper objects this instance relies on and set the base URL.

     The helper classes are defined outside this excerpt; the notes on their
     roles below are assumptions to confirm against those definitions.
     """
     self.db = db()  # presumably database access -- TODO confirm
     self.gh = gh()  # its header() result is passed as request headers elsewhere
     self.gm = gm()  # exposes verify() and upd_status() as used elsewhere
     # Site root WITH a trailing slash; code joining paths onto it should not
     # add a second '/'.
     self.index_url = 'https://www.qichacha.com/'