示例#1
0
 def work(self, spider_cla):
     """Instantiate *spider_cla*, run it, and always release its resources.

     Any exception raised by the run is logged (tagged with the spider
     class name and its URL) and printed, never propagated; ``pop()`` and
     ``close()`` run unconditionally in the ``finally`` clause.
     """
     instance = spider_cla()
     try:
         instance.start()
     except Exception as err:
         tag = spider_cla.__name__ + ' ' + instance.url
         logging_except(err, cls_name=tag)
         print(err)
     finally:
         instance.pop()
         instance.close()
示例#2
0
 def start(self):
     """Main scheduling loop.

     Seeds the basic-info data when its redis key is empty, then runs
     every spider in ``self.spider_list`` whose redis queue (key derived
     by replacing "Spider" with "info" in the class name) has pending
     entries.  Loops forever; prints a wait message after each pass.
     """
     logging_except(module='init')
     logging_sql()
     while True:
         basic = RedisClient("info_basic_info")
         if not basic.get():
             self.main_spider()
         basic.close()
         for spider_cls in self.spider_list:
             queue_key = spider_cls.__name__.replace("Spider", "info")
             client = RedisClient(queue_key)
             if client.get():
                 self.work(spider_cls)
             client.close()
         printf("Wait for all spider end!")
示例#3
0
 def start(self):
     """Scrape one company's detail pages and persist them.

     Binds the redis queue / table names, resolves the target URL, then
     parses the base company record followed by the honor / admin / party
     sub-records.  The honor section is treated as optional: a failure
     there is logged and skipped so the remaining sections still store.
     """
     self.initialization("info_company", "jj_info_company")
     self.url = self.new_url()
     # One MySQL client per destination table, in storage order.
     self._mysqlclient = [
         Mysql_Client('jj_info_company'),
         Mysql_Client('jj_info_company_honor'),
         Mysql_Client('jj_info_company_admin'),
         Mysql_Client('jj_info_company_party')
     ]
     source = self._webdriver.get(self.url)
     company = self.jj_info_company(source)
     try:
         self.jj_info_company_honor(company)
     except Exception as e:
         # Some companies have no honor data; log and continue.
         logging_except(e)
         printf("jj_info_company_honor", "NoData!!")
     self.jj_info_company_admin(company)
     self.jj_info_company_party(company)
示例#4
0
            if not main_redis.get():
                self.main_spider()
            main_redis.close()
            for cls in self.spider_list:
                spider_name = cls.__name__
                redis_key = spider_name.replace("Spider", "info")
                redis_client = RedisClient(redis_key)
                if redis_client.get():
                    self.work(cls)
                redis_client.close()
            printf("Wait for all spider end!")

    def work(self, spider_cla):
        """Run one spider class to completion.

        Builds an instance of *spider_cla* and starts it; any exception
        is logged (tagged ``ClassName:url``) and printed rather than
        propagated.  ``pop()`` and ``close()`` always run afterwards.
        """
        instance = spider_cla()
        try:
            instance.start()
        except Exception as err:
            tag = spider_cla.__name__ + ':' + str(instance.url)
            logging_except(err, cls_name=tag)
            print(err)
        finally:
            instance.pop()
            instance.close()


if __name__ == "__main__":
    # NOTE(review): debug invocation left in place — this only writes a
    # dummy log entry; the real entry point (the Schedule loop) is
    # commented out below.  Confirm intent before shipping.
    logging_except("????", cls_name="dsadsadsa")
    # program = Schedule()
    # program.start()
示例#5
0
 def start(self):
     """Crawl every announcement (gonggao) page for the current fund.

     Walks the page bar on the announcement list, opens each announcement
     in a second browser tab, waits for its full text to render, stores
     one row per announcement via ``self.storage``, and finally prints a
     summary containing the fund code and the stored-row count.

     Bug fix: ``amount`` was incremented twice per row (once before
     parsing, once after storage), doubling the reported count; it is now
     incremented only once, after a successful ``storage`` call.
     """
     self.initialization("info_all_gonggao", "jj_info_all_gonggao")
     self._webdriver.get(self.url)
     js = "window.open('%s');"
     # start
     next_botton = "//div[@id='pagebar']//label[last()]"
     current_xpath = "//div[@id='pagebar']//label[@value='%s']"
     # The next-to-last label in this pager widget carries the page total.
     pages_num = self._webdriver.find_element_by_xpath(
         "//div[@id='pagebar']//label[last()-1]").text
     current_page = 0
     amount = 0
     while int(current_page) < int(pages_num):
         current_page += 1
         try:
             self._webdriver.find_element_by_xpath(current_xpath %
                                                   current_page).click()
         except Exception as e:
             # Numbered page label missing; fall back to the "next" button.
             logging_except(e)
             self._webdriver.find_element_by_xpath(next_botton).click()
         time.sleep(3)
         table = BeautifulSoup(
             self._webdriver._brower.page_source,
             'lxml').find('div', id='ggtable').find('tbody').find_all('tr')
         for tr in table:
             tds = tr.find_all('td')
             # Data rows have exactly 3 cells: title, type, date.
             if len(tds) == 3:
                 values = [self.code]
                 title = tds[0].text.replace(' ', '').replace('\n', '')
                 report_type = tds[1].text.replace(' ',
                                                   '').replace('\n', '')
                 date = tds[-1].text.replace(' ', '').replace('\n', '')
                 href = tds[0].find('a')['href']
                 for td in [title, report_type, date]:
                     values.append(td)
                 # Open the announcement in a new tab and poll until its
                 # body element has rendered.
                 self._webdriver._brower.execute_script(js % href)
                 self._webdriver._brower.switch_to_window(
                     self._webdriver._brower.window_handles[1])
                 time.sleep(3)
                 while not BeautifulSoup(
                         self._webdriver._brower.page_source, "lxml").find(
                             'pre', id='jjggzwcontentbody'):
                     time.sleep(3)
                 values.append(
                     special_repace(
                         BeautifulSoup(self._webdriver._brower.page_source,
                                       "lxml").find(
                                           'pre',
                                           id='jjggzwcontentbody').text))
                 time.sleep(3)
                 self._webdriver._brower.close()
                 self._webdriver._brower.switch_to_window(
                     self._webdriver._brower.window_handles[0])
                 sel = ("code", self.code, "title", title, 'date', date,
                        'type', report_type)
                 self.storage(values, sel)
                 # Count each stored row exactly once (see docstring).
                 amount += 1
             else:
                 pass
         # botton = self._webdriver.find_element_by_xpath(next_botton)
         # time.sleep(3)
         # botton.click()
         # time.sleep(3)
     # end
     printf("jj_info_all_gonggao storage ,CODE:%s,AMOUNT:%s" %
            (self.code, amount))