示例#1
0
 def check_chapter_id(cls, _id):
     """Query the database for an existing row with the given chapter_id.

     Runs a ``SELECT EXISTS(...)`` statement through the ``cur`` cursor that
     is in scope for this function.

     :param _id: the chapter_id value to look up
     :return: the first column of the fetched row; ``1`` if the query raises
         (the error is logged as a warning first)
     """
     query = 'SELECT EXISTS(SELECT 1 from `笔趣阁章节内容` WHERE `chapter_id`=%(id)s)'
     try:
         cur.execute(query, {'id': _id})
         row = cur.fetchone()
         return row[0]
     except Exception as e:
         # NOTE(review): falling back to 1 presumably makes the caller treat
         # the chapter as already stored when the lookup fails -- confirm.
         logger.warning('当前出现一个SQL Error: {}'.format(e))
         return 1
 def process_request(cls, request, spider):
     """Load ``request.url`` in a fresh PhantomJS driver and return the page.

     A new PhantomJS driver is created for every call, configured with custom
     request headers (and a proxy when ``USE_PROXY`` is set), used to load the
     page, and then shut down again -- also when loading fails.

     :param request: the request whose ``url`` is opened in the browser
     :param spider: the spider; ``spider.settings`` supplies ``USER_AGENT``,
         ``USE_PROXY`` and ``PHANTOMJS_PATH``
     :return: an ``HtmlResponse`` built from the driver's final URL and page
         source, encoded as utf-8
     :raises: whatever the driver raises (e.g. on page-load timeout); the
         driver is still closed before the exception propagates
     """
     headers = {
         'Accept': '*/*',
         'Accept-Language': 'en-US,en;q=0.8',
         'Cache-Control': 'max-age=0',
         'User-Agent': spider.settings['USER_AGENT'],
         'Connection': 'keep-alive',
     }
     desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     # The loop also covers 'User-Agent', so no separate assignment is needed.
     for key, value in headers.items():
         desired_capabilities['phantomjs.page.customHeaders.{}'.format(
             key)] = value
     if spider.settings['USE_PROXY']:
         service_args = [
             '--proxy={ip}:{port}'.format(**cls.get_proxy()),
             '--proxy-type=http',
         ]
     else:
         service_args = []
     driver = webdriver.PhantomJS(
         executable_path=spider.settings['PHANTOMJS_PATH'],
         desired_capabilities=desired_capabilities,
         service_args=service_args)
     try:
         # Implicit wait of 20 seconds; tune as needed.
         driver.implicitly_wait(20)
         # 20-second page-load timeout. driver.get() has no timeout option of
         # its own and has been seen to hang forever without raising; this
         # setting prevents the program from getting stuck.
         driver.set_page_load_timeout(20)
         # 10-second script timeout.
         driver.set_script_timeout(10)
         driver.get(request.url)
         # Poll document.readyState once per second until it is 'complete',
         # giving up once the counter reaches 20.
         i = 1
         while driver.execute_script(
                 'return document.readyState') != 'complete' and i < 20:
             logger.warning('sleep 1')
             time.sleep(1)
             i += 1
         # Run JS to imitate a user action: scroll the page down by setting
         # scrollTop to 10000.
         js = "var q=document.documentElement.scrollTop=10000"
         driver.execute_script(js)
         body = driver.page_source
         logger.warning(u"访问" + request.url)
         url = driver.current_url
     finally:
         # Always tear the browser down, even when loading raised; otherwise
         # the PhantomJS process would be left running.
         try:
             driver.close()
             # NOTE(review): SIGTERM is sent to the service process before
             # quit(), presumably so PhantomJS cannot linger -- confirm.
             driver.service.process.send_signal(signal.SIGTERM)
         finally:
             driver.quit()
     return HtmlResponse(url, body=body, encoding='utf-8', request=request)
示例#3
0
 def process_item(self, item, spider):
     """Persist one item, choosing the SQL statement from the item's type.

     * ``CommitItem``: commit the connection, nothing is executed.
     * ``BriefItem``: execute ``self.brief_sql`` with code, train_no, start,
       end and turn.
     * ``InfoItem``: execute ``self.info_sql`` with train_no, no, station,
       start_time, arrive_time, stopover_time and turn.
     * any other item: execute ``self.turn_sql`` with id and mark_time, then
       commit immediately.

     Any exception raised along the way is logged as a warning and swallowed.

     :param item: the item to store; accessed like a mapping
     :param spider: unused here
     :return: ``None``
     """
     # NOTE(review): nothing is returned; if this is a Scrapy item pipeline,
     # later pipeline stages would expect the item back -- confirm.
     try:
         if isinstance(item, CommitItem):
             self.conn.commit()
         elif isinstance(item, BriefItem):
             self.cursor.execute(self.brief_sql,
                                 (item["code"], item["train_no"],
                                  item["start"], item["end"], item["turn"]))
         elif isinstance(item, InfoItem):
             self.cursor.execute(
                 self.info_sql,
                 (item["train_no"], item["no"], item["station"],
                  item["start_time"], item["arrive_time"],
                  item["stopover_time"], item["turn"]))
         else:
             self.cursor.execute(self.turn_sql,
                                 (item["id"], item["mark_time"]))
             self.conn.commit()
     # "except X as e" is valid on Python 2.6+ and Python 3; the old
     # "except X, e" form is a SyntaxError on Python 3.
     except Exception as e:
         logger.warning("excute sql fail.")
         logger.warning(str(e))
示例#4
0
 def process_item(self, item, spider):
     """Insert one item via ``self.sql`` and commit.

     Executes ``self.sql`` with the item's ``crawl_time`` and ``key_word``
     values, then commits ``self.conn``. Any exception is logged as a
     warning, together with the offending item, and swallowed.

     :param item: the item to store; must provide ``crawl_time`` and
         ``key_word`` keys
     :param spider: unused here
     :return: ``None``
     """
     # NOTE(review): nothing is returned; if this is a Scrapy item pipeline,
     # later pipeline stages would expect the item back -- confirm.
     try:
         self.cursor.execute(self.sql, (item['crawl_time'], item['key_word']))
         self.conn.commit()
     # "except X as e" is valid on Python 2.6+ and Python 3; the old
     # "except X, e" form is a SyntaxError on Python 3.
     except Exception as e:
         logger.warning("execute sql fail.")
         logger.warning(str(e))
         logger.warning(item)
示例#5
0
    def process_item(self, item, spider):
        """Store one item, or commit when a ``CommitItem`` arrives.

        A ``CommitItem`` only triggers ``self.conn.commit()``. Any other item
        is written by executing ``self.sql`` with its province, city, county,
        address, name, windows, start, end and turn values; ``u'00'`` is
        appended to both ``start`` and ``end`` first. Any exception is logged
        as a warning, together with the offending item, and swallowed.

        :param item: the item to store; accessed like a mapping
        :param spider: unused here
        :return: ``None``
        """
        # NOTE(review): nothing is returned; if this is a Scrapy item
        # pipeline, later stages would expect the item back -- confirm.
        try:
            if isinstance(item, CommitItem):
                self.conn.commit()
            else:
                self.cursor.execute(
                    self.sql,
                    (item["province"], item["city"], item["county"],
                     item["address"], item["name"], item["windows"],
                     item["start"] + u'00', item["end"] + u'00', item["turn"]))
        # "except X as e" is valid on Python 2.6+ and Python 3; the old
        # "except X, e" form is a SyntaxError on Python 3.
        except Exception as e:
            logger.warning("execute sql fail.")
            logger.warning(str(e))
            logger.warning(item)
示例#6
0
 def _upload_file(self, path, buf):
     """Store the contents of ``buf`` in ``self._bucket`` under key ``path``.

     :param path: the object key to write to; also included in the log line
     :param buf: a buffer object exposing ``getvalue()``
     """
     message = 'now i will upload the image {}'.format(path)
     logger.warning(message)
     payload = buf.getvalue()
     self._bucket.put_object(key=path, data=payload)