Exemplo n.º 1
0
        except (requests.exceptions.RequestException,
                requests.packages.urllib3.exceptions.HTTPError), e:
            logging.debug(
                '   ***> [%-30s] Page fetch failed using requests: %s' %
                (self.feed, e))
            # mail_feed_error_to_admin(self.feed, e, local_vars=locals())
            return self.fetch_page(urllib_fallback=True, requests_exception=e)
        except Exception, e:
            logging.debug('[%d] ! -------------------------' %
                          (self.feed.id, ))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' %
                          (self.feed.id, ))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e, local_vars=locals())
            if not urllib_fallback:
                self.fetch_page(urllib_fallback=True)
        else:
            self.feed.save_page_history(200, "OK")

        return html

    def save_no_page(self):
        """Flag the feed as having no original page and log a 404 entry.

        Clears ``has_page`` on the feed, saves the feed, and then records
        a page-history entry with status 404.
        """
        feed = self.feed
        feed.has_page = False
        feed.save()
        feed.save_page_history(404, "Feed has no original page.")

    def rewrite_page(self, response):
        BASE_RE = re.compile(r'<head(.*?\>)', re.I)
        base_code = u'<base href="%s" />' % (self.feed.feed_link, )
Exemplo n.º 2
0
            except TimeoutError, e:
                logging.debug("   ---> [%-30s] ~FRFeed fetch timed out..." % (feed.title[:30]))
                feed.save_feed_history(505, "Timeout", "")
                feed_code = 505
                fetched_feed = None
            except Exception, e:
                logging.debug("[%d] ! -------------------------" % (feed_id,))
                tb = traceback.format_exc()
                logging.error(tb)
                logging.debug("[%d] ! -------------------------" % (feed_id,))
                ret_feed = FEED_ERREXC
                feed = self.refresh_feed(feed.pk)
                feed.save_feed_history(500, "Error", tb)
                feed_code = 500
                fetched_feed = None
                mail_feed_error_to_admin(feed, e, local_vars=locals())

            if not feed_code:
                if ret_feed == FEED_OK:
                    feed_code = 200
                elif ret_feed == FEED_SAME:
                    feed_code = 304
                elif ret_feed == FEED_ERRHTTP:
                    feed_code = 400
                if ret_feed == FEED_ERREXC:
                    feed_code = 500
                elif ret_feed == FEED_ERRPARSE:
                    feed_code = 550
                elif ret_feed == FEED_ERRPARSE:
                    feed_code = 550
Exemplo n.º 3
0
 except Feed.DoesNotExist, e:
     logging.debug('   ---> [%-30s] Feed is now gone...' % (unicode(feed_id)[:30]))
     continue
 except TimeoutError, e:
     logging.debug('   ---> [%-30s] Feed fetch timed out...' % (unicode(feed)[:30]))
     feed.save_feed_history(505, 'Timeout', '')
     fetched_feed = None
 except Exception, e:
     logging.debug('[%d] ! -------------------------' % (feed_id,))
     tb = traceback.format_exc()
     logging.error(tb)
     logging.debug('[%d] ! -------------------------' % (feed_id,))
     ret_feed = FEED_ERREXC 
     feed.save_feed_history(500, "Error", tb)
     fetched_feed = None
     mail_feed_error_to_admin(feed, e)
 
 feed = self.refresh_feed(feed_id)
 if ((self.options['force']) or 
     (fetched_feed and
      feed.feed_link and
      (ret_feed == FEED_OK or
       (ret_feed == FEED_SAME and feed.stories_last_month > 10)))):
       
     logging.debug(u'   ---> [%-30s] Fetching page: %s' % (unicode(feed)[:30], feed.feed_link))
     page_importer = PageImporter(feed.feed_link, feed)
     try:
         page_importer.fetch_page()
     except TimeoutError, e:
         logging.debug('   ---> [%-30s] Page fetch timed out...' % (unicode(feed)[:30]))
         feed.save_page_history(555, 'Timeout', '')
Exemplo n.º 4
0
                continue
            except TimeoutError, e:
                logging.debug('   ---> [%-30s] Feed fetch timed out...' %
                              (unicode(feed)[:30]))
                feed.save_feed_history(505, 'Timeout', '')
                fetched_feed = None
            except Exception, e:
                logging.debug('[%d] ! -------------------------' % (feed_id, ))
                tb = traceback.format_exc()
                logging.error(tb)
                logging.debug('[%d] ! -------------------------' % (feed_id, ))
                ret_feed = FEED_ERREXC
                feed = self.refresh_feed(feed_id)
                feed.save_feed_history(500, "Error", tb)
                fetched_feed = None
                mail_feed_error_to_admin(feed, e)

            feed = self.refresh_feed(feed_id)
            if ((self.options['force']) or
                (fetched_feed and feed.feed_link and feed.has_page and
                 (ret_feed == FEED_OK or
                  (ret_feed == FEED_SAME and feed.stories_last_month > 10)))):

                logging.debug(u'   ---> [%-30s] ~FYFetching page: %s' %
                              (unicode(feed)[:30], feed.feed_link))
                page_importer = PageImporter(feed)
                try:
                    page_importer.fetch_page()
                except TimeoutError, e:
                    logging.debug(
                        '   ---> [%-30s] ~FRPage fetch timed out...' %
Exemplo n.º 5
0
         fp = feedparser.parse(self.feed.feed_address)
         self.feed.feed_link = fp.feed.get('link', "")
         self.feed.save()
     except (urllib2.HTTPError), e:
         self.feed.save_page_history(e.code, e.msg, e.fp.read())
         return
     except (httplib.IncompleteRead), e:
         self.feed.save_page_history(500, "IncompleteRead", e)
         return
     except Exception, e:
         logging.debug('[%d] ! -------------------------' % (self.feed.id,))
         tb = traceback.format_exc()
         logging.debug(tb)
         logging.debug('[%d] ! -------------------------' % (self.feed.id,))
         self.feed.save_page_history(500, "Error", tb)
         mail_feed_error_to_admin(self.feed, e)
         return
     
     self.feed.save_page_history(200, "OK")
 
 def rewrite_page(self, response):
     BASE_RE = re.compile(r'<head(.*?\>)', re.I)
     base_code = u'<base href="%s" />' % (self.feed.feed_link,)
     try:
         html = BASE_RE.sub(r'<head\1 '+base_code, response)
     except:
         response = response.decode('latin1').encode('utf-8')
         html = BASE_RE.sub(r'<head\1 '+base_code, response)
     
     if '<base href' not in html:
         html = "%s %s" % (base_code, html)
Exemplo n.º 6
0
                return
        except (ValueError, urllib2.URLError, httplib.BadStatusLine, httplib.InvalidURL,
                requests.exceptions.ConnectionError), e:
            self.feed.save_page_history(401, "Bad URL", e)
            fp = feedparser.parse(self.feed.feed_address)
            feed_link = fp.feed.get('link', "")
            self.feed.save()
        except (urllib2.HTTPError), e:
            self.feed.save_page_history(e.code, e.msg, e.fp.read())
        except (httplib.IncompleteRead), e:
            self.feed.save_page_history(500, "IncompleteRead", e)
        except (requests.exceptions.RequestException, 
                LookupError, 
                requests.packages.urllib3.exceptions.HTTPError), e:
            logging.debug('   ***> [%-30s] Page fetch failed using requests: %s' % (self.feed, e))
            mail_feed_error_to_admin(self.feed, e, locals())
            return self.fetch_page(urllib_fallback=True, requests_exception=e)
        except Exception, e:
            logging.debug('[%d] ! -------------------------' % (self.feed.id,))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' % (self.feed.id,))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e, locals())
            if not urllib_fallback:
                self.fetch_page(urllib_fallback=True)
        else:
            self.feed.save_page_history(200, "OK")

    def save_no_page(self):
        self.feed.has_page = False
Exemplo n.º 7
0
            self.feed.save()
        except (urllib2.HTTPError), e:
            self.feed.save_page_history(e.code, e.msg, e.fp.read())
            return
        except (httplib.IncompleteRead), e:
            self.feed.save_page_history(500, "IncompleteRead", e)
            return
        except Exception, e:
            logging.debug('[%d] ! -------------------------' %
                          (self.feed.id, ))
            tb = traceback.format_exc()
            logging.debug(tb)
            logging.debug('[%d] ! -------------------------' %
                          (self.feed.id, ))
            self.feed.save_page_history(500, "Error", tb)
            mail_feed_error_to_admin(self.feed, e)
            return

        self.feed.save_page_history(200, "OK")

    def save_no_page(self):
        """Flag the feed as having no original page and log a 404 entry.

        Clears ``has_page`` on the feed, saves the feed, and then records
        a page-history entry with status 404.
        """
        feed = self.feed
        feed.has_page = False
        feed.save()
        feed.save_page_history(404, "Feed has no original page.")

    def rewrite_page(self, response):
        BASE_RE = re.compile(r'<head(.*?\>)', re.I)
        base_code = u'<base href="%s" />' % (self.feed.feed_link, )
        try:
            html = BASE_RE.sub(r'<head\1 ' + base_code, response)
        except: