def extract_search_query(self):
    """Return the first ``q`` query-string value of ``self.address``.

    The ``_`` query parameter is removed with ``qurl`` before the address
    is parsed. Returns ``None`` when the address has no ``q`` parameter.
    """
    cleaned_address = qurl(self.address, remove=['_'])
    params = parse_qs(urlparse(cleaned_address).query)
    values = params.get('q')
    if values is None:
        return None
    return values[0]
def paginate_objects(self, request, objects):
    """Paginate ``objects`` and describe the neighbouring pages.

    The requested page is read from the ``page`` GET parameter (default 1).
    A non-integer value falls back to the first page; an out-of-range value
    falls back to the last page. The page size is ``self.paginate_by``.

    Returns:
        A ``(object_list, pagination)`` tuple. ``pagination`` is a dict with
        the keys ``count``, ``num_pages``, ``previous_page_number``,
        ``previous_url``, ``next_page_number`` and ``next_url``; the
        previous/next entries stay ``None`` when there is no such page.
    """
    paginator = Paginator(objects, self.paginate_by)
    page = request.GET.get('page', 1)
    try:
        object_list = paginator.page(page)
    except PageNotAnInteger:
        object_list = paginator.page(1)
    except EmptyPage:
        object_list = paginator.page(paginator.num_pages)

    pagination = {
        'count': object_list.paginator.count,
        'num_pages': object_list.paginator.num_pages,
        'previous_page_number': None,
        'previous_url': None,
        'next_page_number': None,
        'next_url': None
    }

    # Build absolute URLs with the scheme of the incoming request instead of
    # a hard-coded "http", so HTTPS requests get HTTPS pagination links.
    scheme = 'https' if request.is_secure() else 'http'
    url = request.get_full_path()

    def absolute_page_url(page_number):
        # Same path and query string as the current request, with ``page``
        # set to the target page.
        return '{0}://{1}{2}'.format(
            scheme,
            request.get_host(),
            qurl(url, add={'page': page_number})
        )

    if object_list.has_next():
        next_page_number = object_list.next_page_number()
        pagination['next_page_number'] = next_page_number
        pagination['next_url'] = absolute_page_url(next_page_number)

    if object_list.has_previous():
        previous_page_number = object_list.previous_page_number()
        pagination['previous_page_number'] = previous_page_number
        pagination['previous_url'] = absolute_page_url(previous_page_number)

    return object_list, pagination
def extract_username(self):
    """Return the Twitter username at the end of ``self.address``.

    The ``_`` query parameter is removed with ``qurl`` first. The address
    must end in ``twitter.com/<username>`` with an optional trailing slash.

    Returns:
        The username string, or ``None`` when the address does not match
        (or an ``IndexError`` is raised while extracting it).
    """
    try:
        address = qurl(self.address, remove=['_'])
        # Raw string so ``\w`` is a regex class rather than a string escape;
        # the dot is escaped so only a literal "twitter.com" matches.
        match = re.search(r'twitter\.com/(\w+)/?$', address)
        if not match:
            return None
        return match.group(1)
    except IndexError:
        return None
def fetch(self):
    """
    Fetch the feed at ``self.feed.feed_address`` and parse it with feedparser.

    Order of attempts, as written below:
      1. If ``self.options['fpf']`` is set, use it as the parsed feed and
         return ``(FEED_OK, self.fpf)`` immediately.
      2. YouTube and Twitter addresses are fetched through
         ``self.fetch_youtube`` / ``self.fetch_twitter``; a falsy result
         returns ``(FEED_ERRHTTP, None)``.
      3. Otherwise the address is downloaded with ``requests`` (sending
         conditional-GET headers when an etag / last-modified value is kept)
         and the body is handed to ``feedparser.parse``.
      4. If ``self.fpf`` is still falsy, ``feedparser.parse`` is asked to
         fetch the address itself.

    NOTE(review): as visible here the function ends without an explicit
    return on the normal download path -- confirm against the full file.
    NOTE(review): ``self.fpf`` is read before being assigned on some paths,
    so it is presumably initialised elsewhere (e.g. in ``__init__``).
    """
    start = time.time()  # not used again in the code visible here
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
        identity,
        self.feed.title[:30],
        self.feed.id,
        datetime.datetime.now() - self.feed.last_update)
    logging.debug(log_msg)

    # Validators for a conditional GET. ``modified`` is the first seven
    # fields of the UTC time tuple: year, month, day, hour, minute, second,
    # weekday.
    etag=self.feed.etag
    modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
    address = self.feed.feed_address

    # Forced fetch (explicitly requested, or picked at random for roughly 1%
    # of calls): drop the validators and add a random "_" query parameter to
    # the address (presumably to bypass caches).
    if (self.options.get('force') or random.random() <= .01):
        self.options['force'] = True
        modified = None
        etag = None
        address = qurl(address, add={"_": random.randint(0, 10000)})
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
            self.feed.title[:30], address))
    elif (not self.feed.fetched_once or not self.feed.known_good):
        # Never fetched, or not known to be good: always ask for the full feed.
        modified = None
        etag = None

    USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                  '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                  'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                  'Safari/534.48.3)' % (
                      self.feed.num_subscribers,
                      's' if self.feed.num_subscribers != 1 else '',
                      self.feed.permalink,
                  ))

    # A supplied ``feed_xml`` option is only logged; its content is not used.
    if self.options.get('feed_xml'):
        logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
            self.feed.title[:30], len(self.options.get('feed_xml'))))

    # A feed object handed in through the options short-circuits the fetch.
    if self.options.get('fpf'):
        self.fpf = self.options.get('fpf')
        logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
            self.feed.title[:30]))
        return FEED_OK, self.fpf

    if 'youtube.com' in address:
        try:
            youtube_feed = self.fetch_youtube(address)
        except (requests.adapters.ConnectionError):
            youtube_feed = None
        if not youtube_feed:
            logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
                          (self.feed.title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(youtube_feed)
    elif re.match('(https?)?://twitter.com/\w+/?$', qurl(address, remove=['_'])):
        # The pattern is matched against the address with "_" removed.
        # try:
        twitter_feed = self.fetch_twitter(address)
        # except Exception, e:
        #     logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %e' %
        #                   (self.feed.title[:30], address, e))
        #     twitter_feed = None
        if not twitter_feed:
            logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s' %
                          (self.feed.title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(twitter_feed)

    if not self.fpf:
        try:
            headers = {
                'User-Agent': USER_AGENT,
                'Accept-encoding': 'gzip, deflate',
                'A-IM': 'feed',
            }
            if etag:
                headers['If-None-Match'] = etag
            if modified:
                # format into an RFC 1123-compliant timestamp. We can't use
                # time.strftime() since the %a and %b directives can be affected
                # by the current locale, but RFC 2616 states that dates must be
                # in English.
                short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
                months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
                modified_header = '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])
                headers['If-Modified-Since'] = modified_header
            # NOTE(review): no timeout is passed to requests.get here.
            raw_feed = requests.get(address, headers=headers)
            if raw_feed.content:
                # Hand the HTTP response headers to feedparser, adding the
                # final URL as Content-Location.
                response_headers = raw_feed.headers
                response_headers['Content-Location'] = raw_feed.url
                self.fpf = feedparser.parse(smart_unicode(raw_feed.content),
                                            response_headers=response_headers)
        except Exception, e:
            # Best effort: any failure here is logged and the feedparser
            # fallback below is tried instead.
            logging.debug(" ---> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s" % (self.feed.title[:30], unicode(e)[:100]))

        if not self.fpf:
            # Fallback: let feedparser download the address itself.
            try:
                self.fpf = feedparser.parse(address,
                                            agent=USER_AGENT,
                                            etag=etag,
                                            modified=modified)
            except (TypeError, ValueError, KeyError, EOFError), e:
                logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
                              (self.feed.title[:30], e))
                pass
def process(self):
    """
    Inspect the parsed feed in ``self.fpf`` and update ``self.feed`` from it.

    The visible part handles the HTTP status reported on ``self.fpf`` (304,
    301 and >= 400), missing or unparsable feeds, and then stores the new
    etag and last-modified values on the feed.

    Early returns are ``(status_constant, ret_values)`` tuples, where
    ``ret_values`` is a dict of counters (new / updated / same / error),
    all zero in the code visible here.

    NOTE(review): as visible here the function ends after the last-modified
    handling without a final return -- confirm against the full file.
    """
    start = time.time()  # not used again in the code visible here
    self.refresh_feed()

    ret_values = dict(new=0, updated=0, same=0, error=0)

    if hasattr(self.fpf, 'status'):
        if self.options['verbose']:
            if self.fpf.bozo and self.fpf.status != 304:
                logging.debug(u' ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
                    self.feed.title[:30],
                    self.fpf.bozo_exception,
                    len(self.fpf.entries)))

        # Not modified: save the feed, record the 304 and stop.
        if self.fpf.status == 304:
            self.feed = self.feed.save()
            self.feed.save_feed_history(304, "Not modified")
            return FEED_SAME, ret_values

        # 302: Temporary redirect: ignore
        # 301: Permanent redirect: save it (after 10 tries)
        if self.fpf.status == 301:
            if self.fpf.href.endswith('feedburner.com/atom.xml'):
                return FEED_ERRHTTP, ret_values
            redirects, non_redirects = self.feed.count_redirects_in_history('feed')
            self.feed.save_feed_history(self.fpf.status, "HTTP Redirect (%d to go)" % (10-len(redirects)))
            # Adopt the redirect target once 10 redirects are recorded, or
            # when the history holds no non-redirect entries at all.
            if len(redirects) >= 10 or len(non_redirects) == 0:
                address = self.fpf.href
                if self.options['force'] and address:
                    # Remove the "_" query parameter before storing the address.
                    address = qurl(address, remove=['_'])
                self.feed.feed_address = address
            if not self.feed.known_good:
                self.feed.fetched_once = True
                logging.debug(" ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (self.feed.title[:30], self.fpf.status))
                self.feed = self.feed.schedule_feed_fetch_immediately()
            if not self.fpf.entries:
                self.feed = self.feed.save()
                self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                return FEED_ERRHTTP, ret_values
        if self.fpf.status >= 400:
            logging.debug(" ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (self.feed.title[:30], self.fpf.status))
            fixed_feed = None
            # Only feeds not known to be good get their address re-checked.
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(self.fpf.status, "HTTP Error")
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRHTTP, ret_values

    # No parsed feed at all: recorded in the history with code 551.
    if not self.fpf:
        logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!" %
                      (self.feed.title[:30]))
        self.feed.save_feed_history(551, "Broken feed")
        return FEED_ERRHTTP, ret_values

    # A parsed feed without entries: classify the parser error (552 for
    # non-XML content, 553 for SAX errors) unless the address can be fixed.
    if self.fpf and not self.fpf.entries:
        if self.fpf.bozo and isinstance(self.fpf.bozo_exception, feedparser.NonXMLContentType):
            logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(552, 'Non-xml feed', self.fpf.bozo_exception)
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRPARSE, ret_values
        elif self.fpf.bozo and isinstance(self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
            logging.debug(" ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (self.feed.title[:30], len(self.fpf.entries)))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(553, 'SAX Exception', self.fpf.bozo_exception)
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRPARSE, ret_values

    # the feed has changed (or it is the first time we parse it)
    # saving the etag and last_modified fields
    original_etag = self.feed.etag
    self.feed.etag = self.fpf.get('etag')
    if self.feed.etag:
        # Keep at most 255 characters of the etag.
        self.feed.etag = self.feed.etag[:255]
    # some times this is None (it never should) *sigh*
    if self.feed.etag is None:
        self.feed.etag = ''
    if self.feed.etag != original_etag:
        self.feed.save(update_fields=['etag'])

    original_last_modified = self.feed.last_modified
    if hasattr(self.fpf, 'modified') and self.fpf.modified:
        try:
            self.feed.last_modified = datetime.datetime.strptime(self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
        except Exception, e:
            self.feed.last_modified = None
            # NOTE(review): this logs self.feed.last_modified, which was just
            # set to None above, rather than the unparsable self.fpf.modified.
            logging.debug("Broken mtime %s: %s" % (self.feed.last_modified, e))
            pass
def fetch(self):
    """
    Fetch the feed at ``self.feed.feed_address`` and parse it with feedparser.

    Order of attempts, as written below:
      1. If ``self.options['fpf']`` is set, use it as the parsed feed and
         return ``(FEED_OK, self.fpf)`` immediately.
      2. YouTube addresses are fetched through ``self.fetch_youtube``; a
         falsy result returns ``(FEED_ERRHTTP, None)``.
      3. Otherwise the address is downloaded with ``requests`` (sending
         conditional-GET headers when an etag / last-modified value is kept)
         and the body is handed to ``feedparser.parse``.
      4. If ``self.fpf`` is still falsy, ``feedparser.parse`` is asked to
         fetch the address itself.

    NOTE(review): as visible here the function ends without an explicit
    return on the normal download path -- confirm against the full file.
    NOTE(review): ``self.fpf`` is read before being assigned on some paths,
    so it is presumably initialised elsewhere (e.g. in ``__init__``).
    """
    start = time.time()  # not used again in the code visible here
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
        identity, self.feed.title[:30], self.feed.id,
        datetime.datetime.now() - self.feed.last_update)
    logging.debug(log_msg)

    # Validators for a conditional GET. ``modified`` is the first seven
    # fields of the UTC time tuple: year, month, day, hour, minute, second,
    # weekday.
    etag = self.feed.etag
    modified = self.feed.last_modified.utctimetuple(
    )[:7] if self.feed.last_modified else None
    address = self.feed.feed_address

    # Forced fetch (explicitly requested, or picked at random for roughly 1%
    # of calls): drop the validators and add a random "_" query parameter to
    # the address (presumably to bypass caches).
    if (self.options.get('force') or random.random() <= .01):
        self.options['force'] = True
        modified = None
        etag = None
        address = qurl(address, add={"_": random.randint(0, 10000)})
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' %
                      (self.feed.title[:30], address))
    elif (not self.feed.fetched_once or not self.feed.known_good):
        # Never fetched, or not known to be good: always ask for the full feed.
        modified = None
        etag = None

    USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                  '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                  'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                  'Safari/534.48.3)' % (
                      self.feed.num_subscribers,
                      's' if self.feed.num_subscribers != 1 else '',
                      self.feed.permalink,
                  ))

    # A supplied ``feed_xml`` option is only logged; its content is not used.
    if self.options.get('feed_xml'):
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s'
            % (self.feed.title[:30], len(self.options.get('feed_xml'))))

    # A feed object handed in through the options short-circuits the fetch.
    if self.options.get('fpf'):
        self.fpf = self.options.get('fpf')
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.'
            % (self.feed.title[:30]))
        return FEED_OK, self.fpf

    if 'youtube.com' in address:
        try:
            youtube_feed = self.fetch_youtube(address)
        except (requests.adapters.ConnectionError):
            youtube_feed = None
        if not youtube_feed:
            logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
                          (self.feed.title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(youtube_feed)

    if not self.fpf:
        try:
            headers = {
                'User-Agent': USER_AGENT,
                'Accept-encoding': 'gzip, deflate',
                'A-IM': 'feed',
            }
            if etag:
                headers['If-None-Match'] = etag
            if modified:
                # format into an RFC 1123-compliant timestamp. We can't use
                # time.strftime() since the %a and %b directives can be affected
                # by the current locale, but RFC 2616 states that dates must be
                # in English.
                short_weekdays = [
                    'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'
                ]
                months = [
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ]
                modified_header = '%s, %02d %s %04d %02d:%02d:%02d GMT' % (
                    short_weekdays[modified[6]], modified[2],
                    months[modified[1] - 1], modified[0], modified[3],
                    modified[4], modified[5])
                headers['If-Modified-Since'] = modified_header
            # NOTE(review): no timeout is passed to requests.get here.
            raw_feed = requests.get(address, headers=headers)
            if raw_feed.content:
                # Hand the HTTP response headers to feedparser, adding the
                # final URL as Content-Location.
                response_headers = raw_feed.headers
                response_headers['Content-Location'] = raw_feed.url
                self.fpf = feedparser.parse(
                    smart_unicode(raw_feed.content),
                    response_headers=response_headers)
        except Exception, e:
            # Best effort: any failure here is logged and the feedparser
            # fallback below is tried instead.
            logging.debug(
                " ---> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s"
                % (self.feed.title[:30], unicode(e)[:100]))

        if not self.fpf:
            # Fallback: let feedparser download the address itself.
            try:
                self.fpf = feedparser.parse(address,
                                            agent=USER_AGENT,
                                            etag=etag,
                                            modified=modified)
            except (TypeError, ValueError, KeyError, EOFError), e:
                logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
                              (self.feed.title[:30], e))
                pass
def process(self):
    """
    Inspect the parsed feed in ``self.fpf`` and update ``self.feed`` from it.

    The visible part handles the HTTP status reported on ``self.fpf`` (304,
    301 and >= 400), missing or unparsable feeds, and then stores the new
    etag and last-modified values on the feed.

    Early returns are ``(status_constant, ret_values)`` tuples, where
    ``ret_values`` is a dict of counters (new / updated / same / error),
    all zero in the code visible here.

    NOTE(review): as visible here the function ends after the last-modified
    handling without a final return -- confirm against the full file.
    """
    start = time.time()  # not used again in the code visible here
    self.refresh_feed()

    ret_values = dict(new=0, updated=0, same=0, error=0)

    if hasattr(self.fpf, 'status'):
        if self.options['verbose']:
            if self.fpf.bozo and self.fpf.status != 304:
                logging.debug(
                    u' ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                    % (self.feed.title[:30], self.fpf.bozo_exception,
                       len(self.fpf.entries)))

        # Not modified: save the feed, record the 304 and stop.
        if self.fpf.status == 304:
            self.feed = self.feed.save()
            self.feed.save_feed_history(304, "Not modified")
            return FEED_SAME, ret_values

        # 302: Temporary redirect: ignore
        # 301: Permanent redirect: save it (after 10 tries)
        if self.fpf.status == 301:
            if self.fpf.href.endswith('feedburner.com/atom.xml'):
                return FEED_ERRHTTP, ret_values
            redirects, non_redirects = self.feed.count_redirects_in_history(
                'feed')
            self.feed.save_feed_history(
                self.fpf.status,
                "HTTP Redirect (%d to go)" % (10 - len(redirects)))
            # Adopt the redirect target once 10 redirects are recorded, or
            # when the history holds no non-redirect entries at all.
            if len(redirects) >= 10 or len(non_redirects) == 0:
                address = self.fpf.href
                if self.options['force'] and address:
                    # Remove the "_" query parameter before storing the address.
                    address = qurl(address, remove=['_'])
                self.feed.feed_address = address
            if not self.feed.known_good:
                self.feed.fetched_once = True
                logging.debug(
                    " ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                    % (self.feed.title[:30], self.fpf.status))
                self.feed = self.feed.schedule_feed_fetch_immediately()
            if not self.fpf.entries:
                self.feed = self.feed.save()
                self.feed.save_feed_history(self.fpf.status,
                                            "HTTP Redirect")
                return FEED_ERRHTTP, ret_values
        if self.fpf.status >= 400:
            logging.debug(
                " ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                % (self.feed.title[:30], self.fpf.status))
            fixed_feed = None
            # Only feeds not known to be good get their address re-checked.
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                )
            if not fixed_feed:
                self.feed.save_feed_history(self.fpf.status, "HTTP Error")
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRHTTP, ret_values

    # No parsed feed at all: recorded in the history with code 551.
    if not self.fpf:
        logging.debug(
            " ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!"
            % (self.feed.title[:30]))
        self.feed.save_feed_history(551, "Broken feed")
        return FEED_ERRHTTP, ret_values

    # A parsed feed without entries: classify the parser error (552 for
    # non-XML content, 553 for SAX errors) unless the address can be fixed.
    if self.fpf and not self.fpf.entries:
        if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                        feedparser.NonXMLContentType):
            logging.debug(
                " ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                % (self.feed.title[:30], len(self.fpf.entries)))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                )
            if not fixed_feed:
                self.feed.save_feed_history(552, 'Non-xml feed',
                                            self.fpf.bozo_exception)
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRPARSE, ret_values
        elif self.fpf.bozo and isinstance(
                self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
            logging.debug(
                " ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                % (self.feed.title[:30], len(self.fpf.entries)))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                )
            if not fixed_feed:
                self.feed.save_feed_history(553, 'SAX Exception',
                                            self.fpf.bozo_exception)
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRPARSE, ret_values

    # the feed has changed (or it is the first time we parse it)
    # saving the etag and last_modified fields
    original_etag = self.feed.etag
    self.feed.etag = self.fpf.get('etag')
    if self.feed.etag:
        # Keep at most 255 characters of the etag.
        self.feed.etag = self.feed.etag[:255]
    # some times this is None (it never should) *sigh*
    if self.feed.etag is None:
        self.feed.etag = ''
    if self.feed.etag != original_etag:
        self.feed.save(update_fields=['etag'])

    original_last_modified = self.feed.last_modified
    if hasattr(self.fpf, 'modified') and self.fpf.modified:
        try:
            self.feed.last_modified = datetime.datetime.strptime(
                self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
        except Exception, e:
            self.feed.last_modified = None
            # NOTE(review): this logs self.feed.last_modified, which was just
            # set to None above, rather than the unparsable self.fpf.modified.
            logging.debug("Broken mtime %s: %s" %
                          (self.feed.last_modified, e))
            pass
class ProcessFeed:
    # Takes an already-parsed feed object (``fpf``) for the feed identified
    # by ``feed_id`` and applies it to the stored Feed: HTTP status handling,
    # metadata updates, story creation/updating and PuSH subscription upkeep.

    def __init__(self, feed_id, fpf, options):
        # feed_id: id used to load the Feed in refresh_feed().
        # fpf:     parsed feed object; process() reads status, bozo, entries,
        #          feed, href, ... from it.
        # options: dict of processing flags; process() reads 'verbose',
        #          'force' and 'updates_off'.
        self.feed_id = feed_id
        self.options = options
        self.fpf = fpf

    def refresh_feed(self):
        """Reload ``self.feed`` by id and follow a changed primary key."""
        self.feed = Feed.get_by_id(self.feed_id)
        # The lookup can return a feed with a different pk; keep the stored
        # id in step with it.
        if self.feed_id != self.feed.pk:
            logging.debug(" ***> Feed has changed: from %s to %s" %
                          (self.feed_id, self.feed.pk))
            self.feed_id = self.feed.pk

    def process(self):
        """
        Apply the parsed feed in ``self.fpf`` to the stored feed.

        Steps, in order: handle the HTTP status (304, 301, >= 400); bail out
        on missing or unparsable feeds; store etag / last-modified, title,
        tagline and feed link; repair duplicate guids; add or update the
        stories; maintain the PuSH subscription; update statistics and
        record a 200 in the feed history.

        Returns:
            A ``(status_constant, ret_values)`` tuple. ``ret_values`` is a
            dict of counters (new / updated / same / error); on early
            returns all counters are zero, on success it is whatever
            ``self.feed.add_update_stories`` returned.
        """
        start = time.time()
        self.refresh_feed()

        ret_values = dict(new=0, updated=0, same=0, error=0)

        if hasattr(self.fpf, 'status'):
            if self.options['verbose']:
                if self.fpf.bozo and self.fpf.status != 304:
                    logging.debug(
                        u' ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)'
                        % (self.feed.title[:30], self.fpf.bozo_exception,
                           len(self.fpf.entries)))

            # Not modified: save the feed, record the 304 and stop.
            if self.fpf.status == 304:
                self.feed = self.feed.save()
                self.feed.save_feed_history(304, "Not modified")
                return FEED_SAME, ret_values

            # 302: Temporary redirect: ignore
            # 301: Permanent redirect: save it (after 10 tries)
            if self.fpf.status == 301:
                if self.fpf.href.endswith('feedburner.com/atom.xml'):
                    return FEED_ERRHTTP, ret_values
                redirects, non_redirects = self.feed.count_redirects_in_history(
                    'feed')
                self.feed.save_feed_history(
                    self.fpf.status,
                    "HTTP Redirect (%d to go)" % (10 - len(redirects)))
                # Adopt the redirect target once 10 redirects are recorded,
                # or when the history holds no non-redirect entries at all.
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    address = self.fpf.href
                    if self.options['force'] and address:
                        # Remove the "_" query parameter before storing.
                        address = qurl(address, remove=['_'])
                    self.feed.feed_address = address
                if not self.feed.known_good:
                    self.feed.fetched_once = True
                    logging.debug(
                        " ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..."
                        % (self.feed.title[:30], self.fpf.status))
                    self.feed = self.feed.schedule_feed_fetch_immediately()
                if not self.fpf.entries:
                    self.feed = self.feed.save()
                    self.feed.save_feed_history(self.fpf.status,
                                                "HTTP Redirect")
                    return FEED_ERRHTTP, ret_values
            if self.fpf.status >= 400:
                logging.debug(
                    " ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..."
                    % (self.feed.title[:30], self.fpf.status))
                fixed_feed = None
                # Only feeds not known to be good get their address re-checked.
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(self.fpf.status, "HTTP Error")
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRHTTP, ret_values

        # No parsed feed at all: recorded in the history with code 551.
        if not self.fpf:
            logging.debug(
                " ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!"
                % (self.feed.title[:30]))
            self.feed.save_feed_history(551, "Broken feed")
            return FEED_ERRHTTP, ret_values

        # A parsed feed without entries: classify the parser error (552 for
        # non-XML content, 553 for SAX errors) unless the address can be fixed.
        if self.fpf and not self.fpf.entries:
            if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                            feedparser.NonXMLContentType):
                logging.debug(
                    " ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(552, 'Non-xml feed',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values
            elif self.fpf.bozo and isinstance(
                    self.fpf.bozo_exception, xml.sax._exceptions.SAXException):
                logging.debug(
                    " ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..."
                    % (self.feed.title[:30], len(self.fpf.entries)))
                fixed_feed = None
                if not self.feed.known_good:
                    fixed_feed, feed = self.feed.check_feed_link_for_feed_address(
                    )
                if not fixed_feed:
                    self.feed.save_feed_history(553, 'SAX Exception',
                                                self.fpf.bozo_exception)
                else:
                    self.feed = feed
                self.feed = self.feed.save()
                return FEED_ERRPARSE, ret_values

        # the feed has changed (or it is the first time we parse it)
        # saving the etag and last_modified fields
        original_etag = self.feed.etag
        self.feed.etag = self.fpf.get('etag')
        if self.feed.etag:
            # Keep at most 255 characters of the etag.
            self.feed.etag = self.feed.etag[:255]
        # some times this is None (it never should) *sigh*
        if self.feed.etag is None:
            self.feed.etag = ''
        if self.feed.etag != original_etag:
            self.feed.save(update_fields=['etag'])

        original_last_modified = self.feed.last_modified
        if hasattr(self.fpf, 'modified') and self.fpf.modified:
            try:
                self.feed.last_modified = datetime.datetime.strptime(
                    self.fpf.modified, '%a, %d %b %Y %H:%M:%S %Z')
            except Exception, e:
                self.feed.last_modified = None
                # NOTE(review): this logs self.feed.last_modified, which was
                # just set to None above, rather than self.fpf.modified.
                logging.debug("Broken mtime %s: %s" %
                              (self.feed.last_modified, e))
                pass
        if self.feed.last_modified != original_last_modified:
            self.feed.save(update_fields=['last_modified'])

        # Only the first 100 entries are processed.
        self.fpf.entries = self.fpf.entries[:100]

        # Title: stored with tags stripped, saved only when it changed.
        original_title = self.feed.feed_title
        if self.fpf.feed.get('title'):
            self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
        if self.feed.feed_title != original_title:
            self.feed.save(update_fields=['feed_title'])

        # Tagline: falls back to the stored value when the feed has none.
        tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
        if tagline:
            original_tagline = self.feed.data.feed_tagline
            self.feed.data.feed_tagline = smart_unicode(tagline)
            if self.feed.data.feed_tagline != original_tagline:
                self.feed.data.save(update_fields=['feed_tagline'])

        # Feed link: a changed link is recorded as a page redirect and only
        # adopted under the same 10-redirect rule used for the feed address.
        if not self.feed.feed_link_locked:
            new_feed_link = self.fpf.feed.get('link') or self.fpf.feed.get(
                'id') or self.feed.feed_link
            if self.options['force'] and new_feed_link:
                new_feed_link = qurl(new_feed_link, remove=['_'])
            if new_feed_link != self.feed.feed_link:
                logging.debug(
                    " ---> [%-30s] ~SB~FRFeed's page is different: %s to %s"
                    % (self.feed.title[:30], self.feed.feed_link,
                       new_feed_link))
                redirects, non_redirects = self.feed.count_redirects_in_history(
                    'page')
                self.feed.save_page_history(
                    301, "HTTP Redirect (%s to go)" % (10 - len(redirects)))
                if len(redirects) >= 10 or len(non_redirects) == 0:
                    self.feed.feed_link = new_feed_link
                    self.feed.save(update_fields=['feed_link'])

        # Determine if stories aren't valid and replace broken guids
        guids_seen = set()
        permalinks_seen = set()
        for entry in self.fpf.entries:
            guids_seen.add(entry.get('guid'))
            permalinks_seen.add(Feed.get_permalink(entry))
        # Guids are replaced only when every entry shares one single guid
        # (exactly one distinct value, but a different number of entries);
        # the same test is applied to permalinks.
        guid_difference = len(guids_seen) != len(self.fpf.entries)
        single_guid = len(guids_seen) == 1
        replace_guids = single_guid and guid_difference
        permalink_difference = len(permalinks_seen) != len(self.fpf.entries)
        single_permalink = len(permalinks_seen) == 1
        replace_permalinks = single_permalink and permalink_difference

        # Compare new stories to existing stories, adding and updating
        # start_date ends up as the earliest 'published' value seen; it is
        # only referenced by the commented-out query filter below.
        start_date = datetime.datetime.utcnow()
        story_hashes = []
        stories = []
        for entry in self.fpf.entries:
            story = pre_process_story(entry)
            if story.get('published') < start_date:
                start_date = story.get('published')
            if replace_guids:
                if replace_permalinks:
                    # Permalinks are identical too: use the published
                    # timestamp as the guid.
                    new_story_guid = unicode(story.get('published'))
                    if self.options['verbose']:
                        logging.debug(
                            u' ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s'
                            % (self.feed.title[:30], story.get('guid'),
                               new_story_guid))
                    story['guid'] = new_story_guid
                else:
                    # Otherwise use the story's permalink as the guid.
                    new_story_guid = Feed.get_permalink(story)
                    if self.options['verbose']:
                        logging.debug(
                            u' ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s'
                            % (self.feed.title[:30], story.get('guid'),
                               new_story_guid))
                    story['guid'] = new_story_guid
            story['story_hash'] = MStory.feed_guid_hash_unsaved(
                self.feed.pk, story.get('guid'))
            stories.append(story)
            story_hashes.append(story.get('story_hash'))

        # Stored stories for the hashes computed above, keyed by story_hash.
        existing_stories = dict((s.story_hash, s) for s in MStory.objects(
            story_hash__in=story_hashes,
            # story_date__gte=start_date,
            # story_feed_id=self.feed.pk
        ))

        ret_values = self.feed.add_update_stories(
            stories,
            existing_stories,
            verbose=self.options['verbose'],
            updates_off=self.options['updates_off'])

        # PuSH upkeep: look for rel="hub" / rel="self" links in the feed.
        if (hasattr(self.fpf, 'feed') and hasattr(self.fpf.feed, 'links')
                and self.fpf.feed.links):
            hub_url = None
            self_url = self.feed.feed_address
            for link in self.fpf.feed.links:
                # The first hub link wins; a later self link overrides an
                # earlier one.
                if link['rel'] == 'hub' and not hub_url:
                    hub_url = link['href']
                elif link['rel'] == 'self':
                    self_url = link['href']
            push_expired = False
            if self.feed.is_push:
                try:
                    push_expired = self.feed.push.lease_expires < datetime.datetime.now(
                    )
                except PushSubscription.DoesNotExist:
                    self.feed.is_push = False
            # (Re)subscribe when a hub exists, DEBUG is off, the feed has
            # active subscribers, and the lease expired, push is off, or the
            # run is forced.
            if (hub_url and self_url and not settings.DEBUG
                    and self.feed.active_subscribers > 0
                    and (push_expired or not self.feed.is_push
                         or self.options.get('force'))):
                logging.debug(
                    u' ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' %
                    (self.feed.title[:30],
                     "~SKRe-~SN" if push_expired else "", hub_url))
                try:
                    PushSubscription.objects.subscribe(self_url,
                                                       feed=self.feed,
                                                       hub=hub_url)
                except TimeoutError:
                    logging.debug(
                        u' ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s'
                        % (self.feed.title[:30], hub_url))
            elif (self.feed.is_push and
                  (self.feed.active_subscribers <= 0 or not hub_url)):
                logging.debug(
                    u' ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' %
                    (self.feed.title[:30]))
                self.feed.is_push = False
                self.feed = self.feed.save()

        logging.debug(
            u' ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s'
            % (self.feed.title[:30], '~FG~SB' if ret_values['new'] else '',
               ret_values['new'], '~FY~SB' if ret_values['updated'] else '',
               ret_values['updated'], '~SB' if ret_values['same'] else '',
               ret_values['same'], '~FR~SB' if ret_values['error'] else '',
               ret_values['error'], len(self.fpf.entries)))
        self.feed.update_all_statistics(has_new_stories=bool(
            ret_values['new']),
                                        force=self.options['force'])
        # Trimming and Redis expiry only run when new stories were added.
        if ret_values['new']:
            self.feed.trim_feed()
            self.feed.expire_redis()
        self.feed.save_feed_history(200, "OK")

        if self.options['verbose']:
            logging.debug(u' ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' %
                          (self.feed.title[:30], time.time() - start))

        return FEED_OK, ret_values
def fetch(self):
    """
    Fetch the feed at ``self.feed.feed_address`` and parse it with feedparser.

    Order of attempts, as written below:
      1. If ``self.options['fpf']`` is set, use it as the parsed feed and
         return ``(FEED_OK, self.fpf)`` immediately.
      2. YouTube, Twitter and Facebook addresses are fetched through
         ``self.fetch_youtube`` / ``self.fetch_twitter`` /
         ``self.fetch_facebook``; a falsy result returns
         ``(FEED_ERRHTTP, None)``.
      3. Otherwise the address is downloaded with ``requests`` using
         ``self.feed.fetch_headers()`` plus conditional-GET headers, retried
         once with ``fetch_headers(fake=True)`` on a >= 400 status, and the
         body is parsed as a JSON feed or handed to ``feedparser.parse``.
      4. If ``self.fpf`` is still falsy, or the ``force_fp`` option is set,
         ``feedparser.parse`` is asked to fetch the address itself.

    NOTE(review): as visible here the function ends without an explicit
    return on the normal download path -- confirm against the full file.
    NOTE(review): ``self.fpf`` is read before being assigned on some paths,
    so it is presumably initialised elsewhere (e.g. in ``__init__``).
    """
    start = time.time()  # not used again in the code visible here
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
        identity, self.feed.log_title[:30], self.feed.id,
        datetime.datetime.now() - self.feed.last_update)
    logging.debug(log_msg)

    # Validators for a conditional GET. ``modified`` is the first seven
    # fields of the UTC time tuple: year, month, day, hour, minute, second,
    # weekday.
    etag = self.feed.etag
    modified = self.feed.last_modified.utctimetuple(
    )[:7] if self.feed.last_modified else None
    address = self.feed.feed_address

    # Forced fetch (explicitly requested, or picked at random for roughly 1%
    # of calls): drop the validators and, for addresses starting with
    # "http", add a random "_" query parameter (presumably to bypass caches).
    if (self.options.get('force') or random.random() <= .01):
        self.options['force'] = True
        modified = None
        etag = None
        if address.startswith('http'):
            address = qurl(address, add={"_": random.randint(0, 10000)})
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' %
                      (self.feed.log_title[:30], address))
    elif (not self.feed.fetched_once or not self.feed.known_good):
        # Never fetched, or not known to be good: always ask for the full feed.
        modified = None
        etag = None

    # A supplied ``feed_xml`` option is only logged; its content is not used.
    if self.options.get('feed_xml'):
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s'
            % (self.feed.log_title[:30], len(self.options.get('feed_xml'))))

    # A feed object handed in through the options short-circuits the fetch.
    if self.options.get('fpf'):
        self.fpf = self.options.get('fpf')
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.'
            % (self.feed.log_title[:30]))
        return FEED_OK, self.fpf

    # Site-specific fetchers; the Twitter and Facebook patterns are matched
    # against the address with the "_" parameter removed.
    if 'youtube.com' in address:
        try:
            youtube_feed = self.fetch_youtube(address)
        except (requests.adapters.ConnectionError):
            youtube_feed = None
        if not youtube_feed:
            logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
                          (self.feed.log_title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(youtube_feed)
    elif re.match(r'(https?)?://twitter.com/\w+/?',
                  qurl(address, remove=['_'])):
        twitter_feed = self.fetch_twitter(address)
        if not twitter_feed:
            logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s' %
                          (self.feed.log_title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(twitter_feed)
    elif re.match(r'(.*?)facebook.com/\w+/?$', qurl(address, remove=['_'])):
        facebook_feed = self.fetch_facebook()
        if not facebook_feed:
            logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s' %
                          (self.feed.log_title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(facebook_feed)

    if not self.fpf:
        try:
            headers = self.feed.fetch_headers()
            if etag:
                headers['If-None-Match'] = etag
            if modified:
                # format into an RFC 1123-compliant timestamp. We can't use
                # time.strftime() since the %a and %b directives can be affected
                # by the current locale, but RFC 2616 states that dates must be
                # in English.
                short_weekdays = [
                    'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'
                ]
                months = [
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ]
                modified_header = '%s, %02d %s %04d %02d:%02d:%02d GMT' % (
                    short_weekdays[modified[6]], modified[2],
                    months[modified[1] - 1], modified[0], modified[3],
                    modified[4], modified[5])
                headers['If-Modified-Since'] = modified_header
            # The A-IM header is only sent together with a validator.
            if etag or modified:
                headers['A-IM'] = 'feed'
            # NOTE(review): no timeout is passed to requests.get here.
            raw_feed = requests.get(address, headers=headers)
            if raw_feed.status_code >= 400:
                logging.debug(
                    " ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s"
                    % (self.feed.log_title[:30], raw_feed.status_code,
                       raw_feed.headers))
                # The retry uses the stored feed address (without the "_"
                # parameter added above) and the fake=True header set.
                raw_feed = requests.get(
                    self.feed.feed_address,
                    headers=self.feed.fetch_headers(fake=True))

            if raw_feed.content and 'application/json' in raw_feed.headers.get(
                    'Content-Type', ""):
                # JSON Feed
                json_feed = self.fetch_json_feed(address, raw_feed)
                if not json_feed:
                    logging.debug(
                        u' ***> [%-30s] ~FRJSON fetch failed: %s' %
                        (self.feed.log_title[:30], address))
                    return FEED_ERRHTTP, None
                self.fpf = feedparser.parse(json_feed)
            elif raw_feed.content and raw_feed.status_code < 400:
                # Hand the HTTP response headers to feedparser, adding the
                # final URL as Content-Location; the decoded body is kept on
                # self.raw_feed.
                response_headers = raw_feed.headers
                response_headers['Content-Location'] = raw_feed.url
                self.raw_feed = smart_unicode(raw_feed.content)
                self.fpf = feedparser.parse(
                    self.raw_feed, response_headers=response_headers)
                if self.options.get('debug', False):
                    logging.debug(
                        " ---> [%-30s] ~FBFeed fetch status %s: %s length / %s"
                        % (self.feed.log_title[:30], raw_feed.status_code,
                           len(smart_unicode(
                               raw_feed.content)), raw_feed.headers))
        except Exception, e:
            # Best effort: any failure here is logged and the feedparser
            # fallback below is tried instead.
            logging.debug(
                " ***> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s"
                % (self.feed.log_title[:30], unicode(e)[:100]))

        if not self.fpf or self.options.get('force_fp', False):
            # Fallback (or forced via ``force_fp``): let feedparser download
            # the address itself.
            try:
                self.fpf = feedparser.parse(address,
                                            agent=self.feed.user_agent,
                                            etag=etag,
                                            modified=modified)
            except (TypeError, ValueError, KeyError, EOFError,
                    MemoryError), e:
                logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
                              (self.feed.log_title[:30], e))
                pass
def fetch(self):
    """
    Uses requests to download the feed, parsing it in feedparser. Will be
    storified later.

    The parsed result is stored on ``self.fpf``. The stored etag and
    last-modified values are sent as conditional-GET headers unless the
    fetch is forced (explicitly, or at random on roughly 1% of calls) or the
    feed has not been fetched once / is not known good.

    YouTube, Twitter and Facebook addresses are routed to their dedicated
    fetchers; everything else is downloaded with ``requests`` and, if that
    yields nothing (or ``force_fp`` is set), with feedparser's own fetcher.

    Returns:
        ``(FEED_OK, self.fpf)`` when a parsed feed is available, otherwise
        ``(FEED_ERRHTTP, None)``.
    """
    identity = self.get_identity()
    logging.debug(
        u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
            identity,
            self.feed.log_title[:30],
            self.feed.id,
            datetime.datetime.now() - self.feed.last_update))

    etag = self.feed.etag
    modified = (self.feed.last_modified.utctimetuple()[:7]
                if self.feed.last_modified else None)
    address = self.feed.feed_address

    if self.options.get('force') or random.random() <= .01:
        # Forced fetch: drop the conditional-GET validators and append a
        # random `_` query parameter (presumably to get past caches).
        self.options['force'] = True
        modified = None
        etag = None
        address = qurl(address, add={"_": random.randint(0, 10000)})
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
            self.feed.log_title[:30], address))
    elif not self.feed.fetched_once or not self.feed.known_good:
        modified = None
        etag = None

    if self.options.get('feed_xml'):
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
                self.feed.log_title[:30], len(self.options.get('feed_xml'))))

    if self.options.get('fpf'):
        # An already-parsed feed was handed in; no network access needed.
        self.fpf = self.options.get('fpf')
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
                self.feed.log_title[:30]))
        return FEED_OK, self.fpf

    # The `_` parameter added for forced fetches is stripped before the
    # address is matched against the special-case hosts.
    if 'youtube.com' in address:
        try:
            youtube_feed = self.fetch_youtube(address)
        except requests.adapters.ConnectionError:
            youtube_feed = None
        if not youtube_feed:
            logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' % (
                self.feed.log_title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(youtube_feed)
    elif re.match(r'(https?)?://twitter\.com/\w+/?$',
                  qurl(address, remove=['_'])):
        twitter_feed = self.fetch_twitter(address)
        if not twitter_feed:
            logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s' % (
                self.feed.log_title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(twitter_feed)
    elif re.match(r'(.*?)facebook\.com/\w+/?$',
                  qurl(address, remove=['_'])):
        facebook_feed = self.fetch_facebook()
        if not facebook_feed:
            logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s' % (
                self.feed.log_title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(facebook_feed)

    if not self.fpf:
        try:
            headers = self.feed.fetch_headers()
            if etag:
                headers['If-None-Match'] = etag
            if modified:
                # Format into an RFC 1123-compliant timestamp. We can't use
                # time.strftime() since the %a and %b directives can be
                # affected by the current locale, but RFC 2616 states that
                # dates must be in English.
                short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat',
                                  'Sun']
                months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
                year, month, day, hour, minute, second, weekday = modified
                headers['If-Modified-Since'] = (
                    '%s, %02d %s %04d %02d:%02d:%02d GMT' % (
                        short_weekdays[weekday], day, months[month - 1],
                        year, hour, minute, second))
            if etag or modified:
                headers['A-IM'] = 'feed'

            # NOTE(review): no timeout is passed to requests.get, so a
            # stalled server can block this call indefinitely -- consider
            # adding one.
            raw_feed = requests.get(address, headers=headers)
            if raw_feed.status_code >= 400:
                logging.debug(
                    " ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (
                        self.feed.log_title[:30], raw_feed.status_code,
                        raw_feed.headers))
                # Retry against the stored address (without the `_`
                # parameter) using the alternate "fake" headers.
                raw_feed = requests.get(
                    self.feed.feed_address,
                    headers=self.feed.fetch_headers(fake=True))

            content_type = raw_feed.headers.get('Content-Type', "")
            if raw_feed.content and 'application/json' in content_type:
                # JSON Feed
                json_feed = self.fetch_json_feed(address, raw_feed)
                if not json_feed:
                    logging.debug(u' ***> [%-30s] ~FRJSON fetch failed: %s' % (
                        self.feed.log_title[:30], address))
                    return FEED_ERRHTTP, None
                self.fpf = feedparser.parse(json_feed)
            elif raw_feed.content and raw_feed.status_code < 400:
                response_headers = raw_feed.headers
                response_headers['Content-Location'] = raw_feed.url
                self.raw_feed = smart_unicode(raw_feed.content)
                self.fpf = feedparser.parse(
                    self.raw_feed, response_headers=response_headers)
                if self.options.get('debug', False):
                    # Reuse the already-decoded body rather than decoding
                    # the response content a second time.
                    logging.debug(
                        " ---> [%-30s] ~FBFeed fetch status %s: %s length / %s" % (
                            self.feed.log_title[:30], raw_feed.status_code,
                            len(self.raw_feed), raw_feed.headers))
        except Exception as e:
            # Deliberately broad: any failure in the requests path falls
            # through to feedparser's own fetcher below.
            logging.debug(
                " ***> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s" % (
                    self.feed.log_title[:30], unicode(e)[:100]))

        if not self.fpf or self.options.get('force_fp', False):
            try:
                self.fpf = feedparser.parse(address,
                                            agent=self.feed.user_agent,
                                            etag=etag,
                                            modified=modified)
            except (TypeError, ValueError, KeyError, EOFError,
                    MemoryError) as e:
                logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' % (
                    self.feed.log_title[:30], e))

    # Every exit returns a (status, fpf) pair so callers can always unpack
    # the result instead of receiving an implicit None.
    if not self.fpf:
        return FEED_ERRHTTP, None
    return FEED_OK, self.fpf
def process(self):
    """
    Downloads and parses a feed.

    Inspects the parsed feed in ``self.fpf``: handles HTTP status codes
    (304, 301, >= 400), broken or non-XML results, then updates the feed's
    etag / last-modified / title / tagline / link, repairs duplicated
    guids, stores the stories and manages the PuSH subscription.

    Returns:
        A ``(status, ret_values)`` tuple. ``status`` is one of FEED_SAME,
        FEED_ERRHTTP, FEED_ERRPARSE or FEED_OK. ``ret_values`` starts as a
        dict of ``new`` / ``updated`` / ``same`` / ``error`` counters (all
        zero on the early exits) and is replaced by the result of
        ``self.feed.add_update_stories`` on the success path.
    """
    start = time.time()
    self.refresh_feed()

    ret_values = dict(new=0, updated=0, same=0, error=0)

    if hasattr(self.fpf, 'status'):
        if self.options['verbose']:
            if self.fpf.bozo and self.fpf.status != 304:
                logging.debug(u' ---> [%-30s] ~FRBOZO exception: %s ~SB(%s entries)' % (
                    self.feed.title[:30],
                    self.fpf.bozo_exception,
                    len(self.fpf.entries)))

        if self.fpf.status == 304:
            self.feed = self.feed.save()
            self.feed.save_feed_history(304, "Not modified")
            return FEED_SAME, ret_values

        # 302: Temporary redirect: ignore
        # 301: Permanent redirect: save it (after 10 tries)
        if self.fpf.status == 301:
            if self.fpf.href.endswith('feedburner.com/atom.xml'):
                return FEED_ERRHTTP, ret_values
            redirects, non_redirects = self.feed.count_redirects_in_history('feed')
            self.feed.save_feed_history(
                self.fpf.status,
                "HTTP Redirect (%d to go)" % (10 - len(redirects)))
            if len(redirects) >= 10 or len(non_redirects) == 0:
                address = self.fpf.href
                if self.options['force'] and address:
                    # Strip the `_` query parameter before storing.
                    address = qurl(address, remove=['_'])
                self.feed.feed_address = address
            if not self.feed.known_good:
                self.feed.fetched_once = True
                logging.debug(" ---> [%-30s] ~SB~SK~FRFeed is %s'ing. Refetching..." % (
                    self.feed.title[:30], self.fpf.status))
                self.feed = self.feed.schedule_feed_fetch_immediately()
            if not self.fpf.entries:
                self.feed = self.feed.save()
                self.feed.save_feed_history(self.fpf.status, "HTTP Redirect")
                return FEED_ERRHTTP, ret_values

        if self.fpf.status >= 400:
            logging.debug(" ---> [%-30s] ~SB~FRHTTP Status code: %s. Checking address..." % (
                self.feed.title[:30], self.fpf.status))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(self.fpf.status, "HTTP Error")
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRHTTP, ret_values

    if not self.fpf:
        logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. No feedparser feed either!" % (
            self.feed.title[:30]))
        self.feed.save_feed_history(551, "Broken feed")
        return FEED_ERRHTTP, ret_values

    if self.fpf and not self.fpf.entries:
        if self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                        feedparser.NonXMLContentType):
            logging.debug(" ---> [%-30s] ~SB~FRFeed is Non-XML. %s entries. Checking address..." % (
                self.feed.title[:30], len(self.fpf.entries)))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(552, 'Non-xml feed',
                                            self.fpf.bozo_exception)
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRPARSE, ret_values
        elif self.fpf.bozo and isinstance(self.fpf.bozo_exception,
                                          xml.sax._exceptions.SAXException):
            logging.debug(" ---> [%-30s] ~SB~FRFeed has SAX/XML parsing issues. %s entries. Checking address..." % (
                self.feed.title[:30], len(self.fpf.entries)))
            fixed_feed = None
            if not self.feed.known_good:
                fixed_feed, feed = self.feed.check_feed_link_for_feed_address()
            if not fixed_feed:
                self.feed.save_feed_history(553, 'SAX Exception',
                                            self.fpf.bozo_exception)
            else:
                self.feed = feed
            self.feed = self.feed.save()
            return FEED_ERRPARSE, ret_values

    # the feed has changed (or it is the first time we parse it)
    # saving the etag and last_modified fields
    original_etag = self.feed.etag
    self.feed.etag = self.fpf.get('etag')
    if self.feed.etag:
        self.feed.etag = self.feed.etag[:255]
    # some times this is None (it never should) *sigh*
    if self.feed.etag is None:
        self.feed.etag = ''
    if self.feed.etag != original_etag:
        self.feed.save(update_fields=['etag'])

    original_last_modified = self.feed.last_modified
    try:
        self.feed.last_modified = mtime(self.fpf.modified)
    except Exception:
        # Best effort: a missing or unparseable modified value clears the
        # field. Narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        self.feed.last_modified = None
    if self.feed.last_modified != original_last_modified:
        self.feed.save(update_fields=['last_modified'])

    # Only the first 100 entries are processed.
    self.fpf.entries = self.fpf.entries[:100]

    original_title = self.feed.feed_title
    if self.fpf.feed.get('title'):
        self.feed.feed_title = strip_tags(self.fpf.feed.get('title'))
    if self.feed.feed_title != original_title:
        self.feed.save(update_fields=['feed_title'])

    tagline = self.fpf.feed.get('tagline', self.feed.data.feed_tagline)
    if tagline:
        original_tagline = self.feed.data.feed_tagline
        self.feed.data.feed_tagline = smart_unicode(tagline)
        if self.feed.data.feed_tagline != original_tagline:
            self.feed.data.save(update_fields=['feed_tagline'])

    if not self.feed.feed_link_locked:
        new_feed_link = (self.fpf.feed.get('link')
                         or self.fpf.feed.get('id')
                         or self.feed.feed_link)
        if self.options['force'] and new_feed_link:
            new_feed_link = qurl(new_feed_link, remove=['_'])
        if new_feed_link != self.feed.feed_link:
            logging.debug(" ---> [%-30s] ~SB~FRFeed's page is different: %s to %s" % (
                self.feed.title[:30], self.feed.feed_link, new_feed_link))
            redirects, non_redirects = self.feed.count_redirects_in_history('page')
            self.feed.save_page_history(
                301, "HTTP Redirect (%s to go)" % (10 - len(redirects)))
            if len(redirects) >= 10 or len(non_redirects) == 0:
                self.feed.feed_link = new_feed_link
                self.feed.save(update_fields=['feed_link'])

    # Determine if stories aren't valid and replace broken guids.
    # Guids are replaced only when every entry shares one guid and there is
    # more than one entry; in that case permalinks are used, unless they are
    # all identical too, in which case the published timestamp is used.
    entries = self.fpf.entries
    guids_seen = set(entry.get('guid') for entry in entries)
    permalinks_seen = set(Feed.get_permalink(entry) for entry in entries)
    guid_difference = len(guids_seen) != len(entries)
    single_guid = len(guids_seen) == 1
    replace_guids = single_guid and guid_difference
    permalink_difference = len(permalinks_seen) != len(entries)
    single_permalink = len(permalinks_seen) == 1
    replace_permalinks = single_permalink and permalink_difference

    # Compare new stories to existing stories, adding and updating
    story_hashes = []
    stories = []
    for entry in entries:
        story = pre_process_story(entry)
        if replace_guids:
            if replace_permalinks:
                new_story_guid = unicode(story.get('published'))
                if self.options['verbose']:
                    logging.debug(u' ---> [%-30s] ~FBReplacing guid (%s) with timestamp: %s' % (
                        self.feed.title[:30],
                        story.get('guid'), new_story_guid))
            else:
                new_story_guid = Feed.get_permalink(story)
                if self.options['verbose']:
                    logging.debug(u' ---> [%-30s] ~FBReplacing guid (%s) with permalink: %s' % (
                        self.feed.title[:30],
                        story.get('guid'), new_story_guid))
            story['guid'] = new_story_guid
        story['story_hash'] = MStory.feed_guid_hash_unsaved(
            self.feed.pk, story.get('guid'))
        stories.append(story)
        story_hashes.append(story.get('story_hash'))

    # Existing stories are looked up by story hash only.
    existing_stories = dict(
        (s.story_hash, s)
        for s in MStory.objects(story_hash__in=story_hashes))

    ret_values = self.feed.add_update_stories(
        stories, existing_stories,
        verbose=self.options['verbose'],
        updates_off=self.options['updates_off'])

    if (hasattr(self.fpf, 'feed') and
            hasattr(self.fpf.feed, 'links') and self.fpf.feed.links):
        hub_url = None
        self_url = self.feed.feed_address
        for link in self.fpf.feed.links:
            # First hub link wins; the last self link wins.
            if link['rel'] == 'hub' and not hub_url:
                hub_url = link['href']
            elif link['rel'] == 'self':
                self_url = link['href']
        push_expired = False
        if self.feed.is_push:
            try:
                push_expired = (self.feed.push.lease_expires
                                < datetime.datetime.now())
            except PushSubscription.DoesNotExist:
                self.feed.is_push = False
        if (hub_url and self_url and not settings.DEBUG and
                self.feed.active_subscribers > 0 and
                (push_expired or not self.feed.is_push
                 or self.options.get('force'))):
            logging.debug(u' ---> [%-30s] ~BB~FW%sSubscribing to PuSH hub: %s' % (
                self.feed.title[:30],
                "~SKRe-~SN" if push_expired else "", hub_url))
            try:
                PushSubscription.objects.subscribe(
                    self_url, feed=self.feed, hub=hub_url)
            except TimeoutError:
                logging.debug(u' ---> [%-30s] ~BB~FW~FRTimed out~FW subscribing to PuSH hub: %s' % (
                    self.feed.title[:30], hub_url))
        elif (self.feed.is_push and
              (self.feed.active_subscribers <= 0 or not hub_url)):
            logging.debug(u' ---> [%-30s] ~BB~FWTurning off PuSH, no hub found' % (
                self.feed.title[:30]))
            self.feed.is_push = False
            self.feed = self.feed.save()

    logging.debug(u' ---> [%-30s] ~FYParsed Feed: %snew=%s~SN~FY %sup=%s~SN same=%s%s~SN %serr=%s~SN~FY total=~SB%s' % (
        self.feed.title[:30],
        '~FG~SB' if ret_values['new'] else '', ret_values['new'],
        '~FY~SB' if ret_values['updated'] else '', ret_values['updated'],
        '~SB' if ret_values['same'] else '', ret_values['same'],
        '~FR~SB' if ret_values['error'] else '', ret_values['error'],
        len(self.fpf.entries)))
    self.feed.update_all_statistics(has_new_stories=bool(ret_values['new']),
                                    force=self.options['force'])
    if ret_values['new']:
        self.feed.trim_feed()
        self.feed.expire_redis()
    self.feed.save_feed_history(200, "OK")

    if self.options['verbose']:
        logging.debug(u' ---> [%-30s] ~FBTIME: feed parse in ~FM%.4ss' % (
            self.feed.title[:30], time.time() - start))

    return FEED_OK, ret_values
def fetch(self):
    """
    Uses feedparser to download the feed. Will be parsed later.

    The parsed result is stored on ``self.fpf``. The stored etag and
    last-modified values are passed to feedparser unless the fetch is forced
    (explicitly, or at random on roughly 1% of calls) or the feed has not
    been fetched once / is not known good.

    Returns:
        ``(FEED_OK, self.fpf)`` when a parsed feed is available, otherwise
        ``(FEED_ERRHTTP, None)``.
    """
    identity = self.get_identity()
    logging.debug(
        u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
            identity,
            self.feed.title[:30],
            self.feed.id,
            datetime.datetime.now() - self.feed.last_update))

    etag = self.feed.etag
    modified = (self.feed.last_modified.utctimetuple()[:7]
                if self.feed.last_modified else None)
    address = self.feed.feed_address

    if self.options.get('force') or random.random() <= .01:
        # Forced fetch: drop the conditional-GET validators and append a
        # random `_` query parameter (presumably to get past caches).
        self.options['force'] = True
        modified = None
        etag = None
        address = qurl(address, add={"_": random.randint(0, 10000)})
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
            self.feed.title[:30], address))
    elif not self.feed.fetched_once or not self.feed.known_good:
        modified = None
        etag = None

    # User agent handed to feedparser; embeds the subscriber count and the
    # feed's permalink.
    user_agent = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                  '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                  'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                  'Safari/534.48.3)' % (
                      self.feed.num_subscribers,
                      's' if self.feed.num_subscribers != 1 else '',
                      self.feed.permalink,
                  ))

    if self.options.get('feed_xml'):
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
                self.feed.title[:30], len(self.options.get('feed_xml'))))

    if self.options.get('fpf'):
        # An already-parsed feed was handed in; no network access needed.
        self.fpf = self.options.get('fpf')
        logging.debug(
            u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
                self.feed.title[:30]))
        return FEED_OK, self.fpf

    if 'youtube.com' in address:
        try:
            youtube_feed = self.fetch_youtube(address)
        except requests.adapters.ConnectionError:
            youtube_feed = None
        if not youtube_feed:
            logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' % (
                self.feed.title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(youtube_feed)

    if not self.fpf:
        try:
            self.fpf = feedparser.parse(address,
                                        agent=user_agent,
                                        etag=etag,
                                        modified=modified)
        except (TypeError, ValueError, KeyError, EOFError) as e:
            logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' % (
                self.feed.title[:30], e))

    # Every exit returns a (status, fpf) pair so callers can always unpack
    # the result instead of receiving an implicit None.
    if not self.fpf:
        return FEED_ERRHTTP, None
    return FEED_OK, self.fpf