def check(self):
    from urllib import urlopen
    from time import mktime
    from datetime import datetime
    import logging, feedparser

    # Assumed locations for names used below but not imported in the
    # original (helpers, Item, utc)
    from django.utils.timezone import utc
    from bambu.megaphone import helpers
    from bambu.megaphone.models import Item

    logger = logging.getLogger('bambu.megaphone')
    kwargs = {'page': 1}

    while True:
        try:
            # self.url is a template with a %(page)d placeholder
            feed = feedparser.parse(urlopen(self.url % kwargs))
        except IOError:
            logger.error('IO error when looking for feed items')
            break

        added = False
        for entry in feed.get('entries', []):
            url = helpers.fix_url(entry.link)

            # Skip entries already stored for this feed
            if self.feed_ptr.items.filter(url = url).count() == 0:
                if self.feed_ptr.include_item(entry.title):
                    logger.debug('Adding item %s' % url)

                    # Prefer the first content block, falling back to the
                    # entry description
                    content = entry.get('content', [])
                    if len(content) > 0:
                        content = content[0].get('value') or entry.description
                    else:
                        content = entry.get('description') or None

                    try:
                        Item.objects.create_item(
                            feed = self.feed_ptr,
                            primary_text = entry.title,
                            secondary_text = content,
                            url = url,
                            date = datetime.fromtimestamp(
                                mktime(entry.updated_parsed)
                            ).replace(tzinfo = utc),
                            data = {},
                            links = [url]
                        )

                        added = True
                    except Exception as ex:
                        logger.error('Error getting item: %s' % unicode(ex))
                else:
                    logger.info('Ignoring item %s' % entry.link)

        # Only request the next page if this one yielded something new
        if added:
            kwargs['page'] += 1
            logger.debug('Moving to page %(page)d' % kwargs)
        else:
            break
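
# Usage sketch (hypothetical URL): check() expects self.url to contain a
# %(page)d placeholder so the paging loop above can walk the feed, e.g.
#
#     feed.url = 'http://example.com/feed?paged=%(page)d'
#     feed.check()  # fetches page 1, 2, ... until a page adds no new items
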
def get_tweet_items(self, access_token, latest_item, **kwargs):
    from bambu.megaphone.helpers import fix_url
    from bambu.megaphone.models import ServiceFeed
    from urllib import urlopen
    from os import path
    import logging

    # simplejson, HTTPException, parse (dateutil.parser), utc and
    # TWEET_URL_REGEX are assumed to be module-level imports/definitions;
    # TWEET_URL_REGEX is assumed to capture the numeric status ID as its
    # first group

    page = 1
    logger = logging.getLogger('bambu.megaphone')
    kwargs = {}  # caller-supplied kwargs are discarded; only since_id is used

    if latest_item:
        # Pull the status ID out of the most recent stored tweet's URL so
        # only newer statuses are requested
        matches = TWEET_URL_REGEX.match(latest_item.url)
        if not matches or len(matches.groups()) == 0:
            raise Exception("Latest tweet URL doesn't match expected pattern")

        kwargs['since_id'] = matches.groups()[0]

    feed = ServiceFeed.objects.get(
        service__access_token = access_token
    )

    while True:
        try:
            data = self.get_url(self.get_messages_url, access_token,
                count = 200,
                page = page,
                include_entities = 1,
                **kwargs
            )
        except HTTPException:
            logger.error('Got bad HTTP response when looking for tweets')
            break
        except IOError:
            logger.error('IO error when looking for tweets')
            break

        try:
            items = simplejson.load(data)
        except Exception as ex:
            logger.error('Error decoding tweet data: %s' % unicode(ex))
            return

        if len(items) == 0 or not isinstance(items, (list, tuple)):
            break

        for item in items:
            user = item.pop('user')
            place = item.pop('coordinates')
            text = item.pop('text')
            attachments = []
            links = []

            # GeoJSON point order is (longitude, latitude)
            if place:
                longitude, latitude = place.get('coordinates')
            else:
                latitude, longitude = None, None

            entities = item.pop('entities', {})
            existing_urls = False

            if entities:
                for url in entities.get('urls', []):
                    fixed = fix_url(url.get('expanded_url'))

                    # Treat a tweet whose expanded link is already stored as
                    # a repost and skip it
                    if feed.items.filter(url__startswith = fixed).exists():
                        existing_urls = True
                        break

                    # Swap the short link in the tweet text for the fixed,
                    # expanded URL
                    text = text.replace(url.get('url'), fixed)
                    links.append(fixed)

                if existing_urls:
                    logger.debug('Ignoring item as a repost')
                    continue

                for url in entities.get('media', []):
                    url = url.get('media_url')
                    attachments.append(
                        {
                            'file': (urlopen, url),  # deferred (opener, url) pair
                            'title': url[:100],
                            'url': url,
                            'extension': path.splitext(url)[-1]
                        }
                    )

            yield {
                'date': parse(item.get('created_at')).replace(tzinfo = utc),
                'primary_text': text,
                'data': item,
                'url': 'http://twitter.com/%s/statuses/%s' % (
                    user.get('id'),
                    item.get('id')
                ),
                'latitude': latitude,
                'longitude': longitude,
                'attachments': attachments,
                'links': links
            }

        # A short page means the end has been reached; anything under 100
        # items is treated as the final page (200 are requested at a time)
        if len(items) >= 100:
            page += 1
            logger.debug('Moving to page %d' % page)
        else:
            break
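
# Consumption sketch (hypothetical caller): get_tweet_items is a generator of
# keyword-ready dicts, so a Twitter check routine could drain it straight into
# the same item creator the RSS check() uses above, e.g.
#
#     for tweet in provider.get_tweet_items(access_token, latest_item):
#         Item.objects.create_item(feed = feed.feed_ptr, **tweet)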