Example #1
	def check(self):
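		# Item, helpers, datetime and utc are assumed to be imported at module level.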
		from urllib import urlopen
		from time import mktime
		import logging, feedparser
		
		kwargs = {'page': 1}
		
		logger = logging.getLogger('bambu.megaphone')
		
		while True:
			try:
				feed = feedparser.parse(urlopen(self.url % kwargs))
			except IOError:
				logger.error('IO error when looking for feed items')
				break
			
			added = False
			
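			# Add only entries that aren't already stored and that pass the feed's include filter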
			for entry in feed.get('entries', []):
				url = helpers.fix_url(entry.link)
				if self.feed_ptr.items.filter(url = url).count() == 0:
					if self.feed_ptr.include_item(entry.title):
						logger.debug('Adding item %s' % url)
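						# Prefer the entry's first content block, falling back to its description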
						content = entry.get('content', [])
						
						if len(content) > 0:
							content = content[0].get('value') or entry.description
						else:
							content = entry.get('description') or None
						
						try:
							Item.objects.create_item(
								feed = self.feed_ptr,
								primary_text = entry.title,
								secondary_text = content,
								url = url,
								date = datetime.fromtimestamp(
									mktime(entry.updated_parsed)
								).replace(tzinfo = utc),
								data = {},
								links = [url]
							)
							
							added = True
						except Exception as ex:
							logger.error('Error getting item: %s' % unicode(ex))
					else:
						logger.info('Ignoring item %s' % entry.link)
			
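			# Only move to the next page if this one produced at least one new item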
			if added:
				kwargs['page'] += 1
				logger.debug('Moving to page %(page)d' % kwargs)
			else:
				break
Example #2
	def get_tweet_items(self, access_token, latest_item, **kwargs):
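		# logging, simplejson, HTTPException, TWEET_URL_REGEX, parse and utc are
		# assumed to be imported at module level elsewhere in bambu.megaphone.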
		from bambu.megaphone.helpers import fix_url
		from bambu.megaphone.models import ServiceFeed
		from urllib import urlopen
		from os import path
		
		page = 1
		logger = logging.getLogger('bambu.megaphone')
		kwargs = {}
		
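		# Resume from the most recently stored tweet by extracting its ID from the stored URL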
		if latest_item:
			matches = TWEET_URL_REGEX.match(latest_item.url)
			if not matches or len(matches.groups()) == 0:
				raise Exception('Latest tweet URL doesn\'t match expected pattern')
			
			kwargs['since_id'] = matches.groups()[0]
		
		feed = ServiceFeed.objects.get(
			service__access_token = access_token
		)
		
		while True:
			try:
				data = self.get_url(self.get_messages_url, access_token,
					count = 200, page = page, include_entities = 1, **kwargs
				)
			except HTTPException:
				logger.error('Got bad HTTP response when looking for tweets')
				break
			except IOError:
				logger.error('IO error when looking for tweets')
				break
			
			try:
				items = simplejson.load(data)
			except Exception as ex:
				logger.error('Error parsing tweet data: %s' % unicode(ex))
				return
			
			if not isinstance(items, (list, tuple)) or len(items) == 0:
				break
			
			for item in items:
				user = item.pop('user')
				place = item.pop('coordinates')
				text = item.pop('text')
				attachments = []
				links = []
				
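				# Twitter's GeoJSON coordinates are ordered [longitude, latitude]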
				if place:
					longitude, latitude = place.get('coordinates')
				else:
					latitude, longitude = None, None
				
				entities = item.pop('entities', {})
				existing_urls = False
				if entities:
					for url in entities.get('urls', []):
						# Replace shortened URLs with their expanded, normalised form and
						# skip the tweet entirely if one of its links is already stored
						fixed = fix_url(url.get('expanded_url'))
						if feed.items.filter(url__startswith = fixed):
							existing_urls = True
							break
						
						text = text.replace(
							url.get('url'), fixed
						)
						
						links.append(fixed)
					
					if existing_urls:
						logger.debug('Ignoring item as a repost')
						continue
					
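					# Collect media attachments; the (urlopen, url) pair leaves the actual download to the caller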
					for url in entities.get('media', []):
						url = url.get('media_url')
						attachments.append(
							{
								'file': (urlopen, url),
								'title': url[:100],
								'url': url,
								'extension': path.splitext(url)[-1]
							}
						)
				
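				# Yield the tweet as a normalised item dict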
				yield {
					'date': (
						parse(item.get('created_at'))
					).replace(tzinfo = utc),
					'primary_text': text,
					'data': item,
					'url': 'http://twitter.com/%s/statuses/%s' % (
						user.get('id'), item.get('id')
					),
					'latitude': latitude,
					'longitude': longitude,
					'attachments': attachments,
					'links': links
				}
			
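			# Twitter may return fewer tweets than the 200 requested;
			# a page with fewer than 100 is treated as the last one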
			if len(items) >= 100:
				page += 1
				logger.debug('Moving to page %d' % page)
			else:
				break