def import_feed(self, feed):
    """Import the talks published in the XML document at ``feed.rss_url``.

    Each ``<talk>`` element is mirrored into an ``Item`` looked up (or
    created) by ``feed`` and the talk's ``<id>``. Items belonging to this
    feed that are not present in the document are deleted afterwards.

    The ``id``, ``updated_at``, ``title``, ``url``, ``start_time`` and
    ``end_time`` elements are required: a talk lacking one of them raises
    ``AttributeError``. ``abstract`` and ``venue`` may be empty or absent.

    :param feed: feed record providing ``rss_url``; its ``last_modified``
        is set to the current time and saved once the document is parsed.
    :returns: the set of ``Item`` objects written during this run.
    """
    from contextlib import closing

    from molly.apps.feeds.models import Item, vCard

    # Release the HTTP connection as soon as the document has been parsed
    # rather than leaving it open until garbage collection.
    with closing(urllib2.urlopen(feed.rss_url)) as response:
        xml = etree.parse(response)

    feed.last_modified = datetime.now()
    feed.save(update_last_modified=True)

    items = set()
    for talk in xml.findall('talk'):
        item, _created = Item.objects.get_or_create(
            feed=feed, guid=talk.find('id').text)

        item.last_modified = self.parse_date(talk.find('updated_at').text)
        item.title = talk.find('title').text.strip()
        # An empty <abstract/> has no text at all (None); treat that as an
        # empty description instead of aborting the whole import.
        item.description = sanitise_html(
            (talk.findtext('abstract') or '').strip())
        item.link = talk.find('url').text
        item.dt_start = self.parse_date(talk.find('start_time').text)
        item.dt_end = self.parse_date(talk.find('end_time').text)

        # Only attach a venue when one is actually named; a blank name
        # leaves the item's venue untouched and creates no nameless vCard.
        venue_name = (talk.findtext('venue') or '').strip()
        if venue_name:
            location, _created = vCard.objects.get_or_create(name=venue_name)
            location.save()
            item.venue = location

        item.save()
        items.add(item)

    # Drop stored items that have disappeared from the document.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items
def import_feed(self, feed):
    """Synchronise the ``Item`` records of ``feed`` with its talks XML.

    The document at ``feed.rss_url`` is downloaded and parsed; every
    ``<talk>`` element updates (or creates) the ``Item`` identified by
    ``feed`` and the talk's ``<id>``. Items of this feed that the document
    no longer lists are deleted.

    A talk missing its ``id``, ``updated_at``, ``title``, ``url``,
    ``start_time`` or ``end_time`` element raises ``AttributeError``;
    ``abstract`` and ``venue`` are optional.

    :param feed: feed record providing ``rss_url``; ``last_modified`` is
        set to the current time and saved with
        ``update_last_modified=True``.
    :returns: set of the ``Item`` objects saved by this call.
    """
    from contextlib import closing

    from molly.apps.feeds.models import Item, vCard

    def optional_text(node, tag):
        # findtext() gives None for a missing element and the element's
        # text otherwise; normalise both "missing" and "empty" to ''.
        return (node.findtext(tag) or '').strip()

    # closing() guarantees the response is closed even if parsing fails.
    with closing(urllib2.urlopen(feed.rss_url)) as response:
        xml = etree.parse(response)

    feed.last_modified = datetime.now()
    feed.save(update_last_modified=True)

    items = set()
    for talk in xml.findall('talk'):
        item, _created = Item.objects.get_or_create(feed=feed, guid=talk.find('id').text)

        item.last_modified = self.parse_date(talk.find('updated_at').text)
        item.title = talk.find('title').text.strip()
        item.description = sanitise_html(optional_text(talk, 'abstract'))
        item.link = talk.find('url').text
        item.dt_start = self.parse_date(talk.find('start_time').text)
        item.dt_end = self.parse_date(talk.find('end_time').text)

        # A talk without a venue name keeps whatever venue the item already
        # has; no vCard with an empty name is created for it.
        venue_name = optional_text(talk, 'venue')
        if venue_name:
            location, _created = vCard.objects.get_or_create(name=venue_name)
            location.save()
            item.venue = location

        item.save()
        items.add(item)

    # Anything stored for this feed but absent from the document is stale.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items
# import_feed (iCalendar variant): downloads feed.rss_url with urllib2, parses
# the body with Calendar.from_string and walks every component. For each
# component named 'VEVENT' an Item is fetched or created by (feed, UID) and
# filled in as follows:
#   - DTSTART / DTEND are parsed with vDatetime first and with vDate when that
#     raises ValueError; DTEND is only read when the property is present.
#   - SUMMARY -> title, URL -> link (if present), DESCRIPTION -> description
#     (passed through sanitise_html, if present).
#   - LOCATION -> a vCard fetched or created by name, stored as item.venue.
#   - LAST-MODIFIED -> last_modified, falling back to datetime.now() on any
#     Exception.
# The item is then saved and added to the `items` set.
# NOTE(review): the whole definition sits on one physical line, so everything
# after the first '#' in it is a comment to the interpreter; the original line
# breaks need restoring before this can run.
# NOTE(review): the outer `try:` has no handler in the visible text, and there
# is neither a stale-item clean-up loop nor a `return items`; the definition
# looks truncated - TODO confirm against the full source.
# NOTE(review): `str(component.get('LOCATION')) != ''` - if get() returns None
# for a missing property, str() yields 'None' and the check still passes,
# which would create a vCard named 'None'; verify.
def import_feed(self, feed): from molly.apps.feeds.models import Item, vCard calendar = Calendar.from_string(urllib2.urlopen(feed.rss_url).read()) items = set() for component in calendar.walk(): if component.name == 'VEVENT': item, created = Item.objects.get_or_create(feed=feed, guid=str(component.get('UID'))) # Do not create the event if one the property is not correct, # first tries to parse DT as datetime then as date, if it still # fails, then ignore try: try: item.dt_start = vDatetime.from_ical(str( component.get('DTSTART'))) except ValueError, ve: item.dt_start = vDate.from_ical(str( component.get('DTSTART'))) if component.get('DTEND'): try: item.dt_end = vDatetime.from_ical(str( component.get('DTEND'))) except ValueError, ve: item.dt_end = vDate.from_ical(str( component.get('DTEND'))) item.title = vText.from_ical(str( component.get('SUMMARY')).strip()) if component.get('URL'): item.link = str(component.get('URL')) if component.get('DESCRIPTION'): item.description = sanitise_html(vText.from_ical(str( component.get('DESCRIPTION')))) if str(component.get('LOCATION')) != '': location, created = vCard.objects.get_or_create( name=vText.from_ical(str( component.get('LOCATION')).strip())) # in the future, we could imagine to (try to) geocode # the location to get a point field... location.save() item.venue = location try: item.last_modified = vDatetime.from_ical(str( component.get('LAST-MODIFIED'))) except Exception, e: item.last_modified = datetime.now() item.save() items.add(item)
def import_feed(self, feed):
    """Synchronise the stored items of ``feed`` with its syndication feed.

    The feed at ``feed.rss_url`` is parsed with feedparser. Every entry is
    written to the ``Item`` identified by ``(guid, feed)``, creating it if
    necessary, and items of this feed that no longer appear among the
    entries are deleted.

    :param feed: feed record providing ``rss_url``. Its ``last_modified``
        is taken from the feed's own update time, else from the HTTP
        ``Last-Modified`` header, else from the current time, and saved.
    :returns: the set of ``Item`` objects saved during this run.
    """
    from molly.apps.feeds.models import Item

    feed_data = feedparser.parse(feed.rss_url)
    try:
        feed.last_modified = \
            struct_to_datetime(feed_data.feed.updated_parsed)
    except Exception:
        # Best effort: the feed carries no usable update time, so fall back
        # to the Last-Modified header or, failing that, to "now".
        feed.last_modified = \
            parse_date(feed_data.headers.get('last-modified',
                datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000")))
    feed.save()

    items = set()
    # guid -> Item already written in this run, so an entry repeating a
    # guid reuses the same object without rescanning ``items``.
    items_by_guid = {}
    for x_item in feed_data.entries:
        if hasattr(x_item, 'id'):
            guid = x_item.id
        else:
            # Some stupid feeds don't have any GUIDs, fall back to the URL
            # and hope it's unique
            guid = x_item.link

        try:
            # Parsed dates are time tuples; only year..second map onto
            # datetime()'s arguments. A seventh field (the weekday) would
            # be misread as microseconds.
            last_modified = datetime(*x_item.date_parsed[:6])
        except Exception:
            last_modified = None

        item = items_by_guid.get(guid)
        if item is None:
            try:
                item = Item.objects.get(guid=guid, feed=feed)
            except Item.DoesNotExist:
                item = Item(guid=guid,
                            last_modified=datetime(1900, 1, 1),
                            feed=feed)

        # Entries are always rewritten; no last_modified comparison is made.
        item.title = x_item.title
        item.description = sanitise_html(x_item.get('description', ''))
        item.link = x_item.link
        item.last_modified = last_modified
        item.save()

        items.add(item)
        items_by_guid[item.guid] = item

    # Remove stored items that have dropped out of the feed.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items
def import_feed(self, feed):
    """Refresh the ``Item`` records of ``feed`` from its syndication feed.

    ``feed.rss_url`` is parsed with feedparser; each entry updates (or
    creates) the ``Item`` matching ``(guid, feed)``. Afterwards any item of
    this feed that was not among the entries is deleted.

    :param feed: feed record providing ``rss_url``. ``last_modified`` is
        set from the feed's update time when available, otherwise from the
        HTTP ``last-modified`` header or the current time, then saved.
    :returns: the set of ``Item`` objects saved by this call.
    """
    from molly.apps.feeds.models import Item

    feed_data = feedparser.parse(feed.rss_url)
    try:
        feed.last_modified = struct_to_datetime(feed_data.feed.updated_parsed)
    except Exception:
        # Deliberately broad (but not a bare except, which would also trap
        # KeyboardInterrupt/SystemExit): any failure to read the feed's own
        # timestamp falls back to the header or to the current time.
        feed.last_modified = parse_date(
            feed_data.headers.get("last-modified", datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000"))
        )
    feed.save()

    items = set()
    # Items written so far, keyed by guid: constant-time lookup for entries
    # that repeat a guid instead of a scan over ``items`` per entry.
    written = {}
    for x_item in feed_data.entries:
        if hasattr(x_item, "id"):
            guid = x_item.id
        else:
            # Some stupid feeds don't have any GUIDs, fall back to the URL
            # and hope it's unique
            guid = x_item.link

        try:
            # Take year..second only; the next field of the time tuple is
            # the weekday, which datetime() would treat as microseconds.
            last_modified = datetime(*x_item.date_parsed[:6])
        except Exception:
            last_modified = None

        item = written.get(guid)
        if item is None:
            try:
                item = Item.objects.get(guid=guid, feed=feed)
            except Item.DoesNotExist:
                item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

        # Every entry is rewritten unconditionally.
        item.title = x_item.title
        item.description = sanitise_html(x_item.get("description", ""))
        item.link = x_item.link
        item.last_modified = last_modified
        item.save()

        items.add(item)
        written[item.guid] = item

    # Delete whatever is stored for this feed but no longer published.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items
def import_feed(self, feed):
    """Bring the stored items of ``feed`` in line with its syndication feed.

    The feed at ``feed.rss_url`` is parsed with feedparser and each entry
    is saved to the ``Item`` identified by ``(guid, feed)``, creating it
    when missing. Items of this feed absent from the entries are deleted.

    Entries without an ``id`` are keyed on their link, and entries without
    a parsable date are stored with ``last_modified`` of ``None``, rather
    than aborting the import.

    :param feed: feed record providing ``rss_url``. ``last_modified`` is
        taken from the feed's update time, else the HTTP ``last-modified``
        header, else the current time, and saved.
    :returns: the set of ``Item`` objects saved during this run.
    """
    from molly.apps.feeds.models import Item

    feed_data = feedparser.parse(feed.rss_url)
    try:
        feed.last_modified = struct_to_datetime(feed_data.feed.updated_parsed)
    except Exception:
        # Best effort: no usable timestamp in the feed itself, so use the
        # Last-Modified header or the current time.
        feed.last_modified = parse_date(feed_data.headers.get('last-modified', datetime.now().strftime("%a, %d %b %Y %H:%M:%S +0000")))
    feed.save()

    items = set()
    # guid -> Item already saved in this run (avoids rescanning ``items``
    # for every entry).
    saved_by_guid = {}
    for x_item in feed_data.entries:
        # Not every feed supplies GUIDs; fall back to the entry URL.
        guid = x_item.id if hasattr(x_item, 'id') else x_item.link

        try:
            # Year..second only: a seventh field of the time tuple (the
            # weekday) would be interpreted as microseconds.
            last_modified = datetime(*x_item.date_parsed[:6])
        except Exception:
            last_modified = None

        item = saved_by_guid.get(guid)
        if item is None:
            try:
                item = Item.objects.get(guid=guid, feed=feed)
            except Item.DoesNotExist:
                item = Item(guid=guid, last_modified=datetime(1900, 1, 1), feed=feed)

        # Entries are always rewritten, regardless of their timestamps.
        item.title = x_item.title
        item.description = sanitise_html(x_item.get('description', ''))
        item.link = x_item.link
        item.last_modified = last_modified
        item.save()

        items.add(item)
        saved_by_guid[item.guid] = item

    # Remove stored items the feed no longer contains.
    for item in Item.objects.filter(feed=feed):
        if item not in items:
            item.delete()

    return items