Пример #1
0
    def get(self):
        """Create a new wave filled with the entries of ``FEED`` and submit it.

        The wave is created on the ``googlewave.com`` domain, its title is
        taken from the feed channel's description, and every feed entry is
        appended to the root blip as a bold title followed by its
        description.
        """
        new_wave = self.robot.new_wave(domain='googlewave.com',
                                       participants=['*****@*****.**'])
        parsed = feedparser.parse(FEED)
        new_wave.title = parsed.channel.description

        # append_markup currently only supports
        # <p>, <div>, <b>, <strong>, <i>, <em>, <u>.
        entry_markup = "<p><b>%s</b></p><p>%s</p><p> </p>"
        for item in parsed.entries:
            new_wave.root_blip.append("\n")
            new_wave.root_blip.append_markup(
                entry_markup % (item.title, item.description))

        self.robot.submit(new_wave)
Пример #2
0
def update_defs():
    """Poll every non-deleted feed and store the entries not saved yet.

    Each ``Feed`` with ``feed_deleted=False`` is parsed with ``feedparser``.
    An entry becomes a new ``Item`` only when the feed has no item with the
    same publication date or the same permalink.  Tags of a new entry are
    looked up by slug and created when missing.

    Errors are handled per feed: any exception raised while processing a
    feed is printed, the remaining entries of that feed are skipped, and the
    next feed is still processed.
    """
    feeds = Feed.objects.filter(feed_deleted=False)
    for feed in feeds:
        try:
            feed_items = feedparser.parse(feed.feed_url)
            for entry in feed_items['entries']:
                # Prefer the publication date, then the update date, then
                # the current UTC time.
                date_published = entry.get('published', entry.get('updated'))
                if not date_published:
                    date_published = str(datetime.datetime.utcnow())

                # Quote only what follows the scheme separator.  The window
                # is 8 characters wide so that "https://" is recognised as
                # well as "http://" (a 7-character window cuts the "://" of
                # "https://" in half).
                link = entry['link']
                protocol_index = link[:8].find("://")
                if protocol_index != -1:
                    rest_start = protocol_index + 3
                    permalink = link[:rest_start] + urlquote(link[rest_start:])
                else:
                    permalink = urlquote(link)

                date_published = dateutil.parser.parse(date_published)

                # Normalise to a naive UTC datetime.  utcoffset() is None
                # for naive values (such as the utcnow() fallback above);
                # those are kept unchanged instead of failing on
                # "datetime - None".
                utc_offset = date_published.utcoffset()
                if utc_offset is not None:
                    date_published = (
                        date_published - utc_offset).replace(tzinfo=None)

                items_count = Item.objects.filter(
                    Q(item_date=date_published) |
                    Q(item_permalink=permalink)
                ).filter(item_feed=feed).count()

                if items_count == 0:
                    feed_content = entry.get('content')
                    if feed_content:
                        feed_content = feed_content[0]['value']
                        content = stripper.strip_tags(feed_content)
                        # NOTE(review): the empty tuple is presumably an
                        # empty tag whitelist (strip everything) - confirm
                        # against stripper.strip_tags.
                        clean_content = stripper.strip_tags(feed_content, ())
                    else:
                        # Missing or empty content list.
                        content = None
                        clean_content = None

                    i = Item(item_feed=feed,
                             item_date=date_published,
                             item_title=entry.get('title'),
                             item_content=content,
                             item_clean_content=clean_content,
                             item_author=entry.get('author'),
                             item_permalink=permalink)
                    i.save()

                    if 'tags' in entry:
                        for tag in entry['tags']:
                            term = tag.get('term')
                            if not term:
                                # A tag without a term cannot be slugged.
                                continue
                            slug = urlquote(term.lower())
                            try:
                                tagobj = Tag.objects.get(tag_slug=slug)
                            except Exception:
                                # Best effort: any lookup failure is treated
                                # as "tag does not exist yet".
                                tagobj = Tag(tag_name=term,
                                             tag_slug=slug,
                                             tag_count=1)
                                tagobj.save()
                            # NOTE(review): item_tags is called here; if it
                            # is a many-to-many field this probably should
                            # be i.item_tags.add(tagobj) - confirm against
                            # the Item model.
                            i.item_tags(tagobj)
                        i.save()
        except Exception as e:
            print(e)
                    
Пример #3
0
    def real_update(self, forced=False):
        """Download this feed and store its new or changed entries.

        A node without ``xmlUrl`` is not a real feed: the feeds returned by
        ``allFeeds()`` are updated instead and nothing is downloaded for the
        node itself.

        For a real feed the URL is parsed using the stored ``etag`` and
        ``lastModified`` values and ``lastUpdated`` is stamped.  Each entry
        is matched against existing ``Post`` rows, first by ``post_id`` and
        then by title; a match has its content/title refreshed, anything
        else becomes a new ``Post`` (plus its enclosures).  Afterwards the
        feed metadata (``lastModified``, ``etag``) is saved and the
        ``fresh`` flag is updated for older posts.

        Database work is best effort: on failure the session is rolled back
        and processing continues.  ``self.updating`` is set to True here and
        is not reset by this method.

        :param forced: accepted for interface compatibility; not used here.
        """
        if not self.xmlUrl:  # Not a real feed
            # Only the children can be downloaded; parsing an empty URL for
            # this node itself would be meaningless, so stop afterwards.
            for f in self.allFeeds():
                f.update()
            return

        if self.lastModified:
            mod = self.lastModified
        else:
            mod = datetime.datetime(1970, 1, 1)

        if self.title:
            statusQueue.put(u"Updating: "+ self.title)
        d = fp.parse(self.xmlUrl, etag=self.etag, modified=mod.timetuple())
        try:
            self.lastUpdated = datetime.datetime.now()
            elixir.session.commit()
        except Exception:
            elixir.session.rollback()

        # The parse result carries no status when nothing was fetched over
        # HTTP (e.g. the request failed), so read it defensively.
        status = getattr(d, 'status', None)
        if status == 304:  # No need to fetch
            return
        if status == 301:  # Permanent redirect
            self.xmlUrl = d.href
        if status == 410:  # Feed deleted. FIXME: tell the user and stop trying!
            return

        self.updating = True
        # Notify feed is updating
        # feedStatusQueue.put([0, self.id])
        posts = []
        for post in d['entries']:
            try:
                # Date can be one of several fields
                dkey = None
                for candidate in ('created_parsed', 'published_parsed',
                                  'modified_parsed'):
                    if candidate in post:
                        dkey = candidate
                        break
                if dkey and post[dkey]:
                    date = datetime.datetime.fromtimestamp(
                        time.mktime(post[dkey]))
                else:
                    date = datetime.datetime.now()

                # So can the "unique ID for this entry"
                if 'id' in post:
                    post_id = post['id']
                elif 'link' in post:
                    post_id = post['link']
                else:
                    # Without an identifier the entry cannot be matched
                    # against stored posts; report it and move on rather
                    # than reuse the previous entry's key.
                    debug(post)
                    continue

                # So can the content
                if 'content' in post:
                    content = '<hr>'.join([c.value for c in post['content']])
                elif 'summary' in post:
                    content = post['summary']
                elif 'value' in post:
                    content = post['value']
                else:
                    # No body at all: use an empty one instead of the
                    # previous entry's content.
                    content = ''

                # Rudimentary NON-html detection
                if '<' not in content:
                    content = escape(content).replace('\n\n', '<p>')

                # Author if available, else None
                author = ''
                # First, we may have author_detail, which is the nicer one
                if 'author_detail' in post:
                    author = detailToAuthor(post['author_detail'])
                # Or maybe just an author
                elif 'author' in post:
                    author = post['author']

                # But we may have a list of contributors, which may have
                # the same detail as the author's.  Author and contributors
                # are all joined with the same ' - ' separator.
                if 'contributors' in post:
                    names = [detailToAuthor(contrib)
                             for contrib in post['contributors']]
                    if author:
                        names.insert(0, author)
                    author = ' - '.join([name for name in names if name])
                if not author:
                    #FIXME: how about using the feed's author,
                    # or something like that
                    author = None

                # The link should be simple ;-)
                if 'link' in post:
                    link = post['link']
                else:
                    link = None

                # Titles may be in plain title, but a title_detail is preferred
                if 'title_detail' in post:
                    title = detailToTitle(post['title_detail'])
                else:
                    title = post['title']

                # Search for enclosures; an entry without any yields an
                # empty list so the loop below is simply skipped.
                if 'enclosures' in post:
                    enclosures = post['enclosures'] or []
                else:
                    enclosures = []

                try:
                    # FIXME: if I use date to check here, I get duplicates on
                    # posts where I use artificial date because it's not in the
                    # feed's entry. If I don't I don't re-get updated posts.

                    p = Post.get_by(feed=self, post_id=post_id)
                    if p:
                        if p.content != content:
                            p.content = content
                        if p.title != title:
                            p.title = title
                    else:
                        # This is because of google news: the same news gets
                        # reposted over and over with different post_id :-(
                        p = Post.get_by(feed=self, title=title)
                        if p:
                            if p.post_id != post_id:
                                p.post_id = post_id
                            if p.content != content:
                                p.content = content
                            if p.title != title:
                                p.title = title
                        else:
                            p = Post(feed=self, date=date, title=title,
                                     post_id=post_id, content=content,
                                     author=author, link=link)
                            if self.markRead:
                                p.unread = False
                            # Tag support
                            if 'tags' in post:
                                p.tags = ','.join(
                                    [t.term for t in post['tags']])
                            posts.append(p)

                            # Create enclosures.  Type and length are
                            # optional, so a missing one must not discard
                            # the whole post.
                            for e in enclosures:
                                Enclosure(post=p,
                                          href=e.href,
                                          filetype=getattr(e, 'type', None),
                                          length=getattr(e, 'length', None),
                                          filename=None)
                    elixir.session.commit()
                except Exception:
                    traceback.print_exc(1)
                    elixir.session.rollback()
            except KeyError:
                debug(post)
        try:
            self.updateFeedData(d)
            if 'modified' in d:
                # NOTE(review): this unpacks d['modified'] as a time tuple;
                # confirm the parser in use does not return a string here.
                self.lastModified = datetime.datetime(*d['modified'][:6])
            if 'etag' in d:
                self.etag = d['etag']
            elixir.session.commit()
        except Exception:
            elixir.session.rollback()

        try:
            # Silly way to release the posts objects
            # we don't need anymore
            post_ids = [post.id for post in posts]

            if post_ids:
                # Mark feed UI for updating
                self.curUnread = -1
                # Fix freshness
                Post.table.update().where(
                    sql.except_(Post.table.select(Post.feed==self),
                                Post.table.select(Post.id.in_(post_ids)))).\
                                values(fresh=False).execute()
            elixir.session.commit()
        except Exception:
            elixir.session.rollback()