示例#1
0
    def test_generate_feed_entries(self):
        """Check the fields exposed by each entry of a known feed.

        Collects every entry yielded by ``feeds.generate_feed_entries`` for a
        fixed blogspot feed URL, then asserts for each entry that ``has()``
        reports ``title``, ``etag``, ``modified`` and ``link``, and that
        ``get()`` returns a non-``None`` value for both ``stripped_content``
        and ``raw_content``.
        """
        feed_url = "http://gilesbowkett.blogspot.com/feeds/posts/default"
        # Drain the generator first so a failure while producing entries
        # surfaces before any per-entry assertion runs.
        results = list(feeds.generate_feed_entries(feed_url))
        for e in results:
            raw_content = e.get('raw_content')
            stripped_content = e.get('stripped_content')

            # assertTrue replaces the deprecated assert_ alias.
            self.assertTrue(e.has('title'), "missing: title")
            self.assertTrue(e.has('etag'), "missing: etag")
            self.assertTrue(e.has('modified'), "missing: modified")
            # Parenthesised so the line parses as either a print statement
            # or a print() call; the output is the same.
            print("modified: %s" % e.modified)
            self.assertTrue(e.has('link'), "missing: link")

            # TODO: verify no HTML in stripped content

            # Not every post has content, so an empty string is accepted;
            # only a missing (None) value is a failure.
            self.assertTrue(stripped_content is not None,
                            "missing: stripped_content")
            self.assertTrue(raw_content is not None, "missing: raw_content")
示例#2
0
 def get(self, feed_url):
     """Write the entries of the feed at ``http://<feed_url>`` as JSON.

     Each entry becomes a dict with the keys ``title``, ``link``,
     ``content`` (the entry's ``stripped_content``) and ``modified``
     (the entry's ``modified`` value converted with ``str``). The list of
     dicts is handed to ``RequestHelper.write_json``.
     """
     absolute_url = "http://%s" % feed_url
     # The helper is created before the feed is read, as in the original.
     responder = RequestHelper(self)
     payload = []
     for item in generate_feed_entries(absolute_url):
         payload.append({
             "title": item.title,
             "link": item.link,
             "content": item.stripped_content,
             "modified": str(item.modified),
         })
     responder.write_json(payload)
示例#3
0
def ingest_feed_entries(feed, user, error_call=None):
    """
    Ingest every non-empty entry of a feed as a plain-text artifact.

    For each entry produced by ``feeds.generate_feed_entries(feed.url)`` that
    has a truthy ``stripped_content``, a URL resource is looked up or created
    for the entry's link and an artifact is looked up or created with the
    stripped content as its body.

    args:
        feed: object providing ``url`` and ``artifact_source.name``
        user: passed to ``ArtifactAccessor.find_or_create`` as ``modified_by``
        error_call: optional callable invoked as ``error_call(entry, exc)``
            when processing an entry raises; the loop then moves on to the
            next entry. When omitted, the exception propagates.

    yields:
        (artifact guid, entry, created) tuple, where ``created`` is the
        fourth value returned by ``ArtifactAccessor.find_or_create``
        (presumably whether the artifact was newly created -- confirm
        against the accessor)
    """
    # TODO: use etag from previous ingest
    for entry in feeds.generate_feed_entries(feed.url):
        try:
            stripped_content = entry.get("stripped_content")
            if not stripped_content:
                # Entries with missing or empty content are skipped.
                continue

            link = entry.get("link")
            raw_modified = entry.get("modified")
            if raw_modified:
                # NOTE(review): if ``modified`` is a 9-field time tuple, this
                # slice keeps 7 fields and the weekday lands in datetime's
                # microsecond slot; ``[:6]`` may be what was intended.
                # Left unchanged until the entry format is confirmed.
                modified = datetime(*raw_modified[0:-2])
            else:
                modified = None
            # Lazy logging arguments: the message is only formatted when
            # debug logging is enabled.
            logging.debug("%s modified %s (%s)",
                          link, modified, modified.__class__)
            url_resource = UrlResourceAccessor.get_or_create(
                link, source_modified=modified, feed=feed)

            # TODO: check if there is already an artifact for this resource
            info_key, _content_key, _source_key, created = (
                ArtifactAccessor.find_or_create(
                    source=feed.artifact_source.name,
                    content_type="text/plain",
                    body=stripped_content,
                    url=link,
                    url_resource=url_resource,
                    modified_by=user))

            yield info_key.name(), entry, created
        except Exception as e:
            if error_call:
                error_call(entry, e)
            else:
                # Bare raise re-raises with the original traceback intact.
                raise
示例#4
0
def ingest_feed_entries(feed, user, error_call=None):
    """
    Ingest every non-empty entry of a feed as a plain-text artifact.

    For each entry produced by ``feeds.generate_feed_entries(feed.url)`` that
    has a truthy ``stripped_content``, a URL resource is looked up or created
    for the entry's link and an artifact is looked up or created with the
    stripped content as its body.

    args:
        feed: object providing ``url`` and ``artifact_source.name``
        user: passed to ``ArtifactAccessor.find_or_create`` as ``modified_by``
        error_call: optional callable invoked as ``error_call(entry, exc)``
            when processing an entry raises; the loop then moves on to the
            next entry. When omitted, the exception propagates.

    yields:
        (artifact guid, entry, created) tuple, where ``created`` is the
        fourth value returned by ``ArtifactAccessor.find_or_create``
        (presumably whether the artifact was newly created -- confirm
        against the accessor)
    """
    # TODO: use etag from previous ingest
    for entry in feeds.generate_feed_entries(feed.url):
        try:
            stripped_content = entry.get("stripped_content")
            if not stripped_content:
                # Entries with missing or empty content are skipped.
                continue

            link = entry.get("link")
            raw_modified = entry.get("modified")
            if raw_modified:
                # NOTE(review): if ``modified`` is a 9-field time tuple, this
                # slice keeps 7 fields and the weekday lands in datetime's
                # microsecond slot; ``[:6]`` may be what was intended.
                # Left unchanged until the entry format is confirmed.
                modified = datetime(*raw_modified[0:-2])
            else:
                modified = None
            # Lazy logging arguments: the message is only formatted when
            # debug logging is enabled.
            logging.debug("%s modified %s (%s)",
                          link, modified, modified.__class__)
            url_resource = UrlResourceAccessor.get_or_create(
                link, source_modified=modified, feed=feed)

            # TODO: check if there is already an artifact for this resource
            info_key, _content_key, _source_key, created = (
                ArtifactAccessor.find_or_create(
                    source=feed.artifact_source.name,
                    content_type="text/plain",
                    body=stripped_content,
                    url=link,
                    url_resource=url_resource,
                    modified_by=user))

            yield info_key.name(), entry, created
        except Exception as e:
            if error_call:
                error_call(entry, e)
            else:
                # Bare raise re-raises with the original traceback intact.
                raise
示例#5
0
  def test_generate_feed_entries(self):
    """Check the fields exposed by each entry of a known feed.

    Collects every entry yielded by ``feeds.generate_feed_entries`` for a
    fixed blogspot feed URL, then asserts for each entry that ``has()``
    reports ``title``, ``etag``, ``modified`` and ``link``, and that
    ``get()`` returns a non-``None`` value for both ``stripped_content``
    and ``raw_content``.
    """
    feed_url = "http://gilesbowkett.blogspot.com/feeds/posts/default"
    # Drain the generator first so a failure while producing entries
    # surfaces before any per-entry assertion runs.
    results = list(feeds.generate_feed_entries(feed_url))
    for e in results:
        raw_content = e.get('raw_content')
        stripped_content = e.get('stripped_content')

        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(e.has('title'), "missing: title")
        self.assertTrue(e.has('etag'), "missing: etag")
        self.assertTrue(e.has('modified'), "missing: modified")
        # Parenthesised so the line parses as either a print statement
        # or a print() call; the output is the same.
        print("modified: %s" % e.modified)
        self.assertTrue(e.has('link'), "missing: link")

        # TODO: verify no HTML in stripped content

        # Not every post has content, so an empty string is accepted;
        # only a missing (None) value is a failure.
        self.assertTrue(stripped_content is not None,
                        "missing: stripped_content")
        self.assertTrue(raw_content is not None, "missing: raw_content")
示例#6
0
 def get(self, feed_url):
     """Write the entries of the feed at ``http://<feed_url>`` as JSON.

     Each entry becomes a dict with the keys ``title``, ``link``,
     ``content`` (the entry's ``stripped_content``) and ``modified``
     (the entry's ``modified`` value converted with ``str``). The list of
     dicts is handed to ``RequestHelper.write_json``.
     """
     full_url = "http://%s" % feed_url
     # The helper is created before the feed is read, as in the original.
     json_writer = RequestHelper(self)
     serialised = [
         {
             "title": entry.title,
             "link": entry.link,
             "content": entry.stripped_content,
             "modified": str(entry.modified),
         }
         for entry in generate_feed_entries(full_url)
     ]
     json_writer.write_json(serialised)