Exemplo n.º 1
0
    def test_link_stats(self):
        """
        Smoke-test ``pageone.get`` with and without ``visible_only``.

        TK: better tests. For now this only checks that each call runs and
        that the yielded links expose the keys inspected below.
        """
        # default call: every yielded link should carry a 'url' key
        propublica_links = pageone.get('https://www.propublica.org/', pattern=p)
        for item in propublica_links:
            assert 'url' in item

        # visible_only call: every yielded link should have a truthy 'visible'
        npr_links = pageone.get(
            'https://www.npr.org', pattern=p, visible_only=True)
        for item in npr_links:
            assert item['visible']
Exemplo n.º 2
0
    def test_link_stats(self):
        """
        Exercise ``pageone.get`` in two call styles and sanity-check the links.

        TK: better tests. At the moment this just confirms the calls run:
        once with only a pattern, once with ``visible_only=True``.
        """
        homepage = 'https://www.propublica.org/'
        for found in pageone.get(homepage, pattern=p):
            # each link is expected to contain a 'url' entry
            assert 'url' in found

        visible_kwargs = {'pattern': p, 'visible_only': True}
        for found in pageone.get('https://www.npr.org', **visible_kwargs):
            # with visible_only set, 'visible' must be truthy on every link
            assert found['visible']
Exemplo n.º 3
0
    def run(self):
        """
        Fetch homepage links, match them to content item IDs, and yield a
        ``time_on_homepage`` metric for every matching content item.

        The page to fetch is read from ``self.options['page']``; all other
        options are forwarded to ``pageone.get`` as keyword arguments.

        Yields:
            dict with the keys ``datetime`` (``dates.now()``, evaluated per
            yielded item), ``content_item_id``, and ``metrics``, where
            ``metrics['time_on_homepage']`` is ``self.recipe.get('minutes', 60)``.

        Raises:
            KeyError: if ``self.options`` has no ``'page'`` entry.
        """
        # Work on a copy: popping 'page' straight from self.options removed it
        # from the instance, so a second run() raised KeyError.
        opts = dict(self.options)
        page = opts.pop('page')

        for link in pageone.get(page, **opts):
            # Try the URL prepared without canonicalization first; only
            # canonicalize when that form is not in the lookup.
            u = url.prepare(link.get('url'), canonicalize=False)
            if u and u not in self.url_lookup:
                u = url.prepare(u, canonicalize=True)

            # skip links that do not map to any known content item
            if not u or u not in self.url_lookup:
                continue

            # yield one metrics record per content item sharing this url
            for cid in self.url_lookup[u]:
                yield {
                    'datetime': dates.now(),
                    'content_item_id': cid,
                    'metrics': {
                        'time_on_homepage': self.recipe.get('minutes', 60)
                    }
                }
Exemplo n.º 4
0
    def run(self):
        """
        Look up homepage links against known content items and emit, for each
        match, a record of how long the item is credited on the homepage.

        ``self.options['page']`` names the page to fetch; the remaining
        options are passed through to ``pageone.get`` as keyword arguments.

        Yields:
            dict containing ``datetime`` (``dates.now()`` at the time the
            record is built), ``content_item_id``, and ``metrics`` with
            ``time_on_homepage`` taken from ``self.recipe.get('minutes', 60)``.

        Raises:
            KeyError: if ``'page'`` is missing from ``self.options``.
        """
        # Copy before popping so the instance keeps its 'page' option; the
        # in-place pop made run() fail with KeyError when called again.
        fetch_kwargs = dict(self.options)
        page = fetch_kwargs.pop('page')

        for link in pageone.get(page, **fetch_kwargs):
            # First attempt: url prepared without canonicalization. Fall back
            # to the canonicalized form only if the first is not in the lookup.
            u = url.prepare(link.get('url'), canonicalize=False)
            if u and u not in self.url_lookup:
                u = url.prepare(u, canonicalize=True)

            # nothing to report for urls with no associated content items
            if not u or u not in self.url_lookup:
                continue

            # one metrics record per content item id mapped to this url
            for cid in self.url_lookup[u]:
                yield {
                    'datetime': dates.now(),
                    'content_item_id': cid,
                    'metrics': {
                        'time_on_homepage': self.recipe.get('minutes', 60)
                    }
                }