def test_link_stats(self):
    """
    Smoke test for ``pageone.get``.

    TK: better tests. For now this only checks that the call runs in two
    configurations and that the returned links have the expected keys.
    """
    # Default call: every returned link must expose a 'url' key.
    propublica_links = pageone.get('https://www.propublica.org/', pattern=p)
    for link in propublica_links:
        assert 'url' in link

    # visible_only=True: every returned link must have a truthy 'visible'.
    npr_links = pageone.get(
        'https://www.npr.org', pattern=p, visible_only=True)
    for link in npr_links:
        assert link['visible']
def test_link_stats(self):
    """
    Smoke test for ``pageone.get``.

    TK: better tests. For now this just confirms the call runs with and
    without ``visible_only`` and that the links look sane.
    """
    # Every link from the default call carries a 'url' key.
    assert all(
        'url' in link
        for link in pageone.get('https://www.propublica.org/', pattern=p)
    )

    # With visible_only=True, every link has a truthy 'visible' value.
    assert all(
        link['visible']
        for link in pageone.get(
            'https://www.npr.org', pattern=p, visible_only=True)
    )
def run(self):
    """
    Fetch homepage URLs, look up content item IDs, and emit a
    ``time_on_homepage`` metric for every matching content item.

    The homepage is taken from ``self.options['page']``; all remaining
    options are forwarded to ``pageone.get`` as keyword arguments.

    Yields:
        dict with keys ``'datetime'`` (``dates.now()`` at yield time),
        ``'content_item_id'`` and ``'metrics'``, where ``'metrics'`` is
        ``{'time_on_homepage': <recipe 'minutes', default 60>}``.

    Raises:
        KeyError: if ``self.options`` has no ``'page'`` entry.
    """
    # Pop 'page' from a shallow copy so self.options is left intact;
    # popping from the instance dict made a second run() raise KeyError.
    options = dict(self.options)
    page = options.pop('page')

    for link in pageone.get(page, **options):
        # Try the non-canonicalized form first; only fall back to
        # canonicalize=True when that form is not in the lookup.
        link_url = url.prepare(link.get('url'), canonicalize=False)
        if link_url and link_url not in self.url_lookup:
            link_url = url.prepare(link_url, canonicalize=True)

        # Skip links that are empty or unknown to the lookup.
        if not link_url or link_url not in self.url_lookup:
            continue

        # A single URL may map to several content items.
        for cid in self.url_lookup[link_url]:
            yield {
                'datetime': dates.now(),
                'content_item_id': cid,
                'metrics': {
                    'time_on_homepage': self.recipe.get('minutes', 60)
                }
            }