def test_quote_readable_matching(self):
    """Quotes created with a "Title (Last, First)" source string should
    link to the matching Readable and report that readable's slug."""
    volley = [
        ('1000', CRONY_TITLE, CRONY_AUTHOR,
         "CRONY BELIEFS (SIMLER)", CRONY_QUOTE),
        ('1001', "Thinking in Systems: A Primer", "Donna H. Meadows",
         "THINKING IN SYSTEMS A PRIMER (MEADOWS)", "XXX."),
    ]
    for source_id, title, author, exp_slug, content in volley:
        r = Readable.CreateOrUpdate(self.u, source_id, title=title,
                                    author=author, source="test")
        r.put()
        Readable.put_sd_batch([r])
        self.assertEqual(r.slug, exp_slug)
        # Compose the quote source as "Title (Last, First)"
        names = author.split(' ')
        source = "%s (%s, %s)" % (title, names[-1], names[0])
        q = Quote.Create(self.u, source, content)
        q.put()
        # The quote should have been linked to the readable on creation
        self.assertIsNotNone(q.readable)
        self.assertEqual(q.readable, r.key)
        self.assertEqual(q.source_slug(), exp_slug)
        # Title/author lookup should resolve back to the same readable
        r = Readable.GetByTitleAuthor(self.u, author, title)
        self.assertIsNotNone(r)
        self.assertEqual(r.source_id, source_id)

    # A quote whose source matches no readable stays unlinked
    q = Quote.Create(self.u, "xxx", "content...")
    q.put()
    self.assertIsNone(q.readable)

    # Fetching quotes scoped to the last readable returns only its quote
    quotes = Quote.Fetch(self.u, readable_id=r.key.id())
    self.assertEqual(len(quotes), 1)
    self.assertEqual(quotes[0].source, source)
def get_books_on_shelf(user, shelf='currently-reading'):
    '''
    Fetch the books on a user's Goodreads shelf and upsert them as Readables.

    Calls the Goodreads review-list XML API for the user's linked
    ``goodreads_user_id`` integration property, creates/updates one Readable
    per book, then batch-persists them.

    Args:
        user: user entity exposing get_integration_prop().
        shelf: Goodreads shelf name to fetch (default 'currently-reading').

    Returns:
        (success, readables): success is True only when the API replied 200;
        readables is the list of upserted Readable entities (possibly empty).
    '''
    user_id = user.get_integration_prop('goodreads_user_id')
    readables = []
    success = False
    if user_id:
        params = urllib.urlencode({
            'shelf': shelf,
            'key': GR_API_KEY,
            'v': 2
        })
        url = "https://www.goodreads.com/review/list/%s.xml?%s" % (user_id, params)
        logging.debug("Fetching %s for %s" % (url, user))
        res = urlfetch.fetch(
            url=url,
            method=urlfetch.GET,
            validate_certificate=True)
        logging.debug(res.status_code)
        if res.status_code == 200:
            xml = res.content
            data = etree.parse(StringIO(xml))
            for review in data.getroot().find('reviews').findall('review'):
                book = review.find('book')
                isbn = book.find('isbn13').text
                image_url = book.find('image_url').text
                title = book.find('title').text
                authors = book.find('authors')
                link = book.find('link').text
                # BUGFIX: 'author' was previously assigned only inside the
                # nested ifs — a first book without <author>/<name> raised
                # NameError, and later authorless books inherited the prior
                # book's author. Reset per iteration instead.
                author = None
                first_author = authors.find('author')
                if first_author is not None:
                    name = first_author.find('name')
                    if name is not None:
                        author = name.text
                r = Readable.CreateOrUpdate(user, isbn, title=title, url=link,
                                            source='goodreads',
                                            image_url=image_url,
                                            author=author,
                                            type=READABLE.BOOK,
                                            read=False)
                readables.append(r)
            success = True
    logging.debug("Putting %d readable(s)" % len(readables))
    ndb.put_multi(readables)
    Readable.put_sd_batch(readables)
    return (success, readables)
def test_quote_readable_matching(self):
    """Creating a quote with a "Title (Last, First)" source should attach
    it to the matching Readable; unmatched sources stay unlinked."""
    volley = [
        ('1000', "Crony Beliefs", "Kevin Simler",
         "CRONY BELIEFS (SIMLER)",
         "I contend that the best way to understand all the crazy beliefs out there — aliens, conspiracies, and all the rest — is to analyze them as crony beliefs. Beliefs that have been \"hired\" not for the legitimate purpose of accurately modeling the world, but rather for social and political kickbacks."),
        ('1001', "Thinking in Systems: A Primer", "Donna H. Meadows",
         "THINKING IN SYSTEMS A PRIMER (MEADOWS)",
         "XXX."),
    ]
    for source_id, title, author, exp_slug, content in volley:
        r = Readable.CreateOrUpdate(self.u, source_id, title=title,
                                    author=author, source="test")
        r.put()
        Readable.put_sd_batch([r])
        self.assertEqual(r.slug, exp_slug)
        # Build the quote's source string: "Title (Last, First)"
        author_names = author.split(' ')
        source = "%s (%s, %s)" % (title, author_names[-1], author_names[0])
        q = Quote.Create(self.u, source, content)
        q.put()
        # The new quote should be linked to the readable by slug match
        self.assertIsNotNone(q.readable)
        self.assertEqual(q.readable, r.key)
        self.assertEqual(q.source_slug(), exp_slug)
        # Round-trip: looking up by title/author finds the same record
        r = Readable.GetByTitleAuthor(self.u, author, title)
        self.assertIsNotNone(r)
        self.assertEqual(r.source_id, source_id)

    # No readable exists for this source, so the quote stays unlinked
    q = Quote.Create(self.u, "xxx", "content...")
    q.put()
    self.assertIsNone(q.readable)

    # Scoped fetch returns exactly the one quote tied to the readable
    quotes = Quote.Fetch(self.u, readable_id=r.key.id())
    self.assertEqual(len(quotes), 1)
    self.assertEqual(quotes[0].source, source)
def sync(user, access_token, since_timestamp=0):
    '''
    Sync the user's Pocket list into Readable entities.

    POSTs to Pocket's /v3/get endpoint (detailType=complete, state=all)
    and upserts one Readable per returned article. Despite the legacy
    wording of the old docstring, this returns a tuple, not JSON:

    Returns:
        (success, readables, latest_timestamp) where success is True only
        on an HTTP 200 reply, readables is the list of upserted Readable
        entities, and latest_timestamp is Pocket's 'since' value (0 when
        the call failed) for use as the next incremental-sync cursor.

    Args:
        user: user entity passed through to Readable.CreateOrUpdate.
        access_token: Pocket OAuth access token.
        since_timestamp: Unix seconds; only items changed after this are
            returned by Pocket (0 fetches everything).
    '''
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'detailType': 'complete',
        'since': since_timestamp,
        'state': 'all'
    })
    success = False
    res = urlfetch.fetch(
        url=GET_ENDPOINT,
        payload=data,
        method=urlfetch.POST,
        deadline=60,
        validate_certificate=True)
    logging.debug(res.status_code)
    latest_timestamp = 0
    readables = []
    if res.status_code == 200:
        data = json.loads(res.content)
        articles = data.get('list', {})
        # Pocket echoes back a 'since' cursor for incremental syncs.
        # NOTE(review): trusted as-is; no fallback if the key is absent.
        latest_timestamp = data.get('since', 0)
        save = []
        USE_RESOLVED_TITLE = True
        if articles:
            for id, article in articles.items():
                source = 'pocket'
                # Prefer Pocket's parser-resolved title over the raw one
                # the client submitted (often empty or a bare URL).
                if USE_RESOLVED_TITLE:
                    title = article.get('resolved_title')
                else:
                    title = article.get('given_title')
                url = article.get('given_url')
                status = article.get('status')
                authors = article.get('authors')
                excerpt = article.get('excerpt')
                images = article.get('images')
                # Pocket timestamps are Unix seconds (strings); convert to ms.
                time_added = int(article.get('time_added', 0)) * 1000
                time_read = int(article.get('time_read', 0)) * 1000
                dt_added = tools.dt_from_ts(time_added)
                # time_read == 0 means "never read" -> no datetime.
                dt_read = tools.dt_from_ts(time_read) if time_read else None
                tags = article.get('tags', {}).keys()
                word_count = int(article.get('word_count', 0))
                favorite = int(article.get('favorite', 0)) == 1
                image_url = None
                author = None
                if images:
                    # Pocket keys images by ordinal string; '1' is the lead image.
                    first_image = images.get('1')
                    if first_image:
                        image_url = first_image.get('src')
                if authors:
                    # Arbitrary first author (py2 dict order); name only.
                    author_keys = authors.keys()
                    if author_keys:
                        author = authors.get(author_keys[0], {}).get('name')
                # status == 1 means archived in Pocket; an explicit 'unread'
                # tag overrides archive to mean "not actually read".
                archived = int(status) == 1
                read = archived and (not tags or 'unread' not in tags)
                r = Readable.CreateOrUpdate(user, source_id=id, title=title,
                                            url=url, image_url=image_url,
                                            author=author, excerpt=excerpt,
                                            favorite=favorite,
                                            dt_added=dt_added,
                                            word_count=word_count,
                                            dt_read=dt_read, tags=tags,
                                            source=source, read=read)
                if r:
                    r.Update(read=archived, favorite=favorite, dt_read=dt_read)
                    save.append(r)
                    readables.append(r)
        # Persist everything in one batch, then update search docs.
        ndb.put_multi(save)
        Readable.put_sd_batch(save)
        success = True
    else:
        logging.debug(res.headers)
    return (success, readables, latest_timestamp)
def sync(user, access_token):
    '''
    Incrementally sync the user's Pocket list into Readable entities.

    Like the parameterless-cursor variant, but the 'since' cursor is
    persisted on the user: it is read from the 'pocket_last_timestamp'
    integration prop (defaulting to 7 days ago on first sync) and written
    back after a successful fetch, so each call only pulls changes since
    the previous one. Returns a tuple, not JSON:

    Returns:
        (success, readables, latest_timestamp) where success is True only
        on an HTTP 200 reply, readables is the list of upserted Readable
        entities, and latest_timestamp is Pocket's new 'since' cursor
        (0 when the call failed).

    Args:
        user: user entity with get/set_integration_prop().
        access_token: Pocket OAuth access token.
    '''
    # First-ever sync starts 7 days back rather than fetching all history.
    dt = datetime.now() - timedelta(days=7)
    init_sync_since = tools.unixtime(dt, ms=False)
    TS_KEY = 'pocket_last_timestamp'  # Seconds
    since_timestamp = user.get_integration_prop(TS_KEY, init_sync_since)
    data = urllib.urlencode({
        'access_token': access_token,
        'consumer_key': POCKET_CONSUMER_KEY,
        'detailType': 'complete',
        'since': since_timestamp,
        'state': 'all'
    })
    success = False
    logging.debug("Syncing pocket for %s since %s" % (user, dt))
    res = urlfetch.fetch(
        url=GET_ENDPOINT,
        payload=data,
        method=urlfetch.POST,
        deadline=60,
        validate_certificate=True)
    logging.debug(res.status_code)
    latest_timestamp = 0
    readables = []
    if res.status_code == 200:
        data = json.loads(res.content)
        articles = data.get('list', {})
        # Pocket echoes back a 'since' cursor for the next incremental sync.
        # NOTE(review): trusted as-is; no fallback if the key is absent.
        latest_timestamp = data.get('since', 0)
        save = []
        USE_RESOLVED_TITLE = True
        if articles:
            for id, article in articles.items():
                source = 'pocket'
                # Prefer Pocket's parser-resolved title over the raw one
                # the client submitted (often empty or a bare URL).
                if USE_RESOLVED_TITLE:
                    title = article.get('resolved_title')
                else:
                    title = article.get('given_title')
                url = article.get('given_url')
                status = article.get('status')
                authors = article.get('authors')
                excerpt = article.get('excerpt')
                images = article.get('images')
                # Pocket timestamps are Unix seconds (strings); convert to ms.
                time_added = int(article.get('time_added', 0)) * 1000
                time_read = int(article.get('time_read', 0)) * 1000
                dt_added = tools.dt_from_ts(time_added)
                # time_read == 0 means "never read" -> no datetime.
                dt_read = tools.dt_from_ts(time_read) if time_read else None
                tags = article.get('tags', {}).keys()
                word_count = int(article.get('word_count', 0))
                favorite = int(article.get('favorite', 0)) == 1
                image_url = None
                author = None
                if images:
                    # Pocket keys images by ordinal string; '1' is the lead image.
                    first_image = images.get('1')
                    if first_image:
                        image_url = first_image.get('src')
                if authors:
                    # Arbitrary first author (py2 dict order); name only.
                    author_keys = authors.keys()
                    if author_keys:
                        author = authors.get(author_keys[0], {}).get('name')
                # status == 1 means archived in Pocket; an explicit 'unread'
                # tag overrides archive to mean "not actually read".
                archived = int(status) == 1
                read = archived and (not tags or 'unread' not in tags)
                r = Readable.CreateOrUpdate(user, source_id=id, title=title,
                                            url=url, image_url=image_url,
                                            author=author, excerpt=excerpt,
                                            favorite=favorite,
                                            dt_added=dt_added,
                                            word_count=word_count,
                                            dt_read=dt_read, tags=tags,
                                            source=source, read=read)
                if r:
                    r.Update(read=archived, favorite=favorite, dt_read=dt_read)
                    save.append(r)
                    readables.append(r)
        # Persist everything in one batch, then update search docs,
        # then advance the stored cursor only after a successful fetch.
        ndb.put_multi(save)
        Readable.put_sd_batch(save)
        user.set_integration_prop(TS_KEY, latest_timestamp)
        success = True
    else:
        logging.debug(res.headers)
    return (success, readables, latest_timestamp)