import feedparser
from google.appengine.ext import ndb

# conf, shorten_url_bitly, update_status_twitter, FeedConsume and FeedItem are
# project-level helpers/models assumed to be importable in this module.


def twitter_bot(rss_guid=None):
    """
    Consumes a feed and checks if there are new entries in db.
    If so, gets a shortened url and tweets the new status.
    """
    if rss_guid is None:
        # ancestor_key = ndb.Key("RSS_GUID", rss_guid or "*norss*")
        # consumer = FeedConsume.get_last_rss_guid(ancestor_key)
        # rss_guid = consumer[0].rss_guid
        query = FeedConsume.gql("WHERE entry = :1", "latest")
        result = query.get()
        rss_guid = result.rss_guid
    else:
        consumer = FeedConsume(parent=ndb.Key("RSS_GUID", rss_guid or "*norss*"),
                               rss_guid=rss_guid, entry="latest")
        consumer.put()

    url = "{}erss.cgi?rss_guid={}".format(conf("pubmed_rss"), rss_guid)
    feeds = feedparser.parse(url)
    tweets = []
    for feed in feeds["items"]:
        # NB: rstrip removes any of these *characters* from the right, not the
        # literal suffix; safe here because PMIDs consist only of digits.
        pmid = feed["link"].split("/")[-1].rstrip("?dopt=Abstract")
        if "entrez?" in pmid:
            continue
        query = FeedItem.gql("WHERE pmid = :1", pmid)
        if query.count() == 0:  # pmid not yet in db
            title = feed["title"]
            otitle = title
            url = feed["link"]
            category = feed["category"]
            item = FeedItem()
            item.pmid = pmid
            # shorten the url with Bitly.com
            shorturl = shorten_url_bitly(url)
            # budget for a 140-char tweet: the 7 covers "#", ": ", "..." and
            # the space before the short url
            max_length = 140 - len(category) - len(shorturl) - 7
            if len(title) > max_length:
                title = title[0:max_length]
            status = "#{}: {}... {}".format("".join(category.split()),
                                            title.rstrip(". "), shorturl)
            try:
                status = unicode(status).encode("utf-8")  # Python 2 only
            except NameError:
                pass  # Python 3: str is already Unicode
            except UnicodeEncodeError:
                pass  # TODO: add logging
            # tweet new status
            # tweets.append({'title': "{}...".format(title.rstrip(". ")), 'url': shorturl})
            ttitle = "#{}: {}...".format("".join(category.split()),
                                         otitle[0:100].rstrip(". "))
            tweets.append({'title': ttitle, 'url': shorturl})
            try:
                update_status_twitter(status)
                item.put()
            except Exception:
                pass  # TODO: log failed tweets instead of silently dropping them
    return tweets
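# The two network helpers used above (shorten_url_bitly, update_status_twitter)
# are not shown in this snippet. Below is a minimal sketch of how they might
# look, assuming the `requests` and `tweepy` libraries and hypothetical
# credential names (BITLY_TOKEN and the four TWITTER_* variables); the real
# helpers may differ.
import os

import requests
import tweepy


def shorten_url_bitly(long_url):
    """Shorten a URL via Bitly's v4 REST API (sketch, not the original helper)."""
    resp = requests.post(
        "https://api-ssl.bitly.com/v4/shorten",
        json={"long_url": long_url},
        headers={"Authorization": "Bearer {}".format(os.environ["BITLY_TOKEN"])},
    )
    resp.raise_for_status()
    return resp.json()["link"]


def update_status_twitter(status):
    """Post a tweet via tweepy (sketch, not the original helper)."""
    auth = tweepy.OAuthHandler(os.environ["TWITTER_API_KEY"],
                               os.environ["TWITTER_API_SECRET"])
    auth.set_access_token(os.environ["TWITTER_ACCESS_TOKEN"],
                          os.environ["TWITTER_ACCESS_SECRET"])
    tweepy.API(auth).update_status(status)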
import datetime
import logging
import re
import time

import feedparser
from google.appengine.api import urlfetch
from google.appengine.ext import db

# Compiled once at module level instead of on every loop iteration. The
# original [A-z] class also matched the punctuation between "Z" and "a";
# [A-Za-z...] is what was intended.
linkre = re.compile(r"http://(?:www\.)?explosm\.net/comics/\d+/?")
comicre = re.compile(
    r"(http://(?:www\.)?explosm\.net/db/files/Comics/"
    r"[A-Za-z0-9_\-\+]+/[A-Za-z0-9\-_\+]+\.(gif|png))")


# Method of a webapp RequestHandler subclass (hence self.request / self.error).
def get(self):
    feeds = self._getFeeds()
    logging.debug("Got %d feeds" % feeds.count())
    force = self.request.get("force") == "1"
    if force:
        logging.debug("Force option enabled")
    for feed in feeds:
        logging.debug("Feed %s last updated %s" % (feed.name, feed.last_updated))
        age = time.time() - time.mktime(feed.last_updated.timetuple())
        if not force and age < 3600 * 4:
            logging.debug("Feed %s doesn't need updates, skipping" % feed.name)
            continue
        logging.debug("Fetching %s" % feed.url)
        feed_content = urlfetch.fetch(feed.url)
        logging.debug("Fetched, status = %d" % feed_content.status_code)
        if feed_content.status_code == 200:
            parsed_feed = feedparser.parse(feed_content.content)
            feed.last_updated = datetime.datetime.now()
            feed.put()
        else:
            logging.error("Failed to load feed %s" % feed.name)
            self.error(500)
            continue  # without this, parsed_feed below would be stale or undefined
        logging.debug("Got %d entries" % len(parsed_feed.entries))
        for e in parsed_feed.entries:
            if not linkre.match(e.link):
                logging.debug("Skipping unknown link %s" % e.link)
                continue
            if FeedItem.is_fetched(e.link):
                logging.debug("Skipping already fetched item %s" % e.link)
                continue
            logging.debug("Going to fetch entry %s" % e.link)
            result = urlfetch.fetch(e.link)
            logging.debug("Fetched, status = %d" % result.status_code)
            if result.status_code != 200:
                logging.debug("Failed to download %s" % e.link)
                continue
            results = comicre.findall(result.content)
            if not results:
                logging.debug("Got no enclosure in %s" % e.link)
                continue
            logging.debug("Going to fetch enclosure %s" % results[0][0])
            enclosure = urlfetch.fetch(results[0][0])
            logging.debug("Fetched, status = %d" % enclosure.status_code)
            if enclosure.status_code != 200:
                logging.error("Failed to fetch enclosure %s" % results[0][0])
                continue
            feed_item = FeedItem()
            feed_item.title = e.title
            feed_item.url = e.link
            feed_item.content_type = "image/" + results[0][1]
            feed_item.feed = feed
            feed_item.date = datetime.datetime.fromtimestamp(time.mktime(e.updated_parsed))
            feed_item.content = db.Text(e.description)
            feed_item.enclosure = enclosure.content
            feed_item.put()
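# FeedItem.is_fetched is called above but not defined in this snippet. A
# minimal sketch of what it might look like on the FeedItem model, assuming
# the old google.appengine.ext.db API used above (an assumption, not the
# original model code):
class FeedItem(db.Model):
    # ... plus title, content_type, feed, date, content and enclosure
    # properties, as assigned in the handler above ...
    url = db.LinkProperty()

    @classmethod
    def is_fetched(cls, url):
        # An entry counts as fetched if any stored item has the same link;
        # count(limit=1) stops after the first match instead of scanning all.
        return cls.gql("WHERE url = :1", url).count(limit=1) > 0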