def get(tags=None):
    """Return the 50 most recent stored articles, newest first.

    Args:
        tags: Comma-separated tag filter. Currently ignored: tag filtering
            is marked as broken, so the argument is overwritten with
            ``None`` and the unfiltered query is always used.

    Returns:
        A dict ``{"articles": ...}``. On success the value is the result of
        ``coll.find()`` sorted by ``timestamp`` descending and limited to
        50 entries. If anything fails, the value is a one-element list
        holding a placeholder "Error" article from ``news.createArticle``.
    """
    import sys  # local import: only needed for the error log below

    try:
        coll = getCollection("news", "articles")
        tags = None  # skips tags
        if tags:  # BROKEN - skipped for now
            print("There are tags!")
            tags = tags.lower().split(',')
            can_have = [{"tags": t[1:]} for t in tags if t[0] != '^']
            # TODO: tags prefixed with '^' were meant to be excluded
            # ("can't have"); that half of the filter was never finished.
            params = {"$or": can_have}
            articles = coll.find(params)
        else:
            articles = coll.find()
        # Sort/limit only applies to a query result, so it lives inside the
        # try block; the fallback list below has no such methods.
        articles = articles.sort("timestamp", -1).limit(50)
    except Exception as e:
        sys.stderr.write("get: could not load articles: %s\n" % (e,))
        articles = [news.createArticle("Error", "Error", "Error", "Error", "Error")]
    return {"articles": articles}
def main():
    """Poll every news source forever and store articles not yet in the DB.

    Each round calls every source function, collects the articles they
    return, and inserts those whose ``url`` is not already present in the
    ``news.articles`` collection. A failing source is reported and skipped;
    a failure while talking to the database is reported and the round is
    abandoned. The function then sleeps and starts the next round; it
    never returns.
    """
    # sys/time are imported here so the block does not depend on
    # module-level imports that are not visible from this function.
    import sys
    import time

    import news
    from db_wrapper import loadCredentials, getCollection

    poll_seconds = 120  # pause between rounds (2 minutes)

    try:
        creds = loadCredentials()
    except Exception:
        # Credentials are optional here: None is passed on to getCollection.
        creds = None

    while True:
        try:
            new_articles = []
            source_fns = [
                news.NYT_mostPopular,
                news.NPR_news,
                news.HN_frontPage,
                news.TNY_news,
                news.AP_topNews,
            ]
            for src in source_fns:
                # One broken source must not prevent the others from running.
                try:
                    new_articles.extend(src())
                except Exception as e:
                    print("Error with source %s" % (src,))
                    print("\t%s" % (e,))

            coll = getCollection("news", "articles", creds)
            print("number of old articles: %d" % coll.count())

            added = 0
            for a in new_articles:
                print("a.url = %s" % a["url"])
                # The URL is the identity of an article: skip known ones.
                existing = coll.find_one({"url": a["url"]})
                if existing:
                    print("\t already exists: %d" % existing["timestamp"])
                else:
                    print("\t Added to DB")
                    coll.insert(a)
                    added += 1
            print("Added %d new articles" % added)
        except Exception as e:
            print("Failed to save to database, see stderr logs")
            sys.stderr.write("Exception: %s\n" % (e,))

        time.sleep(poll_seconds)
def viewtext():
    """Return the readable text of the requested article.

    The article URL is read from the ``url`` query parameter. The stored
    article with that URL is looked up in the ``news.articles`` collection;
    if it already carries a rendered ``html`` value, that value is returned.
    Otherwise the text is fetched through ``news.viewtext``, HTML-escaped
    when possible, serialized to JSON, stored back on the article as a
    cache, and returned.

    Returns:
        A JSON string with ``title``, ``body`` and ``url`` keys on success,
        or the dict ``{"title": "Error", "body": "Error", "url": "Error"}``
        when the URL is not a stored article or anything else fails.
    """
    error = {"title": "Error", "body": "Error", "url": "Error"}
    url = request.GET.get('url', "ERROR, please try a different link")
    try:
        DBarticles = getCollection("news", "articles")
        match = DBarticles.find_one({"url": url})
        if not match:
            # Only URLs of stored articles are served; without this guard
            # the code below would fail on the missing document anyway.
            sys.stderr.write("viewtext: no stored article for url %r\n" % (url,))
            return error

        print("Cache-hit => %s" % url)
        # .get(): an article that was never rendered may lack the key.
        out = match.get('html')
        if not out:
            print("\tNope, refetching")
            _title, _, body = news.viewtext(url)
            try:
                body = news.html_escape(body)  # clean up the body?
            except Exception as e:
                # Fall back to the unescaped body rather than failing.
                print("!!! Could not HTML escape the body of text")
                print(e)
            out = json.dumps({"title": match['title'], "body": body, "url": url})
            match['html'] = out
            try:
                # Caching is best effort: the write can fail (e.g. a capped
                # collection refuses documents that grow), and the rendered
                # text should still be returned in that case.
                DBarticles.save(match)
            except Exception as e:
                sys.stderr.write("viewtext: could not cache %s: %s\n" % (url, e))
        return out
    except Exception as e:
        sys.stderr.write("%s\n" % (e,))
    return error