def processSelftext(lid, body, author):
    """Run the configured rules against the selftext of one link.

    Rules are taken from ``_["rules"]`` and tried in order.  Rules whose
    ``flags`` contain ``commentsOnly`` are skipped.  For each remaining rule
    exactly one of three actions is attempted, in this priority:

    * ``user_function``: the named attribute of ``userfunctions`` is called
      with ``(lid, text, author)``.
    * ``regex`` + ``response`` + ``re``: ``rule["re"]`` is searched in the
      text; on a match ``respond`` is called and rule processing stops.
    * ``string`` + ``response``: a plain substring test; on a hit ``respond``
      is called and rule processing stops.

    Args:
        lid: Link id; it is passed through ``lib.base36encode`` and prefixed
            with ``t3_`` to form the target name given to ``respond``.
        body: Selftext of the link.
        author: Passed unchanged to user functions and to ``respond``.

    Returns:
        None.
    """
    for rule in _["rules"]:
        flags = rule["flags"] if "flags" in rule else ()

        if "commentsOnly" in flags:
            continue

        # Strip quoted text for THIS rule only.  Previously ``body`` itself was
        # overwritten, so one rule carrying ``ignoreQuotedText`` silently
        # removed quotes for every rule that came after it.
        if "ignoreQuotedText" in flags:
            text = re.sub(quotedRE, "", body)
        else:
            text = body

        if "user_function" in rule:
            try:
                getattr(userfunctions, rule["user_function"])(lid, text, author)
            except Exception as e:  # "as" form: valid on Python 2.6+ and 3
                log.write('Error running user function "%s": %s' % (rule["user_function"], e), "error")
                # NOTE(review): the source this was reconstructed from had lost
                # its indentation; this return is assumed to belong to the
                # error handler (stop on failure) -- confirm.
                return
        elif "regex" in rule and "response" in rule and "re" in rule:
            match = rule["re"].search(text)
            if match:
                respond("t3_%s" % lib.base36encode(lid), rule, match, author, text)
                break
        elif "string" in rule and "response" in rule:
            if rule["string"] in text:
                respond("t3_%s" % lib.base36encode(lid), rule, None, author, text)
                break
locations.build(_['crawl_subreddits'], _['crawl_urls']) # Crawls URLS from locations if 'runall' in argv or 'links' in argv: cur.execute("select id, url from crawl_locations where last_crawled < date_sub(now(), interval %s second)", (_['find_links_after'],)) for l in cur.fetchall(): links.get("%s?limit=%d" % (l[1], _['links_per_page'])) cur.execute("update crawl_locations set last_crawled = now() where id = %s", (l[0],)) db.commit() # Crawl eligible links if 'runall' in argv or 'comments' in argv: cur.execute("select id, permalink from t3 where last_crawled < date_sub(now(), interval %s second)", (_['recrawl_links_after'],)) for l in cur.fetchall(): for sort in _['comment_sort']: comments.get("http://www.reddit.com%s" % l[1], 't3_' + lib.base36encode(l[0]).lower(), '', "limit=%d&depth=%d&sort=%s" % (_['comment_limit_per_request'], _['comment_depth_per_request'], sort)) cur.execute("update t3 set last_crawled = now() where id = %s", (l[0],)) db.commit() sleep(_['sleep']) #Login and respond to links/comments if 'runall' in argv or 'respond' in argv: log.write("Checking for existing session...", "message") user.checkLogin() if not user.isLoggedIn: user.login() if user.isLoggedIn: log.write("Processing text and responding...", "message") #Process selftext cur.execute("select id, content, author from t3 where content is not null")