def check_if_spam_json(json_data): text_data = json.loads(json_data)["data"] if text_data == "hb": return False, None, "" try: data = json.loads(text_data) except ValueError: GlobalVars.charcoal_hq.send_message( u"Encountered ValueError parsing the following:\n{0}".format( json_data), False) return False, None, "" if "ownerUrl" not in data: # owner's account doesn't exist anymore, no need to post it in chat: # http://chat.stackexchange.com/transcript/message/18380776#18380776 return False, None, "" title = data["titleEncodedFancy"] title = unescape_title(title) body = data["bodySummary"] poster = data["ownerDisplayName"] url = data["url"] post_id = str(data["id"]) print time.strftime("%Y-%m-%d %H:%M:%S"), title.encode("ascii", errors="replace") site = data["siteBaseHostAddress"] site = site.encode("ascii", errors="replace") sys.stdout.flush() is_spam, reason, why = check_if_spam(title, body, poster, url, site, post_id, False, True, 1, 0) return is_spam, reason, why
def check_if_spam_json(json_data): text_data = json.loads(json_data)["data"] if text_data == "hb": return False, None, "" try: data = json.loads(text_data) except ValueError: GlobalVars.charcoal_hq.send_message(u"Encountered ValueError parsing the following:\n{0}".format(json_data), False) return False, None, "" if "ownerUrl" not in data: # owner's account doesn't exist anymore, no need to post it in chat: # http://chat.stackexchange.com/transcript/message/18380776#18380776 return False, None, "" title = data["titleEncodedFancy"] title = parsing.unescape_title(title) body = data["bodySummary"] poster = data["ownerDisplayName"] url = data["url"] post_id = str(data["id"]) print time.strftime("%Y-%m-%d %H:%M:%S"), title.encode("ascii", errors="replace") site = data["siteBaseHostAddress"] site = site.encode("ascii", errors="replace") sys.stdout.flush() is_spam, reason, why = check_if_spam(title=title, body=body, user_name=poster, user_url=url, post_site=site, post_id=post_id, is_answer=False, body_is_summary=True, owner_rep=1, post_score=0) return is_spam, reason, why
def check_if_spam_json(data): d = json.loads(json.loads(data)["data"]) try: _ = d["ownerUrl"] # noqa except: # owner's account doesn't exist anymore, no need to post it in chat: # http://chat.stackexchange.com/transcript/message/18380776#18380776 return False, None, "" title = d["titleEncodedFancy"] title = unescape_title(title) body = d["bodySummary"] poster = d["ownerDisplayName"] url = d["url"] post_id = str(d["id"]) print time.strftime("%Y-%m-%d %H:%M:%S"), title.encode("ascii", errors="replace") quality_score = bayesian_score(title) print quality_score if quality_score < 0.3 and d["siteBaseHostAddress"] == "stackoverflow.com": print GlobalVars.bayesian_testroom.send_message( "[ SmokeDetector | BayesianBeta ] Quality score " + str(quality_score * 100) + ": [" + title + "](" + url + ")") site = d["siteBaseHostAddress"] site = site.encode("ascii", errors="replace") sys.stdout.flush() is_spam, reason, why = check_if_spam(title, body, poster, url, site, post_id, False, True) return is_spam, reason, why
def handle_spam_json(data, reason): try: d = json.loads(json.loads(data)["data"]) title = unescape_title(d["titleEncodedFancy"]) poster = d["ownerDisplayName"] site = d["siteBaseHostAddress"] url = d["url"] poster_url = d["ownerUrl"] post_id = str(d["id"]) handle_spam(title, poster, site, url, poster_url, post_id, reason, False) except: print "NOP"
def handle_spam_json(data, reason, why=""): try: d = json.loads(json.loads(data)["data"]) title = unescape_title(d["titleEncodedFancy"]) body = d["bodySummary"] poster = d["ownerDisplayName"] site = d["siteBaseHostAddress"] url = d["url"] poster_url = d["ownerUrl"] post_id = str(d["id"]) handle_spam(title, body, poster, site, url, poster_url, post_id, reason, False, why) except: print "NOP"
def handle_spam_json(data, reason, why=""):
    """Decode a doubly JSON-encoded post and hand its fields to ``handle_spam``.

    ``data`` is a JSON document whose ``"data"`` member is itself a
    JSON-encoded description of a post; ``reason`` and ``why`` are forwarded
    unchanged.

    Nothing raised inside propagates to the caller: every exception is passed
    to ``excepthook.uncaught_exception`` instead. Returns ``None``.
    """
    try:
        post = json.loads(json.loads(data)["data"])

        # Field lookups stay in this order so that the first missing key is
        # the one that gets reported.
        title = unescape_title(post["titleEncodedFancy"])
        body = post["bodySummary"]
        poster = post["ownerDisplayName"]
        site = post["siteBaseHostAddress"]
        url = post["url"]
        poster_url = post["ownerUrl"]
        post_id = str(post["id"])

        handle_spam(title, body, poster, site, url, poster_url, post_id,
                    reason, False, why)
    except:  # noqa - catch-all on purpose; everything goes to the hook
        # sys.exc_info() yields (type, value, traceback), which are exactly
        # the three arguments the hook is called with.
        excepthook.uncaught_exception(*sys.exc_info())
def check_if_spam_json(data): d = json.loads(json.loads(data)["data"]) try: _ = d["ownerUrl"] # noqa except: # owner's account doesn't exist anymore, no need to post it in chat: # http://chat.stackexchange.com/transcript/message/18380776#18380776 return False, None, "" title = d["titleEncodedFancy"] title = unescape_title(title) body = d["bodySummary"] poster = d["ownerDisplayName"] url = d["url"] post_id = str(d["id"]) print time.strftime("%Y-%m-%d %H:%M:%S"), title.encode("ascii", errors="replace") site = d["siteBaseHostAddress"] site = site.encode("ascii", errors="replace") sys.stdout.flush() is_spam, reason, why = check_if_spam(title, body, poster, url, site, post_id, False, True, 1) return is_spam, reason, why
def check_if_spam_json(data): d = json.loads(json.loads(data)["data"]) try: _ = d["ownerUrl"] # noqa except: # owner's account doesn't exist anymore, no need to post it in chat: # http://chat.stackexchange.com/transcript/message/18380776#18380776 return False, None, "" title = d["titleEncodedFancy"] title = unescape_title(title) body = d["bodySummary"] poster = d["ownerDisplayName"] url = d["url"] post_id = str(d["id"]) print time.strftime("%Y-%m-%d %H:%M:%S"), title.encode("ascii", errors="replace") site = d["siteBaseHostAddress"] site = site.encode("ascii", errors="replace") sys.stdout.flush() is_spam, reason, why = check_if_spam(title, body, poster, url, site, post_id, False, True, 1, 0) return is_spam, reason, why
def check_if_spam_json(data): d = json.loads(json.loads(data)["data"]) try: _ = d["ownerUrl"] # noqa except: # owner's account doesn't exist anymore, no need to post it in chat: # http://chat.stackexchange.com/transcript/message/18380776#18380776 return False, None title = d["titleEncodedFancy"] title = unescape_title(title) body = d["bodySummary"] poster = d["ownerDisplayName"] url = d["url"] post_id = str(d["id"]) print time.strftime("%Y-%m-%d %H:%M:%S"), title.encode("ascii", errors="replace") quality_score = bayesian_score(title) print quality_score if quality_score < 0.3 and d["siteBaseHostAddress"] == "stackoverflow.com": print GlobalVars.bayesian_testroom.send_message("[ SmokeDetector | BayesianBeta ] Quality score " + str(quality_score * 100) + ": [" + title + "](" + url + ")") site = d["siteBaseHostAddress"] site = site.encode("ascii", errors="replace") sys.stdout.flush() is_spam, reason = check_if_spam(title, body, poster, url, site, post_id, False, True) return is_spam, reason