示例#1
0
def check_if_spam_json(json_data):
    """Decode a JSON-wrapped post and run it through ``check_if_spam``.

    ``json_data`` is a JSON document whose "data" member is either the
    literal string "hb" (presumably a heartbeat - confirm with the sender)
    or a JSON-encoded description of a post.

    Returns the ``(is_spam, reason, why)`` triple produced by
    ``check_if_spam``, or ``(False, None, "")`` when the message is "hb",
    when the inner payload is not valid JSON (this is also reported through
    ``GlobalVars.charcoal_hq``), or when the post has no "ownerUrl".
    """
    not_spam = (False, None, "")

    payload = json.loads(json_data)["data"]
    if payload == "hb":
        return not_spam

    try:
        post = json.loads(payload)
    except ValueError:
        GlobalVars.charcoal_hq.send_message(
            u"Encountered ValueError parsing the following:\n{0}".format(json_data),
            False)
        return not_spam

    if "ownerUrl" not in post:
        # owner's account doesn't exist anymore, no need to post it in chat:
        # http://chat.stackexchange.com/transcript/message/18380776#18380776
        return not_spam

    title = unescape_title(post["titleEncodedFancy"])
    body = post["bodySummary"]
    poster = post["ownerDisplayName"]
    url = post["url"]
    post_id = str(post["id"])

    # Log a timestamped, ASCII-safe copy of the title. A single-argument
    # parenthesized print produces the same line as ``print a, b``.
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    ascii_title = title.encode("ascii", errors="replace")
    print("{0} {1}".format(stamp, ascii_title))

    site = post["siteBaseHostAddress"].encode("ascii", errors="replace")
    sys.stdout.flush()

    is_spam, reason, why = check_if_spam(
        title, body, poster, url, site, post_id, False, True, 1, 0)
    return is_spam, reason, why
示例#2
0
def check_if_spam_json(json_data):
    """Decode a JSON-wrapped question and run it through ``check_if_spam``.

    ``json_data`` is a JSON document whose "data" member is either the
    literal string "hb" (presumably a heartbeat - confirm with the sender)
    or a JSON-encoded description of a post.

    Returns the ``(is_spam, reason, why)`` triple produced by
    ``check_if_spam``, or ``(False, None, "")`` when the message is "hb",
    when the inner payload is not valid JSON (this is also reported through
    ``GlobalVars.charcoal_hq``), or when the post has no "ownerUrl".
    """
    not_spam = (False, None, "")

    payload = json.loads(json_data)["data"]
    if payload == "hb":
        return not_spam

    try:
        post = json.loads(payload)
    except ValueError:
        GlobalVars.charcoal_hq.send_message(
            u"Encountered ValueError parsing the following:\n{0}".format(json_data),
            False)
        return not_spam

    if "ownerUrl" not in post:
        # owner's account doesn't exist anymore, no need to post it in chat:
        # http://chat.stackexchange.com/transcript/message/18380776#18380776
        return not_spam

    title = parsing.unescape_title(post["titleEncodedFancy"])
    body = post["bodySummary"]
    poster = post["ownerDisplayName"]
    url = post["url"]
    post_id = str(post["id"])

    # Log a timestamped, ASCII-safe copy of the title. A single-argument
    # parenthesized print produces the same line as ``print a, b``.
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    ascii_title = title.encode("ascii", errors="replace")
    print("{0} {1}".format(stamp, ascii_title))

    site = post["siteBaseHostAddress"].encode("ascii", errors="replace")
    sys.stdout.flush()

    # Only a summary of the body is available here, and the owner's
    # reputation and the post score are passed as fixed values.
    is_spam, reason, why = check_if_spam(
        title=title, body=body,
        user_name=poster, user_url=url,
        post_site=site, post_id=post_id,
        is_answer=False, body_is_summary=True,
        owner_rep=1, post_score=0)
    return is_spam, reason, why
示例#3
0
def check_if_spam_json(data):
    """Decode a JSON-wrapped post and run it through ``check_if_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post. The title is additionally scored with
    ``bayesian_score``; for stackoverflow.com posts scoring below 0.3 a
    message is sent to ``GlobalVars.bayesian_testroom``.

    Returns the ``(is_spam, reason, why)`` triple produced by
    ``check_if_spam``, or ``(False, None, "")`` when the decoded payload has
    no "ownerUrl" entry.

    Raises ValueError if either JSON layer is malformed and KeyError if one
    of the other expected fields is missing.
    """
    d = json.loads(json.loads(data)["data"])
    try:
        d["ownerUrl"]
    except (KeyError, TypeError):
        # KeyError: the key is absent; TypeError: the payload is not a
        # mapping at all. A bare ``except:`` would also have trapped
        # KeyboardInterrupt/SystemExit here.
        # owner's account doesn't exist anymore, no need to post it in chat:
        # http://chat.stackexchange.com/transcript/message/18380776#18380776
        return False, None, ""
    title = unescape_title(d["titleEncodedFancy"])
    body = d["bodySummary"]
    poster = d["ownerDisplayName"]
    url = d["url"]
    post_id = str(d["id"])
    # Single-argument parenthesized prints emit exactly what the print
    # statements did, with or without ``print_function`` in effect.
    print("{0} {1}".format(time.strftime("%Y-%m-%d %H:%M:%S"),
                           title.encode("ascii", errors="replace")))
    quality_score = bayesian_score(title)
    print(quality_score)
    if quality_score < 0.3 and d["siteBaseHostAddress"] == "stackoverflow.com":
        print(GlobalVars.bayesian_testroom.send_message(
            "[ SmokeDetector | BayesianBeta ] Quality score " +
            str(quality_score * 100) + ": [" + title + "](" + url + ")"))
    site = d["siteBaseHostAddress"].encode("ascii", errors="replace")
    sys.stdout.flush()
    is_spam, reason, why = check_if_spam(title, body, poster, url, site,
                                         post_id, False, True)
    return is_spam, reason, why
示例#4
0
def handle_spam_json(data, reason):
    """Decode a JSON-wrapped post and pass its fields to ``handle_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post; ``reason`` is forwarded to ``handle_spam`` unchanged.

    This is best-effort: any ordinary error (malformed JSON, a missing
    field, a failure inside ``handle_spam``) is swallowed and "NOP" is
    printed instead. Returns None.
    """
    try:
        d = json.loads(json.loads(data)["data"])
        title = unescape_title(d["titleEncodedFancy"])
        poster = d["ownerDisplayName"]
        site = d["siteBaseHostAddress"]
        url = d["url"]
        poster_url = d["ownerUrl"]
        post_id = str(d["id"])
        handle_spam(title, poster, site, url, poster_url, post_id, reason, False)
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        print("NOP")
示例#5
0
def handle_spam_json(data, reason, why=""):
    """Decode a JSON-wrapped post and pass its fields to ``handle_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post; ``reason`` and ``why`` are forwarded to
    ``handle_spam`` unchanged.

    This is best-effort: any ordinary error (malformed JSON, a missing
    field, a failure inside ``handle_spam``) is swallowed and "NOP" is
    printed instead. Returns None.
    """
    try:
        d = json.loads(json.loads(data)["data"])
        title = unescape_title(d["titleEncodedFancy"])
        body = d["bodySummary"]
        poster = d["ownerDisplayName"]
        site = d["siteBaseHostAddress"]
        url = d["url"]
        poster_url = d["ownerUrl"]
        post_id = str(d["id"])
        handle_spam(title, body, poster, site, url, poster_url, post_id, reason, False, why)
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        print("NOP")
示例#6
0
def handle_spam_json(data, reason, why=""):
    """Decode a JSON-wrapped post and pass its fields to ``handle_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post; ``reason`` and ``why`` are forwarded to
    ``handle_spam`` unchanged.

    Nothing raised while decoding or handling escapes this function: the
    active exception is handed to ``excepthook.uncaught_exception``.
    """
    try:
        envelope = json.loads(data)
        post = json.loads(envelope["data"])
        post_title = unescape_title(post["titleEncodedFancy"])
        post_body = post["bodySummary"]
        owner_name = post["ownerDisplayName"]
        host = post["siteBaseHostAddress"]
        post_url = post["url"]
        owner_url = post["ownerUrl"]
        handle_spam(post_title, post_body, owner_name, host, post_url,
                    owner_url, str(post["id"]), reason, False, why)
    except:
        # Deliberate catch-all: forward (type, value, traceback) of
        # whatever was raised to the reporting hook.
        excepthook.uncaught_exception(*sys.exc_info())
示例#7
0
def check_if_spam_json(data):
    """Decode a JSON-wrapped post and run it through ``check_if_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post.

    Returns the ``(is_spam, reason, why)`` triple produced by
    ``check_if_spam``, or ``(False, None, "")`` when the decoded payload has
    no "ownerUrl" entry.

    Raises ValueError if either JSON layer is malformed and KeyError if one
    of the other expected fields is missing.
    """
    d = json.loads(json.loads(data)["data"])
    try:
        d["ownerUrl"]
    except (KeyError, TypeError):
        # KeyError: the key is absent; TypeError: the payload is not a
        # mapping at all. A bare ``except:`` would also have trapped
        # KeyboardInterrupt/SystemExit here.
        # owner's account doesn't exist anymore, no need to post it in chat:
        # http://chat.stackexchange.com/transcript/message/18380776#18380776
        return False, None, ""
    title = unescape_title(d["titleEncodedFancy"])
    body = d["bodySummary"]
    poster = d["ownerDisplayName"]
    url = d["url"]
    post_id = str(d["id"])
    # A single-argument parenthesized print emits exactly what
    # ``print a, b`` did, with or without ``print_function`` in effect.
    print("{0} {1}".format(time.strftime("%Y-%m-%d %H:%M:%S"),
                           title.encode("ascii", errors="replace")))
    site = d["siteBaseHostAddress"].encode("ascii", errors="replace")
    sys.stdout.flush()
    is_spam, reason, why = check_if_spam(title, body, poster, url, site, post_id, False, True, 1)
    return is_spam, reason, why
示例#8
0
def check_if_spam_json(data):
    """Decode a JSON-wrapped post and run it through ``check_if_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post.

    Returns the ``(is_spam, reason, why)`` triple produced by
    ``check_if_spam``, or ``(False, None, "")`` when the decoded payload has
    no "ownerUrl" entry.

    Raises ValueError if either JSON layer is malformed and KeyError if one
    of the other expected fields is missing.
    """
    d = json.loads(json.loads(data)["data"])
    try:
        d["ownerUrl"]
    except (KeyError, TypeError):
        # KeyError: the key is absent; TypeError: the payload is not a
        # mapping at all. A bare ``except:`` would also have trapped
        # KeyboardInterrupt/SystemExit here.
        # owner's account doesn't exist anymore, no need to post it in chat:
        # http://chat.stackexchange.com/transcript/message/18380776#18380776
        return False, None, ""
    title = unescape_title(d["titleEncodedFancy"])
    body = d["bodySummary"]
    poster = d["ownerDisplayName"]
    url = d["url"]
    post_id = str(d["id"])
    # A single-argument parenthesized print emits exactly what
    # ``print a, b`` did, with or without ``print_function`` in effect.
    print("{0} {1}".format(time.strftime("%Y-%m-%d %H:%M:%S"),
                           title.encode("ascii", errors="replace")))
    site = d["siteBaseHostAddress"].encode("ascii", errors="replace")
    sys.stdout.flush()
    is_spam, reason, why = check_if_spam(title, body, poster, url, site,
                                         post_id, False, True, 1, 0)
    return is_spam, reason, why
示例#9
0
def check_if_spam_json(data):
    """Decode a JSON-wrapped post and run it through ``check_if_spam``.

    ``data`` is a JSON document whose "data" member is itself a JSON string
    describing a post. The title is additionally scored with
    ``bayesian_score``; for stackoverflow.com posts scoring below 0.3 a
    message is sent to ``GlobalVars.bayesian_testroom``.

    Returns the ``(is_spam, reason)`` pair produced by ``check_if_spam``,
    or ``(False, None)`` when the decoded payload has no "ownerUrl" entry.

    Raises ValueError if either JSON layer is malformed and KeyError if one
    of the other expected fields is missing.
    """
    d = json.loads(json.loads(data)["data"])
    try:
        d["ownerUrl"]
    except (KeyError, TypeError):
        # KeyError: the key is absent; TypeError: the payload is not a
        # mapping at all. A bare ``except:`` would also have trapped
        # KeyboardInterrupt/SystemExit here.
        # owner's account doesn't exist anymore, no need to post it in chat:
        # http://chat.stackexchange.com/transcript/message/18380776#18380776
        return False, None
    title = unescape_title(d["titleEncodedFancy"])
    body = d["bodySummary"]
    poster = d["ownerDisplayName"]
    url = d["url"]
    post_id = str(d["id"])
    # Single-argument parenthesized prints emit exactly what the print
    # statements did, with or without ``print_function`` in effect.
    print("{0} {1}".format(time.strftime("%Y-%m-%d %H:%M:%S"),
                           title.encode("ascii", errors="replace")))
    quality_score = bayesian_score(title)
    print(quality_score)
    if quality_score < 0.3 and d["siteBaseHostAddress"] == "stackoverflow.com":
        print(GlobalVars.bayesian_testroom.send_message("[ SmokeDetector | BayesianBeta ] Quality score " + str(quality_score * 100) + ": [" + title + "](" + url + ")"))
    site = d["siteBaseHostAddress"].encode("ascii", errors="replace")
    sys.stdout.flush()
    is_spam, reason = check_if_spam(title, body, poster, url, site, post_id, False, True)
    return is_spam, reason