# Exemplo n.º 1
# 0
def api_features():
    """Serve labelled per-feature data points for the logistic-regression UI.

    Responds with JSON containing the feature's description plus one
    ``{"x": value, "label": class}`` point per rude and per normal comment.
    Aborts with 404 when the user is unauthenticated, when the requested
    feature index is missing/negative, or when a different model is active.
    """
    if g.user is None:
        abort(404)

    feature_index = int(request.args.get("x", -1))
    if feature_index < 0:
        abort(404)

    if CURRENT_MODEL != MODEL_LOGISITIC_REGRESSION:
        abort(404)

    def collect(comments, label):
        # One data point per comment: the manual feature value plus its class.
        return [
            {
                "x": SiteCommentFeatures.manual_feature_value(
                    comment, feature_index),
                "label": label,
            }
            for comment in comments
        ]

    positive_data = collect(SiteComment.rude_comments(),
                            SiteCommentFeatures.RUDE_CLASS)
    negative_data = collect(SiteComment.normal_comments(),
                            SiteCommentFeatures.NORMAL_CLASS)

    return jsonify(
        **{
            "x_name": SiteCommentFeatures.feature_desc(feature_index),
            "positive": positive_data,
            "negative": negative_data
        })
# Exemplo n.º 2
# 0
def play():
    """Count how many rude and normal comments contain at least one
    Wiktionary "humiliation" word, and print both totals.
    """
    rude_comments = SiteComment.rude_comments()
    normal_comments = SiteComment.normal_comments()
    # Build the lookup once as a set: membership tests inside the loops
    # become O(1) instead of scanning the word list for every word.
    rude_words_wiki = set(WiktionaryOrg.humiliation_words())

    def count_comments_with_rude_words(comments):
        # Number of comments whose processed body contains >=1 rude word.
        total = 0
        for comment in comments:
            body = comment.processed_body
            # BUG FIX: the old guard `len(body.split(' ')) == 0` was never
            # true (str.split with a separator always yields at least one
            # element), so empty bodies were never skipped. Skip bodies
            # that contain no visible text instead.
            if not body.strip():
                continue
            if any(word in rude_words_wiki for word in body.split(u' ')):
                total += 1
        return total

    total_rude = count_comments_with_rude_words(rude_comments)
    total_normal = count_comments_with_rude_words(normal_comments)

    # BUG FIX: the old summary printed `comment.body` of whichever comment
    # happened to be iterated last (and raised NameError when there were
    # no comments at all); report only the totals.
    print("Total rude %s, total normal %s" %
          (str(total_rude), str(total_normal)))
# Exemplo n.º 3
# 0
def analyse_with_bayes_classifier():
    """Train a binary Naive Bayes classifier on the labelled comment sets,
    print its learned parameters, and return the trained instance.
    """
    classifier = BinaryNaiveBayesClassifier(True)
    classifier.train(SiteComment.rude_comments(),
                     SiteComment.normal_comments())
    classifier.print_params()
    return classifier
# Exemplo n.º 4
# 0
def create_model():
    """Train the currently configured classifier and persist it — together
    with its feature maker and quality metrics — as JSON objects in the DB.

    Only the logistic-regression model is supported; any other setting
    just prints a hint and returns.
    """
    if CURRENT_MODEL != MODEL_LOGISITIC_REGRESSION:
        print("Please specify a model to create first.")
        return

    feature_list = SiteCommentFeatureList(SiteComment.rude_comments(),
                                          SiteComment.normal_comments())
    feature_maker = feature_list.maker()

    classifier = LogisticRegaression(feature_list, feature_maker, True)
    classifier.train()
    rude_total, rude_right, normal_total, normal_right = \
        classifier.test(True)

    # Per-class correct-prediction rates.
    tpr = float(rude_right) / float(rude_total)
    tnr = float(normal_right) / float(normal_total)
    total_objects = float(rude_total + normal_total)
    # NOTE(review): this weights each class rate by its share of *correct*
    # predictions rather than computing plain accuracy
    # ((rude_right + normal_right) / total) — confirm this is intentional.
    acc = (rude_right / total_objects) * tpr + (normal_right /
                                                total_objects) * tnr
    print("Accuracy: %s, rude: %s (%s), normal: %s (%s) " %
          (str(acc), str(rude_right), str(rude_total), str(normal_right),
           str(normal_total)))

    adder = DBModelAdder()
    adder.start()

    # Persist the feature maker so the stored model can be re-hydrated.
    feature_record = JSONObjectData(JSONObjectData.FEATURE_TYPE_ID,
                                    json.dumps(feature_maker.store()))
    adder.add(feature_record)

    # Persist the classifier weights alongside its quality metrics.
    metrics = {
        "acc": acc,
        "rude_right": rude_right,
        "rude_total": rude_total,
        "normal_right": normal_right,
        "normal_total": normal_total
    }
    classifier_record = JSONObjectData(JSONObjectData.LOGREG_TYPE_ID,
                                       json.dumps(classifier.store()),
                                       json.dumps(metrics))
    adder.add(classifier_record)

    adder.done()
    print("A new logistic regression classifier was added to the DB.")
# Exemplo n.º 5
# 0
def analyse_with_cosine():
    """Cluster rude comments by cosine similarity and print the bodies of
    the comments in the biggest cluster found.
    """
    stats = DocsStats()

    def to_documents(comments):
        # Each Document receives the shared stats object — presumably it
        # registers its terms there for the IDF pass below (TODO confirm).
        return [Document(stats, c.id, c.body, c.processed_body)
                for c in comments]

    rude_docs = to_documents(SiteComment.rude_comments())
    # The unverified documents are not clustered here, but constructing
    # them feeds the shared corpus statistics.
    unverified_docs = to_documents(SiteComment.comments_for_analysis())

    stats.calculate_idfs()
    stats.vectorise_documents()

    cosine = CosineSimilarity(rude_docs)
    for item in cosine.biggest_cluster():
        print("- ", item.body, "\r\n")
# Exemplo n.º 6
# 0
def dump_verified_comments():
    """Export rude, normal, and skipped comments as CSV files under
    ./dump, recreating the dump directory from scratch on every run.
    """
    directory = "./dump"

    # Start from a clean directory so stale dumps never linger.
    if os.path.exists(directory):
        shutil.rmtree(directory)
    os.makedirs(directory)

    def fmt_date(value):
        # Serialize datetimes as "YYYY-mm-dd HH:MM:SS"; pass None through.
        if value is None:
            return None
        return value.strftime("%Y-%m-%d %H:%M:%S")

    def dump_to(filename, comments):
        # One CSV row per comment, in a fixed column order.
        with open(filename, 'w', encoding="utf8") as csvfile:
            writer = csv.writer(csvfile, lineterminator='\n', delimiter=',')
            for c in comments:
                row = [
                    c.comment_id, c.question_id, c.answer_id,
                    c.post_author_id, c.post_score, c.title,
                    c.body,
                    fmt_date(c.creation_date), c.author_id,
                    c.author_name, c.diff_with_post,
                    fmt_date(c.verified), c.is_rude,
                    c.verified_user_id,
                    fmt_date(c.added),
                    fmt_date(c.analysed), c.looks_rude,
                    fmt_date(c.skipped)
                ]
                writer.writerow(row)

    dump_to(directory + "/rude_comments.csv", SiteComment.rude_comments())
    dump_to(directory + "/normal_comments.csv",
            SiteComment.normal_comments())
    dump_to(directory + "/skipped_comments.csv",
            SiteComment.skipped_comments())