Exemplo n.º 1
0
def analyse_comments(analysed_at=None):
    classifier = None

    if CURRENT_MODEL == MODEL_LOGISITIC_REGRESSION:
        classifier = analyse_with_logistic_regression()

    if classifier is None:
        print("Classifier is not set up. Set up classifier first.")
        return

    print("Model is ready. Starting analysis...")
    suspected = 0
    adder = DBModelAdder()
    adder.start()
    comments_for_analysis = SiteComment.comments_for_analysis(analysed_at)
    for comment in comments_for_analysis:
        comment.analysed = datetime.datetime.now()
        comment.looks_rude = classifier.classify_rude(comment)
        adder.add(comment)
        if comment.looks_rude:
            suspected += 1
    adder.done()

    print("Analysis was done for %s comments, %s suspected to be rude." %
          (str(len(comments_for_analysis)), str(suspected)))
Exemplo n.º 2
0
def analyse_with_cosine():
    stats = DocsStats()
    rude_comments = SiteComment.rude_comments()
    rude_docs = list()
    for comment in rude_comments:
        rude_docs.append(
            Document(stats, comment.id, comment.body, comment.processed_body))

    unverified_comments = SiteComment.comments_for_analysis()
    unverified_docs = list()
    for comment in unverified_comments:
        unverified_docs.append(
            Document(stats, comment.id, comment.body, comment.processed_body))

    stats.calculate_idfs()
    stats.vectorise_documents()

    cosine = CosineSimilarity(rude_docs)
    rude_cluster = cosine.biggest_cluster()
    for item in rude_cluster:
        print("- ", item.body, "\r\n")