Exemplo n.º 1
0
def extract_results(labels, original_data, review_bodys):
    """Group reviews by cluster label and extract objects from the groups.

    Prints the per-label counts, then, for every label other than ``-1``
    (treated as noise and skipped), collects one
    ``(review_body[0], sentence, matches)`` tuple per review carrying that
    label, where ``sentence`` is ``get_review(review_body)`` and ``matches``
    is ``PatternMatcher.find_matches(sentence)``. Progress is printed along
    the way.

    Args:
        labels: Cluster label for each review, positionally aligned with
            ``review_bodys`` (expected to have the same length). Labels
            must be hashable.
        original_data: Unused here; kept so existing callers keep working.
        review_bodys: Indexable review records; element ``[0]`` of each is
            stored in the result tuple (presumably an identifier — confirm
            against the caller).

    Returns:
        Whatever ``PatternMatcher.extract_objects`` returns for the
        ``{label: [(review_body[0], sentence, matches), ...]}`` mapping.
    """
    # print number of elements in each cluster
    cluster_counts = Counter(labels)
    print(cluster_counts)

    # Bucket the reviews by label in a single pass so that each cluster does
    # not have to rescan the whole review list. Insertion order inside each
    # bucket follows the input order.
    members = {}
    for label, review_body in zip(labels, review_bodys):
        members.setdefault(label, []).append(review_body)

    clusters = {}
    pattern_matcher = PatternMatcher()
    # find and print dbscan result on actual text data - review_bodys
    for i in set(labels):
        if i == -1:  # do not print if noise (-1)
            continue
        clusters[i] = []
        print(i, "----")
        for review_body in members.get(i, ()):
            print(">>>", review_body)

            sentence = get_review(review_body)
            matches = pattern_matcher.find_matches(sentence)
            clusters[i].append((review_body[0], sentence, matches))

            print(clusters[i], "\n")
    print(clusters)
    # NOTE: a bare ``input()`` used to sit here; it blocked until Enter was
    # pressed and raised EOFError when stdin was not interactive, so it was
    # removed.
    return pattern_matcher.extract_objects(clusters)