示例#1
0
    def test_nlplength_funcs(self):
        """Compare the NLPLengths length counters against fixed expectations.

        The word-token, sentence-token and character counters are each
        called with ``0`` and with the cluster id ``'1-2-1-0-0'``; the
        word-token counter is additionally called with an empty list.
        Every returned tuple is printed and compared for exact equality.
        """
        lengths = NLPLengths(ReviewDB.load(cluster_file='tests/testing_db.csv'))
        cluster_id = '1-2-1-0-0'

        # Empty list: expect an empty Counter followed by four zeros.
        from_empty = lengths.word_token_review_length_counter([])
        self.assertEqual(from_empty, (Counter(), 0, 0, 0, 0))

        # --- word_token_review_length_counter ---
        words_for_zero = lengths.word_token_review_length_counter(0)
        print(words_for_zero)
        expected_words_zero = (Counter({"12": 1}), 12.0, 12, (12, 1), 0.0)
        self.assertEqual(words_for_zero, expected_words_zero)

        words_for_cluster = lengths.word_token_review_length_counter(cluster_id)
        print(words_for_cluster)
        expected_words_cluster = (
            Counter({"22": 1, "7": 1, "6": 1}),
            11.666666666666666,
            7,
            (6, 1),
            8.962886439832502,
        )
        self.assertEqual(words_for_cluster, expected_words_cluster)

        # --- sent_token_review_length_counter ---
        sents_for_zero = lengths.sent_token_review_length_counter(0)
        print(sents_for_zero)
        expected_sents_zero = (Counter({"1": 1}), 1.0, 1, (1, 1), 0.0)
        self.assertEqual(sents_for_zero, expected_sents_zero)

        sents_for_cluster = lengths.sent_token_review_length_counter(cluster_id)
        print(sents_for_cluster)
        expected_sents_cluster = (
            Counter({"1": 2, '3': 1}),
            1.6666666666666667,
            1,
            (1, 2),
            1.1547005383792515,
        )
        self.assertEqual(sents_for_cluster, expected_sents_cluster)

        # --- char_review_length_counter ---
        chars_for_zero = lengths.char_review_length_counter(0)
        print(chars_for_zero)
        expected_chars_zero = (Counter({"53": 1}), 53.0, 53, (53, 1), 0.0)
        self.assertEqual(chars_for_zero, expected_chars_zero)

        chars_for_cluster = lengths.char_review_length_counter(cluster_id)
        print(chars_for_cluster)
        expected_chars_cluster = (
            Counter({"101": 1, "31": 1, "30": 1}),
            54.0,
            31,
            (30, 1),
            40.70626487409524,
        )
        self.assertEqual(chars_for_cluster, expected_chars_cluster)

        # A key absent from the returned histogram must read as 0.
        sentence_histogram = sents_for_cluster[0]
        self.assertEqual(sentence_histogram['0'], 0)
示例#2
0
 def test_density_estimator(self):
     """Check that the density estimate of a histogram sums to one.

     The histogram is the first element of the tuple returned by
     ``char_review_length_counter`` for cluster ``'1-2-1-0-0'``.
     """
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()
     histogram = nlp.char_review_length_counter('1-2-1-0-0')[0]
     density_estimate = histogram_comparison.density_estimator(histogram)
     # The total is compared against the float 1.0; rounding while summing
     # can leave it marginally off, so use a tolerance instead of ``==``.
     self.assertAlmostEqual(sum(density_estimate.values()), 1.0)
示例#3
0
 def test_density_estimator(self):
     """Check that the density estimate of a histogram sums to one.

     The histogram is the first element of the tuple returned by
     ``char_review_length_counter`` for cluster ``'1-2-1-0-0'``, using the
     database loaded from the CSV fixture.
     """
     db = ReviewDB.load(cluster_file='tests/testing_db.csv')
     nlp = NLPLengths(db)
     histogram_comparison = HistogramComparison()
     histogram = nlp.char_review_length_counter('1-2-1-0-0')[0]
     density_estimate = histogram_comparison.density_estimator(histogram)
     # The total is compared against the float 1.0; rounding while summing
     # can leave it marginally off, so use a tolerance instead of ``==``.
     self.assertAlmostEqual(sum(density_estimate.values()), 1.0)
示例#4
0
 def test_sorensen(self):
     """Check ``HistogramComparison.sorensen`` on three histogram pairs.

     Covers a histogram compared with itself (expected 0.0), a pair of
     single-key histograms with counts 1 and 0 (expected 1.0), and a pair
     of two-key histograms with one shared key (expected about 0.66667).
     """
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()

     # Identical inputs.
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.sorensen(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)

     # Same key, counts 1 versus 0.
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.sorensen(
         trivial_histogram1, trivial_histogram2)
     self.assertEqual(compare_trivial, 1.0)

     # Partially overlapping keys.
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.sorensen(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance: a bare ``result - expected < tol`` check also
     # passes when the result is far *below* the expected value.
     self.assertAlmostEqual(compare_more_complicated, 0.66667, delta=.001)
示例#5
0
 def test_hellinger(self):
     """Check ``HistogramComparison.hellinger`` on three histogram pairs.

     Covers a histogram compared with itself (expected 0.0), a pair of
     single-key histograms with counts 1 and 0 (expected about 0.70711),
     and a pair of two-key histograms with one shared key (expected about
     0.68226).
     """
     db = ReviewDB('tests/test_data/')
     nlp = NLPLengths(db.entity_db_dict['all'])
     histogram_comparison = HistogramComparison()

     # Identical inputs.
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.hellinger(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)

     # Same key, counts 1 versus 0.
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.hellinger(
         trivial_histogram1, trivial_histogram2)
     # Irrational expected value: compare with a tolerance so the test does
     # not depend on the exact order of floating-point operations.
     self.assertAlmostEqual(compare_trivial, 0.7071067811865475)

     # Partially overlapping keys.
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.hellinger(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance: a bare ``result - expected < tol`` check also
     # passes when the result is far *below* the expected value.
     self.assertAlmostEqual(
         compare_more_complicated, 0.6822591268536838, delta=.001)
示例#6
0
 def test_euclidean(self):
     """Check ``HistogramComparison.euclidean`` on three histogram pairs.

     Covers a histogram compared with itself (expected 0.0), a pair of
     single-key histograms with counts 1 and 0 (expected 1.0), and a pair
     of two-key histograms with one shared key (expected about 4.24264).
     """
     db = ReviewDB.load(cluster_file='tests/testing_db.csv')
     nlp = NLPLengths(db)
     histogram_comparison = HistogramComparison()

     # Identical inputs.
     histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
     compare_self = histogram_comparison.euclidean(histogram1, histogram1)
     self.assertEqual(compare_self, 0.0)

     # Same key, counts 1 versus 0.
     trivial_histogram1 = Counter({"1": 1})
     trivial_histogram2 = Counter({"1": 0})
     compare_trivial = histogram_comparison.euclidean(
         trivial_histogram1, trivial_histogram2)
     self.assertEqual(compare_trivial, 1.0)

     # Partially overlapping keys.
     more_complicated_histogram1 = Counter({"1": 1, "2": 2})
     more_complicated_histogram2 = Counter({"2": 3, "3": 4})
     compare_more_complicated = histogram_comparison.euclidean(
         more_complicated_histogram1, more_complicated_histogram2)
     # Two-sided tolerance: a bare ``result - expected < tol`` check also
     # passes when the result is far *below* the expected value.
     self.assertAlmostEqual(compare_more_complicated, 4.24264, delta=.001)