def test_density_estimator(self):
    """Check that a density estimate's values add up to one.

    The histogram is the first element returned by
    ``NLPLengths.char_review_length_counter('1-2-1-0-0')``, built on the
    ``'all'`` entry of the test database loaded from ``tests/test_data/``.
    """
    db = ReviewDB('tests/test_data/')
    nlp = NLPLengths(db.entity_db_dict['all'])
    histogram_comparison = HistogramComparison()

    char_result_cluster = nlp.char_review_length_counter('1-2-1-0-0')
    histogram = char_result_cluster[0]
    density_estimate = histogram_comparison.density_estimator(histogram)

    # The total is a sum of floats, so compare with a tolerance rather
    # than exact equality to stay robust against rounding error.
    self.assertAlmostEqual(sum(density_estimate.values()), 1.0)
def test_density_estimator(self):
    """Check that a density estimate's values add up to one.

    The histogram is the first element returned by
    ``NLPLengths.char_review_length_counter('1-2-1-0-0')``, built on the
    database loaded from ``tests/testing_db.csv``.
    """
    db = ReviewDB.load(cluster_file='tests/testing_db.csv')
    nlp = NLPLengths(db)
    histogram_comparison = HistogramComparison()

    char_result_cluster = nlp.char_review_length_counter('1-2-1-0-0')
    histogram = char_result_cluster[0]
    density_estimate = histogram_comparison.density_estimator(histogram)

    # The total is a sum of floats, so compare with a tolerance rather
    # than exact equality to stay robust against rounding error.
    self.assertAlmostEqual(sum(density_estimate.values()), 1.0)
def test_sorensen(self):
    """Exercise ``HistogramComparison.sorensen`` on three histogram pairs.

    Cases covered:
      * a histogram compared with itself (expected 0.0),
      * two single-bin histograms where one count is zero (expected 1.0),
      * two partially overlapping histograms (expected about 0.66667).
    """
    db = ReviewDB('tests/test_data/')
    nlp = NLPLengths(db.entity_db_dict['all'])
    histogram_comparison = HistogramComparison()

    histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
    compare_self = histogram_comparison.sorensen(histogram1, histogram1)
    self.assertEqual(compare_self, 0.0)

    trivial_histogram1 = Counter({"1": 1})
    trivial_histogram2 = Counter({"1": 0})
    compare_trivial = histogram_comparison.sorensen(
        trivial_histogram1, trivial_histogram2)
    self.assertEqual(compare_trivial, 1.0)

    more_complicated_histogram1 = Counter({"1": 1, "2": 2})
    more_complicated_histogram2 = Counter({"2": 3, "3": 4})
    compare_more_complicated = histogram_comparison.sorensen(
        more_complicated_histogram1, more_complicated_histogram2)
    # Two-sided tolerance: `assertLess(actual - expected, tol)` would also
    # pass for any result far *below* the expected value.
    self.assertAlmostEqual(compare_more_complicated, 0.66667, delta=.001)
def test_hellinger(self):
    """Exercise ``HistogramComparison.hellinger`` on three histogram pairs.

    Cases covered:
      * a histogram compared with itself (expected 0.0),
      * two single-bin histograms where one count is zero
        (expected about 0.7071067811865475),
      * two partially overlapping histograms
        (expected about 0.6822591268536838).
    """
    db = ReviewDB('tests/test_data/')
    nlp = NLPLengths(db.entity_db_dict['all'])
    histogram_comparison = HistogramComparison()

    histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
    compare_self = histogram_comparison.hellinger(histogram1, histogram1)
    self.assertEqual(compare_self, 0.0)

    trivial_histogram1 = Counter({"1": 1})
    trivial_histogram2 = Counter({"1": 0})
    compare_trivial = histogram_comparison.hellinger(
        trivial_histogram1, trivial_histogram2)
    # The expected value is an irrational number written out as a float
    # literal; compare with a tolerance instead of bit-for-bit equality.
    self.assertAlmostEqual(compare_trivial, 0.7071067811865475)

    more_complicated_histogram1 = Counter({"1": 1, "2": 2})
    more_complicated_histogram2 = Counter({"2": 3, "3": 4})
    compare_more_complicated = histogram_comparison.hellinger(
        more_complicated_histogram1, more_complicated_histogram2)
    # Two-sided tolerance: `assertLess(actual - expected, tol)` would also
    # pass for any result far *below* the expected value.
    self.assertAlmostEqual(
        compare_more_complicated, 0.6822591268536838, delta=.001)
def test_euclidean(self):
    """Exercise ``HistogramComparison.euclidean`` on three histogram pairs.

    Cases covered:
      * a histogram compared with itself (expected 0.0),
      * two single-bin histograms where one count is zero (expected 1.0),
      * two partially overlapping histograms (expected about 4.24264).
    """
    db = ReviewDB.load(cluster_file='tests/testing_db.csv')
    nlp = NLPLengths(db)
    histogram_comparison = HistogramComparison()

    histogram1 = nlp.char_review_length_counter('1-2-1-0-0')[0]
    compare_self = histogram_comparison.euclidean(histogram1, histogram1)
    self.assertEqual(compare_self, 0.0)

    trivial_histogram1 = Counter({"1": 1})
    trivial_histogram2 = Counter({"1": 0})
    compare_trivial = histogram_comparison.euclidean(
        trivial_histogram1, trivial_histogram2)
    self.assertEqual(compare_trivial, 1.0)

    more_complicated_histogram1 = Counter({"1": 1, "2": 2})
    more_complicated_histogram2 = Counter({"2": 3, "3": 4})
    compare_more_complicated = histogram_comparison.euclidean(
        more_complicated_histogram1, more_complicated_histogram2)
    # Two-sided tolerance: `assertLess(actual - expected, tol)` would also
    # pass for any result far *below* the expected value.
    self.assertAlmostEqual(compare_more_complicated, 4.24264, delta=.001)
# set up data access
# Both JSON files are read inside context managers so their handles are
# closed as soon as parsing finishes; JSON text is decoded as UTF-8
# explicitly instead of relying on the platform default encoding.
with open("./../config.json", encoding="utf-8") as config_file:
    CONFIG = json.load(config_file)
data_folder = os.path.join(os.environ['DATA_DIR'], CONFIG['dataset'])
with open(os.path.join(data_folder, 'schema.json'),
          encoding="utf-8") as schema_file:
    schema = json.load(schema_file)['schema']
database = ReviewDB(data_folder)

app = Flask(__name__,
            static_folder='./react-app/build/static',
            template_folder='./react-app/build')
cors = CORS(app)
# Disable static-file caching and reload templates when they change.
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True

histogram_comparison_utils = HistogramComparison()


# [Xiong] endpoint for sending static files
@app.route('/data/<path:subpath>')
def data(subpath):
    """Serve the file at ``subpath`` from ``data_folder``.

    Args:
        subpath: path captured from the URL after ``/data/``.

    Returns:
        The response produced by ``send_from_directory``.
    """
    return send_from_directory(data_folder, subpath)


# [Xiong] endpoint for loading cluster centroids
# GET Args:
#   biz_id: id for locating an entity (e.g. hotel)
#   cid: id for locating a cluster
# Returns:
#   A csv string of requested centroids