def test_data_health_check(self):
    """
    Compare the data health check report with the expected result.

    The report is expected to hold an empty list under 'cases' and the
    features 'J' and 'K' under 'features'.
    """
    expected_report: dict = {'cases': [], 'features': ['J', 'K']}
    explorer = DataExplorer(df=DATA_SET, plot=False)
    self.assertDictEqual(d1=expected_report,
                         d2=explorer.data_health_check())
def test_data_distribution(self):
    """
    Check two sampled values of the data distribution report.

    Reads the 'Hamburg' entry of feature 'F' and the 'mean' entry of
    feature 'C' from DataExplorer.data_distribution() and compares them
    with the expected values.
    """
    distribution: dict = DataExplorer(df=DATA_SET,
                                      plot=False).data_distribution()
    # The 'Hamburg' entry is an integer, so strict equality is appropriate.
    self.assertEqual(first=4, second=distribution['F'].get('Hamburg'))
    mean_of_c = distribution['C'].get('mean')
    # Fail with a clear message instead of a TypeError if the key is absent.
    self.assertIsNotNone(mean_of_c, msg="'mean' is missing for feature 'C'")
    # A computed mean carries floating-point rounding noise (the previous
    # literal was 0.49000000000000005), so compare with a tolerance rather
    # than exact equality.
    self.assertAlmostEqual(first=0.49, second=mean_of_c, delta=1e-9)
def test_get_feature_types(self):
    """
    Compare the detected feature types with the expected grouping.

    The expected mapping assigns every feature name to one of the keys
    'continuous', 'categorical', 'ordinal', 'date' or 'text'.
    """
    expected_types: dict = {
        'continuous': ['C', 'G', 'H'],
        'categorical': ['A', 'B', 'F', 'I', 'J', 'K'],
        'ordinal': [],
        'date': ['D'],
        'text': ['E'],
    }
    explorer = DataExplorer(df=DATA_SET, plot=False)
    self.assertDictEqual(d1=expected_types,
                         d2=explorer.get_feature_types())
def test_data_typing(self):
    """
    Compare the result of data_typing() with the expected type per feature.

    The expected mapping names a type ('int' or 'datetime') for the
    features 'B', 'D', 'F', 'I', 'J' and 'K'.
    """
    expected_typing: dict = {
        'B': 'int',
        'D': 'datetime',
        'F': 'int',
        'I': 'int',
        'J': 'int',
        'K': 'int',
    }
    explorer = DataExplorer(df=DATA_SET, plot=False)
    self.assertDictEqual(d1=expected_typing, d2=explorer.data_typing())
import unittest

import pandas as pd

from easyexplore.data_explorer import DataExplorer
from easyexplore.text_miner import TextMiner

# Review data set, read from a path relative to the working directory.
DATA_SET: pd.DataFrame = pd.read_csv(
    filepath_or_buffer='amazon_musical_instruments_reviews.csv'
)
# Feature type mapping produced by DataExplorer for the review data.
ID_TEXT: dict = DataExplorer(df=DATA_SET).get_feature_types()
# Single TextMiner instance shared by every test in this module; it is
# built on the features listed under the 'id_text' key of ID_TEXT.
TEXT_MINER: TextMiner = TextMiner(
    df=DATA_SET,
    features=ID_TEXT.get('id_text'),
    lang='en',
    auto_interpret_natural_language=True,
)


class TextMinerTest(unittest.TestCase):
    """
    Unit tests for the TextMiner class
    """

    def test_clustering(self):
        """
        Placeholder without any assertions.
        """
        pass

    def test_detect_lang(self):
        """
        Check the '_lang' matches before and after language detection.

        Passes when get_str_match() finds no key of the miner's data frame
        containing '_lang' before detect_lang(sampling=True) is called and
        at least one such key afterwards.
        """
        def count_lang_matches() -> int:
            # Number of data frame keys matched for the substring '_lang'.
            return len(
                TEXT_MINER.get_str_match(cases=list(TEXT_MINER.df.keys()),
                                         substring='_lang')
            )

        matches_before: int = count_lang_matches()
        TEXT_MINER.detect_lang(sampling=True)
        # The second count is only evaluated when the first check holds.
        self.assertTrue(
            expr=matches_before == 0 and count_lang_matches() > 0
        )
def test_text_analyzer(self):
    """
    Check the shape of the English text analysis result.

    Passes when the second entry of the result's shape is greater than
    zero.
    """
    analysis = DataExplorer(df=TEXT_DATA).text_analyzer(lang='en')
    second_dimension = analysis.shape[1]
    self.assertTrue(expr=second_dimension > 0)
def test_break_down(self):
    """
    Check one sampled mean of the break down report.

    Reads the 'mean' entry found under
    ['continuous']['J']['de']['C'] in the result of
    DataExplorer.break_down() and compares it with the expected value.
    """
    report = DataExplorer(df=DATA_SET, plot=False).break_down()
    mean_of_c = report['continuous']['J']['de']['C'].get('mean')
    # Fail with a clear message instead of a TypeError if the key is absent.
    self.assertIsNotNone(mean_of_c, msg="'mean' is missing for feature 'C'")
    # A computed mean carries floating-point rounding noise (the previous
    # literal was 0.49000000000000005), so compare with a tolerance rather
    # than exact equality.
    self.assertAlmostEqual(first=0.49, second=mean_of_c, delta=1e-9)