def test_persist_channels_contain_something_in_files(self):
    """Check that every file matched after persisting channels is non-empty.

    Calls ``MetricsCalculation.persist_data`` with an empty array under the
    name ``'test'``, then reads every entry of ``./data/processed`` whose
    name contains ``test`` and asserts it has at least one line.
    """
    MetricsCalculation.persist_data({'X': np.array([])}, 'test')

    # The previous pattern '.*test*' applied `*` to the final "t", so it
    # matched any name containing "tes"; '.*test.*' requires the full word.
    regex = re.compile(r'.*test.*')
    path_to_directory = "./data/processed"
    filtered_contents = [
        name for name in os.listdir(path_to_directory) if regex.match(name)
    ]

    # Without this guard the loop below passes vacuously when nothing matched.
    assert filtered_contents, "no persisted 'test' files were found"

    for filename in filtered_contents:
        with open(os.path.join(path_to_directory, filename), 'r') as f:
            lines = f.readlines()
        assert len(lines) > 0
def test_persist_parameters_all_files_contain_same_data(self):
    """Check that the persisted parameter files agree with each other.

    Calls ``MetricsCalculation.persist_data`` with ``{'a': 0}`` under the
    name ``'test'``, loads the matching files from ``./data/processed`` via
    ``load_files`` and compares the csv, txt and json payloads key by key.
    The txt and csv values are converted with ``int`` before comparison.
    """
    MetricsCalculation.persist_data({'a': 0}, 'test')

    # The previous pattern '.*test*' applied `*` to the final "t", so it
    # matched any name containing "tes"; '.*test.*' requires the full word.
    regex = re.compile(r'.*test.*')
    path_to_directory = "./data/processed"
    filtered_contents = [
        name for name in os.listdir(path_to_directory) if regex.match(name)
    ]

    csv_data, txt_data, json_data = load_files(
        path_to_directory, filtered_contents)
    txt_data = {k: int(v) for k, v in txt_data.items()}

    assert txt_data == json_data
    for k in json_data:
        assert json_data[k] == int(csv_data[k])
        assert txt_data[k] == int(csv_data[k])
def test_persist_incorrect_channels_does_not_creates_files(self):
    """Check that persisting a non-array channel value leaves no files.

    Calls ``MetricsCalculation.persist_data`` with a plain string as the
    channel payload and asserts that ``./data/processed`` contains no entry
    whose name contains ``test``.
    """
    MetricsCalculation.persist_data({'X': 'incorrect_data'}, 'test')

    # The previous pattern '.*test*' applied `*` to the final "t", so any
    # unrelated file containing "tes" could make this assertion fail.
    regex = re.compile(r'.*test.*')
    matching = [
        name for name in os.listdir("./data/processed") if regex.match(name)
    ]
    assert len(matching) == 0
def test_persist_channels_successfully_creates_files(self):
    """Check that persisting channels produces exactly three matching files.

    Calls ``MetricsCalculation.persist_data`` with an empty array under the
    name ``'test'`` and counts the entries of ``./data/processed`` whose name
    contains ``test``.
    """
    MetricsCalculation.persist_data({'X': np.array([])}, 'test')

    # The previous pattern '.*test*' applied `*` to the final "t", so any
    # unrelated file containing "tes" would have been counted as well.
    regex = re.compile(r'.*test.*')
    matching = [
        name for name in os.listdir("./data/processed") if regex.match(name)
    ]
    # NOTE(review): presumably one file per output format - confirm against
    # persist_data.
    assert len(matching) == 3
# Metrics collection: continue from the performance log already on disk when
# there is one, otherwise start with an empty frame.
if os.path.isfile('./data/performance/performance_metrics.csv'):
    metrics = pd.read_csv('./data/performance/performance_metrics.csv')
else:
    metrics = pd.DataFrame()

channels, parameters, metrics = DataIngestor.ingest_data(
    './data/channels.txt', './data/parameters.txt', metrics)
channels, parameters, metrics = MetricsCalculation.calculate_metrics(
    channels, parameters, metrics)

# Performance metrics gathering for data persistence. Timing is only taken
# when `metrics` is not None.
# NOTE(review): presumably the calls above return None for `metrics` when
# performance tracking is disabled - confirm.
if metrics is not None:
    start = time.time()

MetricsCalculation.persist_data(channels, 'channels')
MetricsCalculation.persist_data(parameters, 'parameters')

if metrics is not None:
    end = time.time()
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # concatenating a one-row frame records the same key/value row.
    metrics = pd.concat(
        [
            metrics,
            pd.DataFrame(
                [{'key': 'metrics_persisting', 'value': end - start}]),
        ],
        ignore_index=True)
    metrics.to_csv('./data/performance/performance_metrics.csv', index=False)