def test_multiple_chunks(self, data):
    chunks = random_split(data)
    chopper = TimeChopper(chunks, 5)
    result = list(chopper)
    assert len(result) == MAX_TS
    concatenated = pd.concat(r[1] for r in result)
    assert len(data) == len(concatenated), "We did not lose anything"
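
# The chunked tests above and below assume a random_split helper defined
# elsewhere in this module. A minimal sketch of the idea, under a
# hypothetical name so it does not shadow the real helper: cut a DataFrame
# into consecutive chunks of random length, so the chopper has to reassemble
# per-second groups that may span chunk boundaries. Chunk sizes here are
# illustrative assumptions.
def _random_split_sketch(df, min_size=1, max_size=100):
    import random
    i = 0
    while i < len(df):
        step = random.randint(min_size, max_size)
        yield df.iloc[i:i + step]
        i += step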

def test_invalid_ammo(self, phout, results):
    with open(phout) as fp:
        reader = [string_to_df(line) for line in fp.readlines()]
    pipeline = Aggregator(
        TimeChopper(
            DataPoller(source=reader, poll_period=0), cache_size=3),
        AGGR_CONFIG, True)
    with open(results) as fp:
        results_parsed = json.load(fp)
    for item, result in zip(pipeline, results_parsed):
        assert item == result

def test_partially_reversed_data(self, data):
    results_queue = Queue()
    chunks = list(random_split(data))
    chunks[5], chunks[6] = chunks[6], chunks[5]
    pipeline = Aggregator(
        TimeChopper(
            DataPoller(source=chunks, poll_period=0.1), cache_size=3),
        AGGR_CONFIG, False)
    drain = Drain(pipeline, results_queue)
    drain.run()
    assert results_queue.qsize() == MAX_TS
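
# Drain (imported from the library) is used above to consume the aggregation
# pipeline and push every produced item into a queue, so the test only has to
# count queue entries. A rough sketch of that idea, under a hypothetical name,
# assuming nothing about the real class beyond what the test exercises:
def _drain_sketch(source, destination):
    # pull every aggregated item from the pipeline and park it in the queue
    for item in source:
        destination.put(item)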

def test_invalid_ammo(self, phout, expected_results):
    with open(os.path.join(get_test_path(), phout)) as fp:
        reader = [string_to_df(line) for line in fp.readlines()]
    pipeline = Aggregator(
        TimeChopper(
            DataPoller(source=reader, poll_period=0), cache_size=3),
        AGGR_CONFIG, True)
    with open(os.path.join(get_test_path(), expected_results)) as fp:
        expected_results_parsed = json.load(fp)
    for item, expected_result in zip(pipeline, expected_results_parsed):
        for key, expected_value in expected_result.items():
            assert item[key] == expected_value

def test_partially_reversed_data(self, data):
    chunks = list(random_split(data))
    chunks[5], chunks[6] = chunks[6], chunks[5]
    chopper = TimeChopper(chunks, 5)
    result = list(chopper)
    assert len(result) == MAX_TS, \
        "DataFrame is split into the proper number of chunks"
    concatenated = pd.concat(r[1] for r in result)
    assert len(data) == len(concatenated), "We did not lose anything"
    assert np.allclose(concatenated.values, data.values), \
        "We did not corrupt the data"

def test_slow_producer(self, data):
    results_queue = Queue()
    chunks = list(random_split(data))
    chunks[5], chunks[6] = chunks[6], chunks[5]

    def producer():
        # simulate a slow source that sometimes has no data ready yet
        for chunk in chunks:
            if np.random.random() > 0.5:
                yield None
            yield chunk

    pipeline = Aggregator(
        TimeChopper(
            DataPoller(source=producer(), poll_period=0.1), cache_size=3),
        AGGR_CONFIG, False)
    drain = Drain(pipeline, results_queue)
    drain.run()
    assert results_queue.qsize() == MAX_TS
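
# In test_slow_producer the None items stand in for moments when the source
# has nothing to hand over, and the test relies on DataPoller tolerating such
# gaps rather than failing. A rough sketch of that polling idea, under a
# hypothetical name; not the library's implementation:
def _poll_sketch(source, poll_period):
    import time
    for chunk in source:
        if chunk is None:
            # nothing ready yet: wait and ask again on the next iteration
            time.sleep(poll_period)
            continue
        yield chunk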

def test_one_chunk(self, data):
    chopper = TimeChopper([data], 5)
    result = list(chopper)
    assert len(result) == MAX_TS
    concatenated = pd.concat(r[1] for r in result)
    assert len(data) == len(concatenated), "We did not lose anything"
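
# These tests rely on a data fixture and a MAX_TS constant defined elsewhere
# (module level or conftest): a time-series DataFrame whose "ts" column spans
# MAX_TS distinct seconds, which is why TimeChopper is expected to emit
# exactly MAX_TS (timestamp, frame) pairs. A hypothetical stand-in builder for
# illustration only; the column layout is an assumption, not the real
# fixture's schema (uses the pd/np imports of this module):
def _make_data_sketch(seconds=1000, rows_per_second=10):
    ts = np.repeat(np.arange(seconds), rows_per_second)
    return pd.DataFrame({"ts": ts, "rt": np.random.randint(1, 100, ts.size)})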