def test_many_batches(self):
    # Four (string, varint) rows are spread evenly over two CellsBatch
    # messages; only the second is flagged as the last batch. Iterating the
    # result must yield every row, in order, across the batch boundary.

    # Construct expected results
    int_values = [100, 200, 300, 400]
    str_values = ['bar1', 'bar2', 'bar3', 'bar4']

    # Create cells batches to populate QueryResult. Each batch carries two
    # rows whose cells alternate STRING, VARINT; strings are NUL-separated.
    batch_1 = ProtoFactory().CellsBatch()
    batch_1.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch_1.varint_cells.extend(int_values[:2])
    batch_1.string_cells = "\0".join(str_values[:2])
    batch_1.is_last_batch = False

    batch_2 = ProtoFactory().CellsBatch()
    batch_2.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch_2.varint_cells.extend(int_values[2:])
    batch_2.string_cells = "\0".join(str_values[2:])
    batch_2.is_last_batch = True

    # Get result from api function
    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch_1, batch_2])

    # Assert results are as expected. Rows are counted while they are
    # checked: the per-row assertions alone would pass vacuously if the
    # iterator stopped early or yielded nothing at all.
    rows_seen = 0
    for num, row in enumerate(qr_iterator):
        self.assertEqual(row.foo_id, str_values[num])
        self.assertEqual(row.foo_num, int_values[num])
        rows_seen += 1
    self.assertEqual(rows_seen, len(int_values))
def test_many_batches(self):
    # Four (string, varint) rows are spread evenly over two CellsBatch
    # messages; only the second is flagged as the last batch. Iterating the
    # result must yield every row, in order, across the batch boundary.
    int_values = [100, 200, 300, 400]
    str_values = ['bar1', 'bar2', 'bar3', 'bar4']

    # Each batch carries two rows whose cells alternate STRING, VARINT.
    # Every string in string_cells is terminated by a NUL byte.
    batch_1 = ProtoFactory().CellsBatch()
    batch_1.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch_1.varint_cells.extend(int_values[:2])
    batch_1.string_cells = "\0".join(str_values[:2]) + "\0"
    batch_1.is_last_batch = False

    batch_2 = ProtoFactory().CellsBatch()
    batch_2.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch_2.varint_cells.extend(int_values[2:])
    batch_2.string_cells = "\0".join(str_values[2:]) + "\0"
    batch_2.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch_1, batch_2])

    # Count rows while checking them: the per-row assertions alone would
    # pass vacuously if the iterator stopped early or yielded nothing.
    rows_seen = 0
    for num, row in enumerate(qr_iterator):
        self.assertEqual(row.foo_id, str_values[num])
        self.assertEqual(row.foo_num, int_values[num])
        rows_seen += 1
    self.assertEqual(rows_seen, len(int_values))
def test_empty_batch(self):
    # A single batch with no cells, flagged as the last batch, combined with
    # an empty column list must iterate as an empty result.
    batch = ProtoFactory().CellsBatch()
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator([], [batch])

    # No columns and no cells were supplied, so there is nothing to inspect
    # on a per-row basis; assert directly that iteration produces zero rows
    # instead of relying on a loop body that is never expected to run.
    row_count = sum(1 for _ in qr_iterator)
    self.assertEqual(row_count, 0)
def test_empty_batch_as_pandas(self):
    # A single batch with no cells, flagged as the last batch, combined with
    # an empty column list must convert to a DataFrame without any rows.
    batch = ProtoFactory().CellsBatch()
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator([], [batch])

    qr_df = qr_iterator.as_pandas_dataframe()

    # There are no expected values to compare against for an empty result,
    # so check the row count explicitly rather than looping over rows.
    self.assertEqual(len(qr_df), 0)
def test_empty_batch(self):
    # A single batch with no cells, flagged as the last batch, combined with
    # an empty column list must iterate as an empty result.

    # Create cells batch to populate QueryResult
    batch = ProtoFactory().CellsBatch()
    batch.is_last_batch = True

    # Get result from api function
    qr_iterator = TraceProcessor.QueryResultIterator([], [batch])

    # Assert results are as expected: no columns and no cells were supplied,
    # so iteration must produce zero rows. Counting the rows makes that
    # explicit instead of relying on a loop body that is never meant to run.
    row_count = sum(1 for _ in qr_iterator)
    self.assertEqual(row_count, 0)
def test_invalid_cell_type_as_pandas(self):
    # Converting a result whose batch declares a CELL_INVALID cell into a
    # pandas DataFrame must raise TraceProcessorException.
    column_names = ['foo_id', 'foo_num']

    cells_batch = ProtoFactory().CellsBatch()
    cells_batch.is_last_batch = True
    cells_batch.varint_cells.extend([100, 200])
    cells_batch.cells.extend([
        TestQueryResultIterator.CELL_INVALID,
        TestQueryResultIterator.CELL_VARINT,
    ])

    result_iter = TraceProcessor.QueryResultIterator(column_names,
                                                     [cells_batch])

    # The declared cell types are CELL_INVALID and CELL_VARINT, yet the only
    # data supplied are two varints. That mismatch is expected to be
    # rejected with a TraceProcessorException during conversion.
    with self.assertRaises(TraceProcessorException):
        result_iter.as_pandas_dataframe()
def test_incorrect_columns_batch_as_pandas(self):
    # Four column names are declared for a batch that only carries two
    # varint cells; converting that result to pandas must raise IndexError.
    column_names = ['foo_id', 'foo_num', 'foo_dur', 'foo_ms']

    cells_batch = ProtoFactory().CellsBatch()
    cells_batch.is_last_batch = True
    cells_batch.cells.extend([TestQueryResultIterator.CELL_VARINT] * 2)
    cells_batch.varint_cells.extend([100, 200])

    result_iter = TraceProcessor.QueryResultIterator(column_names,
                                                     [cells_batch])

    # Normally the number of cells is a multiple of the number of columns.
    # Here two cells back four columns, so fetching the cell for the third
    # column is expected to run past the end of the cells list and surface
    # as an IndexError.
    # NOTE(review): other tests in this file call as_pandas_dataframe();
    # confirm as_pandas() is the intended method name here.
    with self.assertRaises(IndexError):
        result_iter.as_pandas()
def test_incorrect_columns_batch(self):
    # Four column names are declared for a batch that only carries two
    # varint cells; draining the iterator must raise.
    expected_ints = [100, 200]
    column_names = ['foo_id', 'foo_num', 'foo_dur', 'foo_ms']

    # Build the single (last) batch backing the query result.
    cells_batch = ProtoFactory().CellsBatch()
    cells_batch.cells.extend([TestQueryResultIterator.CELL_VARINT] * 2)
    cells_batch.varint_cells.extend(expected_ints)
    cells_batch.is_last_batch = True

    # Wrap the batch in the iterator under test.
    result_iter = TraceProcessor.QueryResultIterator(column_names,
                                                     [cells_batch])

    # Walking the rows must fail; any exception type is accepted here.
    with self.assertRaises(Exception):
        for _ in result_iter:
            pass
def test_one_batch(self):
    # Two (string, varint) rows are delivered in a single batch flagged as
    # the last one; iteration must yield both rows, in order.
    int_values = [100, 200]
    str_values = ['bar1', 'bar2']

    # Cells alternate STRING, VARINT; strings are NUL-separated.
    batch = ProtoFactory().CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch.varint_cells.extend(int_values)
    batch.string_cells = "\0".join(str_values)
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch])

    # Count rows while checking them: the per-row assertions alone would
    # pass vacuously if the iterator stopped early or yielded nothing.
    rows_seen = 0
    for num, row in enumerate(qr_iterator):
        self.assertEqual(row.foo_id, str_values[num])
        self.assertEqual(row.foo_num, int_values[num])
        rows_seen += 1
    self.assertEqual(rows_seen, len(int_values))
def test_incorrect_cells_batch_as_pandas(self):
    # The batch declares two STRING and two VARINT cells but only provides
    # string data; converting that result to pandas must raise IndexError.
    str_values = ['bar1', 'bar2']

    batch = ProtoFactory().CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch.string_cells = "\0".join(str_values)
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch])

    # The batch specifies there ought to be 2 cells of type VARINT and 2
    # cells of type STRING, but no varint cells are defined in the batch
    # (only string_cells is populated). Reading a VARINT cell is therefore
    # expected to index into an empty varint list and raise IndexError.
    # NOTE(review): other tests in this file call as_pandas_dataframe();
    # confirm as_pandas() is the intended method name here.
    with self.assertRaises(IndexError):
        qr_iterator.as_pandas()
def test_incorrect_cells_batch(self):
    # The batch declares two STRING and two VARINT cells but only string
    # data is provided (varint_cells is never populated); draining the
    # iterator must raise.
    expected_strs = ['bar1', 'bar2']
    column_names = ['foo_id', 'foo_num']
    string_then_varint = [
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ]

    # Build the single (last) batch backing the query result.
    cells_batch = ProtoFactory().CellsBatch()
    cells_batch.cells.extend(string_then_varint * 2)
    cells_batch.string_cells = "\0".join(expected_strs)
    cells_batch.is_last_batch = True

    # Wrap the batch in the iterator under test.
    result_iter = TraceProcessor.QueryResultIterator(column_names,
                                                     [cells_batch])

    # Walking the rows must fail; any exception type is accepted here.
    with self.assertRaises(Exception):
        for _ in result_iter:
            pass
def test_one_batch_as_pandas(self):
    # Two (string, varint) rows are delivered in a single batch flagged as
    # the last one; the pandas conversion must contain both rows, in order.
    int_values = [100, 200]
    str_values = ['bar1', 'bar2']

    # Cells alternate STRING, VARINT; every string is NUL-terminated.
    batch = ProtoFactory().CellsBatch()
    batch.cells.extend([
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
        TestQueryResultIterator.CELL_STRING,
        TestQueryResultIterator.CELL_VARINT,
    ])
    batch.varint_cells.extend(int_values)
    batch.string_cells = "\0".join(str_values) + "\0"
    batch.is_last_batch = True

    qr_iterator = TraceProcessor.QueryResultIterator(['foo_id', 'foo_num'],
                                                     [batch])

    qr_df = qr_iterator.as_pandas_dataframe()

    # Check the row count first: the per-row assertions below would pass
    # vacuously on an empty or truncated DataFrame.
    self.assertEqual(len(qr_df), len(int_values))
    for num, row in qr_df.iterrows():
        self.assertEqual(row['foo_id'], str_values[num])
        self.assertEqual(row['foo_num'], int_values[num])