def test_table_run_wrong_from(self):
    """Check that a FROM clause naming an unregistered table raises an error.

    The registry holds only 'input_table' while the query selects from
    'input_table_nonexistent'. No output rows are compared; the test pins
    the exact text of the raised exception.
    """
    input_table = [('Roosevelt', 1858), ('Napoleon', 1769), ('Confucius', -551)]
    query_text = 'select a2 // 10, "name " + a1 from input_table_nonexistent order by a2'
    output_table = []
    warnings = []
    # No iterator is supplied directly; the query names its table in FROM.
    input_iterator = None
    output_writer = rbql_engine.TableWriter(output_table)
    tables_registry = rbql_engine.ListTableRegistry(
        [rbql_engine.ListTableInfo('input_table', input_table, None)],
        normalize_column_names=True)
    with self.assertRaises(Exception) as cm:
        rbql.query(query_text, input_iterator, output_writer, warnings, tables_registry, user_init_code='')
    e = cm.exception
    self.assertEqual('Unable to find input table: "input_table_nonexistent"', str(e))
def test_table_run_simple_join(self):
    """Run a JOIN query over registered in-memory tables and verify the rows.

    Three tables are registered (input, join and an empty unused one); the
    query must produce no warnings and exactly the expected joined rows.
    """
    people = [
        ('Roosevelt', 1858, 'USA'),
        ('Napoleon', 1769, 'France'),
        ('Confucius', -551, 'China'),
    ]
    countries = [('China', 1386), ('France', 67), ('USA', 327), ('Russia', 140)]
    query = 'select a2 // 10, b2, "name " + a1 FROM my_input_table order by a2 JOIN my_join_table on a3 == b1'
    wanted_rows = [
        [-56, 1386, 'name Confucius'],
        [176, 67, 'name Napoleon'],
        [185, 327, 'name Roosevelt'],
    ]

    result_rows = []
    query_warnings = []
    writer = rbql_engine.TableWriter(result_rows)
    registered = [
        rbql_engine.ListTableInfo('my_input_table', people, None),
        rbql_engine.ListTableInfo('my_join_table', countries, None),
        rbql_engine.ListTableInfo('unused_table', [], None),
    ]
    registry = rbql_engine.ListTableRegistry(registered, normalize_column_names=True)

    # The input iterator is None: the query names its source table in FROM.
    rbql.query(query, None, writer, query_warnings, registry, user_init_code='')

    self.assertEqual(query_warnings, [])
    self.assertEqual(wanted_rows, result_rows)
def test_table_run_simple(self):
    """Run a plain SELECT ... FROM ... ORDER BY over a registered list table.

    The registry also carries an empty 'unused_table'; the query must emit
    no warnings and return the expected rows in the expected order.
    """
    people = [('Roosevelt', 1858), ('Napoleon', 1769), ('Confucius', -551)]
    query = 'select a2 // 10, "name " + a1 from input_table order by a2'
    wanted_rows = [
        [-56, 'name Confucius'],
        [176, 'name Napoleon'],
        [185, 'name Roosevelt'],
    ]

    result_rows = []
    query_warnings = []
    writer = rbql_engine.TableWriter(result_rows)
    registered = [
        rbql_engine.ListTableInfo('input_table', people, None),
        rbql_engine.ListTableInfo('unused_table', [], None),
    ]
    registry = rbql_engine.ListTableRegistry(registered, normalize_column_names=True)

    # The input iterator is None: the query names its source table in FROM.
    rbql.query(query, None, writer, query_warnings, registry, user_init_code='')

    self.assertEqual(query_warnings, [])
    self.assertEqual(wanted_rows, result_rows)
def test_simple_case(self):
    """Query a small CSV by header-derived column names and check the result.

    The input is serialized to CSV, wrapped in a randomly encoded stream,
    filtered to rows whose name is "abc" with the value multiplied by 10,
    then the written output is parsed back and compared with the expectation.
    """
    source_rows = [
        ['name', 'value'],
        ['abc', '1234'],
        ['abc', '1234'],
        ['efg', '100'],
        ['abc', '100'],
        ['cde', '12999'],
        ['aaa', '2000'],
        ['abc', '100'],
    ]
    wanted_rows = [
        ['name', 'col2'],
        ['abc', '12340'],
        ['abc', '12340'],
        ['abc', '1000'],
        ['abc', '1000'],
    ]
    delim = ','
    policy = 'quoted'

    csv_text = table_to_csv_string_random(source_rows, delim, policy)
    input_stream, encoding = string_to_randomly_encoded_stream(csv_text)
    reader = rbql_csv.CSVRecordIterator(input_stream, encoding, delim=delim, policy=policy, has_header=True)

    # The output stream kind follows the input: text when no encoding is set, bytes otherwise.
    if encoding is None:
        output_stream = io.StringIO()
    else:
        output_stream = io.BytesIO()
    writer = rbql_csv.CSVWriter(output_stream, False, encoding, delim, policy)

    query_warnings = []
    rbql.query('select a.name, int(a.value) * 10 where a.name == "abc"', reader, writer, query_warnings)
    input_stream.close()
    self.assertEqual(query_warnings, [])

    # Rewind and parse what the writer produced.
    output_stream.seek(0)
    result_reader = rbql_csv.CSVRecordIterator(output_stream, encoding, delim=delim, policy=policy)
    result_rows = result_reader.get_all_records()
    output_stream.close()
    self.assertEqual(wanted_rows, result_rows)
def _do_test_random_headers(self):
    """Run one randomized round of querying columns by header-derived names.

    Builds a table with a random duplicate-free header, fills two randomly
    chosen columns with values that decide whether a row matches the query,
    runs the query through the CSV reader/writer pair and compares the
    parsed output with the locally computed expectation.
    """
    num_rows = natural_random(0, 10)
    num_cols = natural_random(2, 10)

    header_row = []
    for _col in range(num_cols):
        # Keep drawing until the name does not clash with an earlier column.
        while True:
            if random.choice([True, False]):
                field_name_len = natural_random(1, 10)
                # Code points 32..126, decoded as ASCII below.
                field_name_bytes = [random.randint(32, 126) for _ in range(field_name_len)]
                field_name = bytes(bytearray(field_name_bytes)).decode('ascii')
            else:
                field_name = random.choice(['_foo', 'bar', 'Bar', '__foo', 'a', 'b', 'A', 'B'])
            if field_name not in header_row:
                header_row.append(field_name)
                break

    input_table = [header_row[:]]
    expected_table = [header_row[:]]

    # Pick two distinct columns to drive the WHERE condition.
    all_col_nums = list(range(num_cols))
    query_col_1 = random.choice(all_col_nums)
    all_col_nums.remove(query_col_1)
    query_col_2 = random.choice(all_col_nums)

    for _row in range(num_rows):
        is_good_row = True
        row = []
        for col_id in range(num_cols):
            if col_id == query_col_1:
                field_value = random.choice(['foo bar good', 'foo bar bad'])
                if field_value != 'foo bar good':
                    is_good_row = False
            elif col_id == query_col_2:
                field_value = random.choice(['10', '0'])
                if field_value != '10':
                    is_good_row = False
            else:
                field_value = make_random_decoded_binary_csv_entry(0, 10, restricted_chars=['\r', '\n'])
            row.append(field_value)
        input_table.append(row[:])
        # Only rows where both driving columns hold the matching value are expected back.
        if is_good_row:
            expected_table.append(row[:])

    query_col_name_1 = make_column_variable(header_row[query_col_1])
    query_col_name_2 = make_column_variable(header_row[query_col_2])
    query = 'select * where ({}.endswith("good") and int({}) * 2 == 20)'.format(query_col_name_1, query_col_name_2)

    delim = ','
    policy = 'quoted'
    csv_data = table_to_csv_string_random(input_table, delim, policy)
    # The helper returns both the stream and the encoding it used.
    input_stream, encoding = string_to_randomly_encoded_stream(csv_data)
    input_iterator = rbql_csv.CSVRecordIterator(input_stream, encoding, delim=delim, policy=policy, has_header=True)

    # The output stream kind follows the input: bytes when an encoding is set, text otherwise.
    output_stream = io.BytesIO() if encoding is not None else io.StringIO()
    output_writer = rbql_csv.CSVWriter(output_stream, False, encoding, delim, policy)

    warnings = []
    rbql.query(query, input_iterator, output_writer, warnings)
    input_stream.close()
    self.assertEqual(warnings, [])

    # Rewind and parse what the writer produced.
    output_stream.seek(0)
    output_iterator = rbql_csv.CSVRecordIterator(output_stream, encoding, delim=delim, policy=policy)
    output_table = output_iterator.get_all_records()
    output_stream.close()
    self.assertEqual(expected_table, output_table)