def test_simple_case(self):
    """Run a filter-and-project query over a small CSV table.

    The query keeps records after the first one whose ``name`` is "abc"
    and emits the name together with ``value`` multiplied by ten. The
    output is parsed back and compared with the expected rows.
    """
    source_rows = [
        ['name', 'value'],
        ['abc', '1234'],
        ['abc', '1234'],
        ['efg', '100'],
        ['abc', '100'],
        ['cde', '12999'],
        ['aaa', '2000'],
        ['abc', '100'],
    ]
    expected_table = [
        ['abc', '12340'],
        ['abc', '12340'],
        ['abc', '1000'],
        ['abc', '1000'],
    ]

    delim = ','
    policy = 'quoted'
    csv_text = table_to_csv_string_random(source_rows, delim, policy)
    src_stream, encoding = string_to_randomly_encoded_stream(csv_text)
    reader = rbql_csv.CSVRecordIterator(src_stream, True, encoding, delim=delim, policy=policy)

    # A None encoding means the data travels as text, so a text buffer is
    # used; otherwise the writer gets a byte buffer.
    if encoding is None:
        dst_stream = io.StringIO()
    else:
        dst_stream = io.BytesIO()
    writer = rbql_csv.CSVWriter(dst_stream, False, encoding, delim, policy)

    query_text = 'select a.name, int(a.value) * 10 where NR > 1 and a.name == "abc"'
    error_info, warnings = rbql.generic_run(query_text, reader, writer)
    self.assertEqual(error_info, None)
    self.assertEqual(warnings, [])

    # Rewind and parse what the query wrote.
    dst_stream.seek(0)
    result_reader = rbql_csv.CSVRecordIterator(dst_stream, True, encoding, delim=delim, policy=policy)
    actual_table = result_reader.get_all_records()
    self.assertEqual(expected_table, actual_table)
def write_and_parse_back(table, encoding, delim, policy):
    """Serialize ``table`` with ``CSVWriter`` and parse it back.

    The table is written to an in-memory buffer using a line separator
    picked at random from ``line_separators``. The write must produce no
    warnings. The buffer is then rewound and read with
    ``CSVRecordIterator``.

    Returns the records obtained from ``get_all_records()``.
    """
    # Byte buffer when an encoding is given, text buffer otherwise.
    if encoding is None:
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()

    separator = random.choice(line_separators)
    csv_writer = rbql_csv.CSVWriter(buffer, False, encoding, delim, policy, separator)
    csv_writer._write_all(table)
    assert len(csv_writer.get_warnings()) == 0

    buffer.seek(0)
    reader = rbql_csv.CSVRecordIterator(buffer, True, encoding, delim=delim, policy=policy)
    return reader.get_all_records()
def test_monocolumn_write_failure(self):
    """Check that writing two-field records with policy 'monocolumn' raises.

    The raised exception's message must contain the text
    'some records have more than one field'.
    """
    encoding = None
    writer_stream = io.StringIO()
    delim = None
    policy = 'monocolumn'
    table = [["this will not", "work"], ["as monocolumn", "table"]]
    writer = rbql_csv.CSVWriter(writer_stream, True, encoding, delim, policy, '\n')
    with self.assertRaises(Exception) as cm:
        writer._write_all(table)
    # assertIn shows both the expected fragment and the actual message on
    # failure, unlike assertTrue(... .find(...) != -1).
    self.assertIn('some records have more than one field', str(cm.exception))
def test_output_warnings(self):
    """Check the text and the warnings produced for a problematic table.

    The table contains a ``None`` value and a field that includes the
    delimiter while the 'simple' policy is in use. The test pins both the
    exact text written and the exact list of warnings reported.
    """
    sink = io.StringIO()
    rows = [["hello,world", None], ["hello", "world"]]
    csv_writer = rbql_csv.CSVWriter(sink, False, None, ',', 'simple', '\n')
    csv_writer._write_all(rows)

    sink.seek(0)
    written_text = sink.getvalue()
    self.assertEqual('hello,world,\nhello,world\n', written_text)

    reported = csv_writer.get_warnings()
    self.assertEqual(
        [
            'None values in output were replaced by empty strings',
            'Some output fields contain separator',
        ],
        reported,
    )
def _do_test_random_headers(self):
    """Run one randomized round of a query that refers to columns by name.

    A table with a random header and random rows is generated. Two
    distinct columns are chosen as query columns: the first holds
    'foo bar good' or 'foo bar bad', the second holds '10' or '0'. A row
    is expected in the output only when it holds 'foo bar good' and '10'.
    The header row is always expected in the output. The table is
    serialized, queried, parsed back, and compared with the expected
    table; the query must not produce warnings.
    """
    num_rows = natural_random(0, 10)
    num_cols = natural_random(2, 10)
    input_table = []
    expected_table = []

    # Build a header of unique names: either random printable ASCII or one
    # of a few fixed identifiers. Retry until the name is not a duplicate.
    header_row = []
    for _ in range(num_cols):
        while True:
            if random.choice([True, False]):
                field_name_len = natural_random(1, 10)
                field_name_bytes = [
                    random.randint(32, 126) for _ in range(field_name_len)
                ]
                field_name = bytes(bytearray(field_name_bytes)).decode('ascii')
            else:
                field_name = random.choice(
                    ['_foo', 'bar', 'Bar', '__foo', 'a', 'b', 'A', 'B'])
            if field_name not in header_row:
                header_row.append(field_name)
                break
    input_table.append(header_row[:])
    expected_table.append(header_row[:])

    # Pick two distinct columns that the query will reference.
    all_col_nums = list(range(num_cols))
    query_col_1 = random.choice(all_col_nums)
    all_col_nums.remove(query_col_1)
    query_col_2 = random.choice(all_col_nums)

    for _ in range(num_rows):
        is_good_row = True
        row = []
        for col_id in range(num_cols):
            if col_id == query_col_1:
                field_value = random.choice(['foo bar good', 'foo bar bad'])
                if field_value != 'foo bar good':
                    is_good_row = False
            elif col_id == query_col_2:
                field_value = random.choice(['10', '0'])
                if field_value != '10':
                    is_good_row = False
            else:
                field_value = make_random_decoded_binary_csv_entry(
                    0, 10, restricted_chars=['\r', '\n'])
            row.append(field_value)
        input_table.append(row[:])
        if is_good_row:
            expected_table.append(row[:])

    query_col_name_1 = make_column_variable(header_row[query_col_1])
    query_col_name_2 = make_column_variable(header_row[query_col_2])
    query = 'select * where ({}.endswith("good") and int({}) * 2 == 20)'.format(
        query_col_name_1, query_col_name_2)

    delim = ','
    policy = 'quoted'
    csv_data = table_to_csv_string_random(input_table, delim, policy)
    input_stream, encoding = string_to_randomly_encoded_stream(csv_data)
    input_iterator = rbql_csv.CSVRecordIterator(
        input_stream, encoding, delim=delim, policy=policy, has_header=True)

    # Byte buffer when an encoding is given, text buffer otherwise.
    output_stream = io.BytesIO() if encoding is not None else io.StringIO()
    output_writer = rbql_csv.CSVWriter(output_stream, False, encoding, delim, policy)

    warnings = []
    rbql.query(query, input_iterator, output_writer, warnings)
    input_stream.close()
    self.assertEqual(warnings, [])

    # Rewind and parse what the query wrote.
    output_stream.seek(0)
    output_iterator = rbql_csv.CSVRecordIterator(
        output_stream, encoding, delim=delim, policy=policy)
    output_table = output_iterator.get_all_records()
    output_stream.close()
    self.assertEqual(expected_table, output_table)