def join(keys, tables):
    """Merge a list of `Table` objects using `keys` to group rows.

    Rows from every table that share the same values for `keys` are folded
    into a single row of the merged table. Fields a row does not provide
    stay `None`; when several rows provide the same field, the value seen
    last wins.

    :param keys: iterable of field names used to group rows.
    :param tables: iterable of `Table` objects (each must expose `.fields`
        and yield rows that provide `_asdict()`).
    :returns: a new `Table` whose fields are the union of all tables' fields.
    :raises ValueError: if a key is not a field of the merged table.
    """
    # Both arguments are walked more than once (field collection, validation
    # and once per row), so one-shot iterators must be materialized first;
    # otherwise the second pass would silently see nothing.
    keys = list(keys)
    tables = list(tables)

    # Make new (merged) Table fields
    fields = OrderedDict()
    for table in tables:
        fields.update(table.fields)
    # TODO: may raise an error if a same field is different in some tables

    # Check if all keys are inside merged Table's fields
    for key in keys:
        if key not in fields:
            raise ValueError('Invalid key: "{}"'.format(key))

    # Group rows by key, without missing ordering
    data = OrderedDict()
    for table in tables:
        for row in table:
            row_key = tuple(getattr(row, key) for key in keys)
            if row_key not in data:
                # `fromkeys` keeps the merged field order; going through a
                # plain dict first would lose it on interpreters whose dicts
                # are unordered.
                data[row_key] = OrderedDict.fromkeys(fields)
            data[row_key].update(row._asdict())

    merged = Table(fields=fields)
    merged.extend(data.values())
    return merged
def create_table(data, meta=None, force_headers=None, fields=None,
                 skip_header=True, *args, **kwargs):
    """Build a `Table` from an iterable of rows.

    :param data: iterable of rows; it is fully materialized into a list.
    :param meta: passed through to `Table(meta=...)`.
    :param force_headers: header used as-is when `fields` is not given;
        when it is `None` the first row of `data` is used as the header.
    :param fields: optional mapping of field name to field type. When not
        given, types are detected with `detect_types`.
    :param skip_header: when `fields` is given, drop the first row of `data`
        (it is treated as a header row).
    :param args, kwargs: forwarded to `detect_types`.
    :returns: the populated `Table`.
    :raises AssertionError: when `fields` is given but is not an
        `OrderedDict` (only checked if `skip_header` is true), or when its
        length differs from the widest data row.
    :raises ValueError: from `max()` when `fields` is given and no data rows
        remain.
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = list(data)

    if fields is None:
        if force_headers is None:
            header = make_header(table_rows[0])
            table_rows = table_rows[1:]
        else:
            header = force_headers
        fields = detect_types(header, table_rows, *args, **kwargs)
    else:
        if skip_header:
            table_rows = table_rows[1:]
            header = make_header(fields.keys())
            assert type(fields) is collections.OrderedDict
            # Re-key the field types by the generated header names while
            # keeping the caller's column order: the mapping is required to
            # be ordered just above, so it must not be rebuilt as a plain
            # (potentially unordered) dict.
            fields = collections.OrderedDict(
                [(field_name, fields[key])
                 for field_name, key in zip(header, fields)])
        else:
            header = make_header(fields.keys())

        # TODO: may reuse max_columns from html
        max_columns = max(len(row) for row in table_rows)
        assert len(fields) == max_columns

    # TODO: put this inside Table.__init__
    table = Table(fields=fields, meta=meta)
    for row in table_rows:
        table.append({field_name: value
                      for field_name, value in zip(header, row)})

    return table
def transform(fields, function, *tables):
    """Return a new table based on other tables and a transformation function.

    `function` is called as `function(row, table)` for every row of every
    table; each truthy result is appended to the new table and falsy results
    are dropped.
    """
    result_table = Table(fields=fields)

    for source in tables:
        for source_row in source:
            transformed = function(source_row, source)
            # Falsy results (None, empty dict, ...) mean "skip this row".
            if transformed:
                result_table.append(transformed)

    return result_table
def setUp(self):
    """Create a two-field table holding three rows for every test."""
    field_types = {'name': rows.fields.UnicodeField,
                   'birthdate': rows.fields.DateField, }
    self.table = Table(fields=field_types)

    # The first row is kept on the instance so tests can reuse it.
    self.first_row = {'name': u'Álvaro Justen',
                      'birthdate': datetime.date(1987, 4, 29)}
    self.table.append(self.first_row)

    other_rows = [
        {'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
        # Birthdate is given as text on purpose here.
        {'name': u'Douglas Adams', 'birthdate': '1952-03-11'},
    ]
    for other_row in other_rows:
        self.table.append(other_row)
def create_table(data, meta=None, fields=None, skip_header=True,
                 import_fields=None, samples=None, force_types=None,
                 *args, **kwargs):
    """Build a `Table` from an iterable of rows.

    :param data: iterable of rows; it is consumed lazily.
    :param meta: passed through to `Table(meta=...)`.
    :param fields: optional `OrderedDict` mapping field name to field type.
        When not given, the first row is used as the header and types are
        detected with `detect_types`.
    :param skip_header: when `fields` is given, discard the first row.
    :param import_fields: optional iterable of field names; when given, only
        those fields (in that order) are kept in the table.
    :param samples: when detecting types, the number of rows to sample;
        `None` samples every row.
    :param force_types: optional mapping of field name to field type that
        overrides detected types (only used when `fields` is not given).
    :param args, kwargs: forwarded to `detect_types`.
    :returns: the populated `Table`.
    :raises ValueError: if `fields` is given but is not an `OrderedDict`, or
        if `import_fields` names a field that is not in the header.
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = iter(data)
    sample_rows = []

    if fields is None:
        header = make_header(next(table_rows))

        if samples is not None:
            sample_rows = list(islice(table_rows, 0, samples))
        else:
            sample_rows = list(table_rows)

        fields = detect_types(header, sample_rows, *args, **kwargs)

        if force_types is not None:
            # TODO: optimize field detection (ignore fields on `force_types`)
            for field_name, field_type in force_types.items():
                fields[field_name] = field_type
    else:
        if not isinstance(fields, OrderedDict):
            raise ValueError('`fields` must be an `OrderedDict`')

        if skip_header:
            # The header row carries no data; just drop it.
            next(table_rows)

        header = make_header(list(fields.keys()))
        fields = OrderedDict([(field_name, fields[key])
                              for field_name, key in zip(header, fields)])

    if import_fields is not None:
        # TODO: can optimize if import_fields is not None.
        # Example: do not detect all columns
        import_fields = make_header(import_fields)

        # Report unknown names in the order the caller gave them, so the
        # error message is deterministic (set iteration order is not).
        known = set(header)
        invalid = []
        for field in import_fields:
            if field not in known and field not in invalid:
                invalid.append(field)
        if invalid:
            field_names = ', '.join('"{}"'.format(field) for field in invalid)
            raise ValueError("Invalid field names: {}".format(field_names))

        new_fields = OrderedDict()
        for field_name in import_fields:
            new_fields[field_name] = fields[field_name]
        fields = new_fields

    table = Table(fields=fields, meta=meta)
    # TODO: put this inside Table.__init__
    # Sampled rows were already pulled out of the iterator, so they are
    # replayed before the rows that remain in it.
    for row in chain(sample_rows, table_rows):
        table.append({field_name: value
                      for field_name, value in zip(header, row)})

    return table
def import_from_uwsgi_log(filename):
    """Read the file at `filename` and return a `Table` of its log entries.

    Lines that `REGEXP_UWSGI_LOG` does not match are ignored. Only the first
    match of each line is used.
    """
    field_names = UWSGI_FIELDS.keys()
    table = Table(fields=UWSGI_FIELDS)

    with open(filename) as log_file:
        for log_line in log_file:
            matches = REGEXP_UWSGI_LOG.findall(log_line)
            if not matches:
                continue

            values = list(matches[0])
            # Convert datetime
            values[2] = strptime(values[2], UWSGI_DATETIME_FORMAT)
            # Convert generation time (micros -> seconds)
            values[5] = float(values[5]) / 1000000
            table.append(dict(zip(field_names, values)))

    return table
def setUp(self):
    """Prepare `self.table` (two fields, three rows) and `self.first_row`."""
    self.first_row = {
        "name": "Álvaro Justen",
        "birthdate": datetime.date(1987, 4, 29),
    }
    second_row = {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)}
    # The last row passes its birthdate as a string instead of a date.
    third_row = {"name": "Douglas Adams", "birthdate": "1952-03-11"}

    self.table = Table(fields={
        "name": rows.fields.TextField,
        "birthdate": rows.fields.DateField
    })
    self.table.append(self.first_row)
    self.table.append(second_row)
    self.table.append(third_row)
def import_from_uwsgi_log(filename):
    """Build a `Table` from the log lines found in the file `filename`.

    Every line is searched with `REGEXP_UWSGI_LOG`; lines without a match
    are skipped and only the first match on a line is imported.
    """
    fields = UWSGI_FIELDS.keys()
    table = Table(fields=UWSGI_FIELDS)

    def row_from_match(match):
        # Turn one regexp match (a sequence of captured strings) into a row.
        data = list(match)
        # Convert datetime
        data[2] = strptime(data[2], UWSGI_DATETIME_FORMAT)
        # Convert generation time (micros -> seconds)
        data[5] = float(data[5]) / 1000000
        return dict(zip(fields, data))

    with open(filename) as fobj:
        for line in fobj:
            found = REGEXP_UWSGI_LOG.findall(line)
            if found:
                table.append(row_from_match(found[0]))

    return table
def setUp(self):
    """Build the shared fixture: a `Table` with name/birthdate rows."""
    table = Table(
        fields={"name": rows.fields.TextField, "birthdate": rows.fields.DateField}
    )
    first_row = {
        "name": "Álvaro Justen",
        "birthdate": datetime.date(1987, 4, 29),
    }
    fixture_rows = (
        first_row,
        {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)},
        # Text birthdate, unlike the `datetime.date` values above.
        {"name": "Douglas Adams", "birthdate": "1952-03-11"},
    )

    self.table = table
    self.first_row = first_row
    for fixture_row in fixture_rows:
        self.table.append(fixture_row)
class TableTestCase(unittest.TestCase):
    """Tests for `rows.Table`: iteration, item access, `+` and ordering.

    Every test starts from the three-row, two-field table built in `setUp`.
    """

    def setUp(self):
        self.table = Table(fields={
            'name': rows.fields.TextField,
            'birthdate': rows.fields.DateField,
        })
        # Kept on the instance so tests can modify and re-insert it.
        self.first_row = {
            'name': 'Álvaro Justen',
            'birthdate': datetime.date(1987, 4, 29)
        }
        self.table.append(self.first_row)
        self.table.append({
            'name': 'Somebody',
            'birthdate': datetime.date(1990, 2, 1)
        })
        # Birthdate given as text; `test_table_iteration` expects a date back.
        self.table.append({'name': 'Douglas Adams', 'birthdate': '1952-03-11'})

    def test_Table_is_present_on_main_namespace(self):
        self.assertIn('Table', dir(rows))
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        # TODO: may test with all field types (using tests.utils.table)

        table_rows = [row for row in self.table]
        self.assertEqual(len(table_rows), 3)
        self.assertEqual(table_rows[0].name, 'Álvaro Justen')
        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
        self.assertEqual(table_rows[1].name, 'Somebody')
        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
        self.assertEqual(table_rows[2].name, 'Douglas Adams')
        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))

    def test_table_slicing(self):
        self.assertEqual(len(self.table[::2]), 2)
        self.assertEqual(self.table[::2][0].name, 'Álvaro Justen')

    def test_table_slicing_error(self):
        # A list is not a supported key type.
        with self.assertRaises(ValueError) as context_manager:
            self.table[[1]]
        self.assertEqual(type(context_manager.exception), ValueError)

    def test_table_insert_row(self):
        self.table.insert(1, {
            'name': 'Grace Hopper',
            'birthdate': datetime.date(1909, 12, 9)
        })
        self.assertEqual(self.table[1].name, 'Grace Hopper')

    def test_table_append_error(self):
        # TODO: may mock these validations and test only on *Field tests
        with self.assertRaises(ValueError) as context_manager:
            self.table.append({
                'name': 'Álvaro Justen'.encode('utf-8'),
                'birthdate': '1987-04-29'
            })
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertEqual(context_manager.exception.args[0],
                         'Binary is not supported')

        with self.assertRaises(ValueError) as context_manager:
            self.table.append({'name': 'Álvaro Justen', 'birthdate': 'WRONG'})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertIn('does not match format',
                      context_manager.exception.args[0])

    def test_table_getitem_invalid_type(self):
        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14]
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            self.table[b'name']
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_getitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            self.table['doesnt-exist']

        self.assertEqual(exception_context.exception.args[0],
                         'doesnt-exist')

    def test_table_getitem_column_happy_path(self):
        expected_values = ['Álvaro Justen', 'Somebody', 'Douglas Adams']
        self.assertEqual(self.table['name'], expected_values)

        expected_values = [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11)
        ]
        self.assertEqual(self.table['birthdate'], expected_values)

    def test_table_setitem_row(self):
        self.first_row['name'] = 'turicas'
        self.first_row['birthdate'] = datetime.date(2000, 1, 1)
        self.table[0] = self.first_row
        self.assertEqual(self.table[0].name, 'turicas')
        self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        self.assertEqual(self.table.field_names,
                         list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types,
                         list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['user_id'] = [4, 5, 6]

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields + 1)

        self.assertIn('user_id', self.table.fields)
        self.assertIs(self.table.fields['user_id'], rows.fields.IntegerField)
        self.assertEqual(self.table[0].user_id, 4)
        self.assertEqual(self.table[1].user_id, 5)
        self.assertEqual(self.table[2].user_id, 6)

    def test_table_setitem_column_happy_path_replace_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['name'] = [4, 5, 6]  # change values *and* type

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)

        self.assertIn('name', self.table.fields)
        self.assertIs(self.table.fields['name'], rows.fields.IntegerField)
        self.assertEqual(self.table[0].name, 4)
        self.assertEqual(self.table[1].name, 5)
        self.assertEqual(self.table[2].name, 6)

    def test_table_setitem_column_slug_field_name(self):
        self.assertNotIn('user_id', self.table.fields)
        self.table['User ID'] = [4, 5, 6]
        self.assertIn('user_id', self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table['user_id'] = [4, 5]  # list len should be 3

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)
        self.assertEqual(
            exception_context.exception.args[0],
            'Values length (2) should be the same as Table '
            'length (3)')

    def test_table_setitem_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            self.table[b'some_value'] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_delitem_row(self):
        table_rows = [row for row in self.table]
        before = len(self.table)
        del self.table[0]
        after = len(self.table)
        self.assertEqual(after, before - 1)
        for row, expected_row in zip(self.table, table_rows[1:]):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            del self.table['doesnt-exist']

        self.assertEqual(exception_context.exception.args[0],
                         'doesnt-exist')

    def test_table_delitem_column_happy_path(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table['name']

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        self.assertDictEqual(dict(self.table[0]._asdict()),
                             {'birthdate': datetime.date(1987, 4, 29)})
        self.assertDictEqual(dict(self.table[1]._asdict()),
                             {'birthdate': datetime.date(1990, 2, 1)})
        self.assertDictEqual(dict(self.table[2]._asdict()),
                             {'birthdate': datetime.date(1952, 3, 11)})

    def test_table_delitem_column_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        # This test is about deletion, so the bytes key must go through
        # `del` as well (the assignment form is already covered by
        # `test_table_setitem_invalid_type`).
        with self.assertRaises(ValueError) as exception_context:
            del self.table[b'name']  # 'name' actually exists

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_add(self):
        # Adding 0 on either side returns the very same table object.
        self.assertIs(self.table + 0, self.table)
        self.assertIs(0 + self.table, self.table)

        new_table = self.table + self.table
        self.assertEqual(new_table.fields, self.table.fields)
        self.assertEqual(len(new_table), 2 * len(self.table))
        self.assertEqual(list(new_table), list(self.table) * 2)

    def test_table_add_error(self):
        with self.assertRaises(ValueError):
            self.table + 1
        with self.assertRaises(ValueError):
            1 + self.table

    def test_table_order_by(self):
        with self.assertRaises(ValueError):
            self.table.order_by('doesnt_exist')

        before = [row.birthdate for row in self.table]
        self.table.order_by('birthdate')
        after = [row.birthdate for row in self.table]
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        # A leading '-' reverses the ordering.
        self.table.order_by('-birthdate')
        final = [row.birthdate for row in self.table]
        self.assertEqual(final, list(reversed(after)))

        self.table.order_by('name')
        expected_rows = [{
            'name': 'Douglas Adams',
            'birthdate': datetime.date(1952, 3, 11)
        }, {
            'name': 'Somebody',
            'birthdate': datetime.date(1990, 2, 1)
        }, {
            'name': 'Álvaro Justen',
            'birthdate': datetime.date(1987, 4, 29)
        }]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        expected = '<rows.Table 2 fields, 3 rows>'
        self.assertEqual(expected, repr(self.table))

    def test_table_add_time(self):
        '''rows.Table.__add__ should be constant time

        To test it we double table size for each round and then compare the
        standard deviation to the mean (it will be almost the mean if the
        algorithm is not fast enough and almost 10% of the mean if it's good).
        '''
        rounds = []
        table = utils.table
        for _ in range(5):
            start = time.time()
            table = table + table
            end = time.time()
            rounds.append(end - start)

        mean = sum(rounds) / len(rounds)
        # Sample standard deviation (n - 1 in the denominator).
        stdev = math.sqrt((1.0 / (len(rounds) - 1)) * sum(
            (value - mean)**2 for value in rounds))
        self.assertTrue(0.2 * mean > stdev)
class TableTestCase(unittest.TestCase):
    """Tests for `rows.Table`: creation, item access, `+` and ordering.

    Every test starts from the three-row, two-field table built in `setUp`.
    """

    def setUp(self):
        self.table = Table(fields={
            "name": rows.fields.TextField,
            "birthdate": rows.fields.DateField
        })
        # Kept on the instance so tests can modify and re-insert it.
        self.first_row = {
            "name": "Álvaro Justen",
            "birthdate": datetime.date(1987, 4, 29),
        }
        self.table.append(self.first_row)
        self.table.append({
            "name": "Somebody",
            "birthdate": datetime.date(1990, 2, 1)
        })
        # Birthdate given as text; `test_table_iteration` expects a date back.
        self.table.append({"name": "Douglas Adams", "birthdate": "1952-03-11"})

    def test_table_init_slug_creation_on_fields(self):
        table = rows.Table(fields=collections.OrderedDict([(
            'Query Occurrence"( % ),"First Seen', rows.fields.FloatField)]))

        self.assertIn("query_occurrence_first_seen", table.fields)

    def test_Table_is_present_on_main_namespace(self):
        self.assertIn("Table", dir(rows))
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        # TODO: may test with all field types (using tests.utils.table)

        table_rows = [row for row in self.table]
        self.assertEqual(len(table_rows), 3)
        self.assertEqual(table_rows[0].name, "Álvaro Justen")
        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
        self.assertEqual(table_rows[1].name, "Somebody")
        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
        self.assertEqual(table_rows[2].name, "Douglas Adams")
        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))

    def test_table_slicing(self):
        self.assertEqual(len(self.table[::2]), 2)
        self.assertEqual(self.table[::2][0].name, "Álvaro Justen")

    def test_table_slicing_error(self):
        # A list is not a supported key type.
        with self.assertRaises(ValueError) as context_manager:
            self.table[[1]]
        self.assertEqual(type(context_manager.exception), ValueError)

    def test_table_insert_row(self):
        self.table.insert(1, {
            "name": "Grace Hopper",
            "birthdate": datetime.date(1909, 12, 9)
        })
        self.assertEqual(self.table[1].name, "Grace Hopper")

    def test_table_append_error(self):
        # TODO: may mock these validations and test only on *Field tests
        with self.assertRaises(ValueError) as context_manager:
            self.table.append({
                "name": "Álvaro Justen".encode("utf-8"),
                "birthdate": "1987-04-29"
            })
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertEqual(context_manager.exception.args[0],
                         "Binary is not supported")

        with self.assertRaises(ValueError) as context_manager:
            self.table.append({"name": "Álvaro Justen", "birthdate": "WRONG"})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertIn("does not match format",
                      context_manager.exception.args[0])

    def test_table_getitem_invalid_type(self):
        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14]
        self.assertEqual(exception_context.exception.args[0],
                         "Unsupported key type: float")

        with self.assertRaises(ValueError) as exception_context:
            self.table[b"name"]
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_getitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            self.table["doesnt-exist"]

        self.assertEqual(exception_context.exception.args[0],
                         "doesnt-exist")

    def test_table_getitem_column_happy_path(self):
        expected_values = ["Álvaro Justen", "Somebody", "Douglas Adams"]
        self.assertEqual(self.table["name"], expected_values)

        expected_values = [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11),
        ]
        self.assertEqual(self.table["birthdate"], expected_values)

    def test_table_setitem_row(self):
        self.first_row["name"] = "turicas"
        self.first_row["birthdate"] = datetime.date(2000, 1, 1)
        self.table[0] = self.first_row
        self.assertEqual(self.table[0].name, "turicas")
        self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        self.assertEqual(self.table.field_names,
                         list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types,
                         list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table["user_id"] = [4, 5, 6]

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields + 1)

        self.assertIn("user_id", self.table.fields)
        self.assertIs(self.table.fields["user_id"], rows.fields.IntegerField)
        self.assertEqual(self.table[0].user_id, 4)
        self.assertEqual(self.table[1].user_id, 5)
        self.assertEqual(self.table[2].user_id, 6)

    def test_table_setitem_column_happy_path_replace_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table["name"] = [4, 5, 6]  # change values *and* type

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)

        self.assertIn("name", self.table.fields)
        self.assertIs(self.table.fields["name"], rows.fields.IntegerField)
        self.assertEqual(self.table[0].name, 4)
        self.assertEqual(self.table[1].name, 5)
        self.assertEqual(self.table[2].name, 6)

    def test_table_setitem_column_slug_field_name(self):
        self.assertNotIn("user_id", self.table.fields)
        self.table["User ID"] = [4, 5, 6]
        self.assertIn("user_id", self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table["user_id"] = [4, 5]  # list len should be 3

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)
        self.assertEqual(
            exception_context.exception.args[0],
            "Values length (2) should be the same as Table "
            "length (3)",
        )

    def test_table_setitem_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         "Unsupported key type: float")

        with self.assertRaises(ValueError) as exception_context:
            self.table[b"some_value"] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_delitem_row(self):
        table_rows = [row for row in self.table]
        before = len(self.table)
        del self.table[0]
        after = len(self.table)
        self.assertEqual(after, before - 1)
        for row, expected_row in zip(self.table, table_rows[1:]):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            del self.table["doesnt-exist"]

        self.assertEqual(exception_context.exception.args[0],
                         "doesnt-exist")

    def test_table_delitem_column_happy_path(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table["name"]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        self.assertDictEqual(dict(self.table[0]._asdict()),
                             {"birthdate": datetime.date(1987, 4, 29)})
        self.assertDictEqual(dict(self.table[1]._asdict()),
                             {"birthdate": datetime.date(1990, 2, 1)})
        self.assertDictEqual(dict(self.table[2]._asdict()),
                             {"birthdate": datetime.date(1952, 3, 11)})

    def test_table_delitem_column_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         "Unsupported key type: float")

        # This test is about deletion, so the bytes key must go through
        # `del` as well (the assignment form is already covered by
        # `test_table_setitem_invalid_type`).
        with self.assertRaises(ValueError) as exception_context:
            del self.table[b"name"]  # 'name' actually exists

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_add(self):
        # Adding 0 on either side returns the very same table object.
        self.assertIs(self.table + 0, self.table)
        self.assertIs(0 + self.table, self.table)

        new_table = self.table + self.table
        self.assertEqual(new_table.fields, self.table.fields)
        self.assertEqual(len(new_table), 2 * len(self.table))
        self.assertEqual(list(new_table), list(self.table) * 2)

    def test_table_add_error(self):
        with self.assertRaises(ValueError):
            self.table + 1
        with self.assertRaises(ValueError):
            1 + self.table

    def test_table_order_by(self):
        with self.assertRaises(ValueError):
            self.table.order_by("doesnt_exist")

        before = [row.birthdate for row in self.table]
        self.table.order_by("birthdate")
        after = [row.birthdate for row in self.table]
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        # A leading '-' reverses the ordering.
        self.table.order_by("-birthdate")
        final = [row.birthdate for row in self.table]
        self.assertEqual(final, list(reversed(after)))

        self.table.order_by("name")
        expected_rows = [
            {
                "name": "Douglas Adams",
                "birthdate": datetime.date(1952, 3, 11)
            },
            {
                "name": "Somebody",
                "birthdate": datetime.date(1990, 2, 1)
            },
            {
                "name": "Álvaro Justen",
                "birthdate": datetime.date(1987, 4, 29)
            },
        ]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        expected = "<rows.Table 2 fields, 3 rows>"
        self.assertEqual(expected, repr(self.table))

    def test_table_add_should_not_iterate_over_rows(self):
        table1 = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        table2 = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        # Replace the row storage with mocks so that any call to `__add__`
        # or `__iter__` on it is recorded.
        table1._rows = mock.Mock()
        table1._rows.__add__ = mock.Mock()
        table1._rows.__iter__ = mock.Mock()
        table2._rows = mock.Mock()
        table2._rows.__add__ = mock.Mock()
        table2._rows.__iter__ = mock.Mock()

        self.assertFalse(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)

        table1 + table2

        # Only the left operand's storage is added; neither side is iterated.
        self.assertTrue(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
def create_table(data, meta=None, fields=None, skip_header=True,
                 import_fields=None, samples=None, force_types=None,
                 *args, **kwargs):
    """Create a `Table` out of an iterable of rows.

    Without `fields`, the first row is taken as the header and field types
    come from `detect_types` (run over `samples` rows, or over all rows when
    `samples` is `None`), optionally overridden by `force_types`. With
    `fields` (which must be an `OrderedDict`), the first row is discarded
    when `skip_header` is true. `import_fields`, when given, restricts and
    reorders the table's fields. Extra positional and keyword arguments are
    forwarded to `detect_types`.

    Raises `ValueError` if `fields` is not an `OrderedDict` or if
    `import_fields` contains names that are not in the header.
    """
    # TODO: add auto_detect_types=True parameter
    row_iterator = iter(data)
    sampled = []

    if fields is not None:
        if not isinstance(fields, OrderedDict):
            raise ValueError('`fields` must be an `OrderedDict`')

        if skip_header:
            next(row_iterator)

        header = make_header(list(fields.keys()))
        # Re-key the given types by the names `make_header` produced.
        rekeyed = OrderedDict()
        for header_name, original_name in zip(header, fields):
            rekeyed[header_name] = fields[original_name]
        fields = rekeyed
    else:
        header = make_header(next(row_iterator))

        if samples is None:
            sampled = list(row_iterator)
        else:
            sampled = list(islice(row_iterator, 0, samples))

        fields = detect_types(header, sampled, *args, **kwargs)

        if force_types is not None:
            # TODO: optimize field detection (ignore fields on `force_types`)
            for forced_name, forced_type in force_types.items():
                fields[forced_name] = forced_type

    if import_fields is not None:
        # TODO: can optimize if import_fields is not None.
        # Example: do not detect all columns
        import_fields = make_header(import_fields)

        diff = set(import_fields) - set(header)
        if diff:
            quoted = ['"{}"'.format(unknown) for unknown in diff]
            raise ValueError("Invalid field names: {}".format(', '.join(quoted)))

        selected = OrderedDict()
        for wanted in import_fields:
            selected[wanted] = fields[wanted]
        fields = selected

    table = Table(fields=fields, meta=meta)
    # TODO: put this inside Table.__init__
    # Rows consumed for sampling come first, then whatever is still unread.
    for values in chain(sampled, row_iterator):
        table.append(dict(zip(header, values)))

    return table
class TableTestCase(unittest.TestCase): def setUp(self): self.table = Table(fields={'name': rows.fields.TextField, 'birthdate': rows.fields.DateField, }) self.first_row = {'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)} self.table.append(self.first_row) self.table.append({'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1)}) self.table.append({'name': 'Douglas Adams', 'birthdate': '1952-03-11'}) def test_Table_is_present_on_main_namespace(self): self.assertIn('Table', dir(rows)) self.assertIs(Table, rows.Table) def test_table_iteration(self): # TODO: may test with all field types (using tests.utils.table) table_rows = [row for row in self.table] self.assertEqual(len(table_rows), 3) self.assertEqual(table_rows[0].name, 'Álvaro Justen') self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29)) self.assertEqual(table_rows[1].name, 'Somebody') self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1)) self.assertEqual(table_rows[2].name, 'Douglas Adams') self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11)) def test_table_slicing(self): self.assertEqual(len(self.table[::2]), 2) self.assertEqual(self.table[::2][0].name, 'Álvaro Justen') def test_table_slicing_error(self): with self.assertRaises(ValueError) as context_manager: self.table[[1]] self.assertEqual(type(context_manager.exception), ValueError) def test_table_insert_row(self): self.table.insert(1, {'name': 'Grace Hopper', 'birthdate': datetime.date(1909, 12, 9)}) self.assertEqual(self.table[1].name, 'Grace Hopper') def test_table_append_error(self): # TODO: may mock these validations and test only on *Field tests with self.assertRaises(ValueError) as context_manager: self.table.append({'name': 'Álvaro Justen'.encode('utf-8'), 'birthdate': '1987-04-29'}) self.assertEqual(type(context_manager.exception), ValueError) self.assertEqual(context_manager.exception.args[0], 'Binary is not supported') with self.assertRaises(ValueError) as context_manager: 
self.table.append({'name': 'Álvaro Justen', 'birthdate': 'WRONG'}) self.assertEqual(type(context_manager.exception), ValueError) self.assertIn('does not match format', context_manager.exception.args[0]) def test_table_getitem_invalid_type(self): with self.assertRaises(ValueError) as exception_context: self.table[3.14] self.assertEqual(exception_context.exception.args[0], 'Unsupported key type: float') with self.assertRaises(ValueError) as exception_context: self.table[b'name'] self.assertEqual(exception_context.exception.args[0], 'Unsupported key type: {}'.format(binary_type_name)) def test_table_getitem_column_doesnt_exist(self): with self.assertRaises(KeyError) as exception_context: self.table['doesnt-exist'] self.assertEqual(exception_context.exception.args[0], 'doesnt-exist') def test_table_getitem_column_happy_path(self): expected_values = ['Álvaro Justen', 'Somebody', 'Douglas Adams'] self.assertEqual(self.table['name'], expected_values) expected_values = [ datetime.date(1987, 4, 29), datetime.date(1990, 2, 1), datetime.date(1952, 3, 11)] self.assertEqual(self.table['birthdate'], expected_values) def test_table_setitem_row(self): self.first_row['name'] = 'turicas' self.first_row['birthdate'] = datetime.date(2000, 1, 1) self.table[0] = self.first_row self.assertEqual(self.table[0].name, 'turicas') self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1)) def test_field_names_and_types(self): self.assertEqual(self.table.field_names, list(self.table.fields.keys())) self.assertEqual(self.table.field_types, list(self.table.fields.values())) def test_table_setitem_column_happy_path_new_column(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) self.table['user_id'] = [4, 5, 6] self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields + 1) self.assertIn('user_id', self.table.fields) self.assertIs(self.table.fields['user_id'], rows.fields.IntegerField) 
self.assertEqual(self.table[0].user_id, 4) self.assertEqual(self.table[1].user_id, 5) self.assertEqual(self.table[2].user_id, 6) def test_table_setitem_column_happy_path_replace_column(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) self.table['name'] = [4, 5, 6] # change values *and* type self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields) self.assertIn('name', self.table.fields) self.assertIs(self.table.fields['name'], rows.fields.IntegerField) self.assertEqual(self.table[0].name, 4) self.assertEqual(self.table[1].name, 5) self.assertEqual(self.table[2].name, 6) def test_table_setitem_column_slug_field_name(self): self.assertNotIn('user_id', self.table.fields) self.table['User ID'] = [4, 5, 6] self.assertIn('user_id', self.table.fields) def test_table_setitem_column_invalid_length(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: self.table['user_id'] = [4, 5] # list len should be 3 self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields) self.assertEqual(exception_context.exception.args[0], 'Values length (2) should be the same as Table ' 'length (3)') def test_table_setitem_invalid_type(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: self.table[3.14] = [] self.assertEqual(len(self.table), 3) # should not add any row self.assertDictEqual(fields, self.table.fields) # should not add field self.assertEqual(exception_context.exception.args[0], 'Unsupported key type: float') with self.assertRaises(ValueError) as exception_context: self.table[b'some_value'] = [] self.assertEqual(len(self.table), 3) # should not add any row self.assertDictEqual(fields, self.table.fields) # should not add field self.assertEqual(exception_context.exception.args[0], 'Unsupported key type: 
{}'.format(binary_type_name)) def test_table_delitem_row(self): table_rows = [row for row in self.table] before = len(self.table) del self.table[0] after = len(self.table) self.assertEqual(after, before - 1) for row, expected_row in zip(self.table, table_rows[1:]): self.assertEqual(row, expected_row) def test_table_delitem_column_doesnt_exist(self): with self.assertRaises(KeyError) as exception_context: del self.table['doesnt-exist'] self.assertEqual(exception_context.exception.args[0], 'doesnt-exist') def test_table_delitem_column_happy_path(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) del self.table['name'] self.assertEqual(len(self.table), 3) # should not del any row self.assertEqual(len(self.table.fields), len(fields) - 1) self.assertDictEqual(dict(self.table[0]._asdict()), {'birthdate': datetime.date(1987, 4, 29)}) self.assertDictEqual(dict(self.table[1]._asdict()), {'birthdate': datetime.date(1990, 2, 1)}) self.assertDictEqual(dict(self.table[2]._asdict()), {'birthdate': datetime.date(1952, 3, 11)}) def test_table_delitem_column_invalid_type(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: del self.table[3.14] self.assertEqual(len(self.table), 3) # should not del any row self.assertDictEqual(fields, self.table.fields) # should not del field self.assertEqual(exception_context.exception.args[0], 'Unsupported key type: float') with self.assertRaises(ValueError) as exception_context: self.table[b'name'] = [] # 'name' actually exists self.assertEqual(len(self.table), 3) # should not del any row self.assertDictEqual(fields, self.table.fields) # should not del field self.assertEqual(exception_context.exception.args[0], 'Unsupported key type: {}'.format(binary_type_name)) def test_table_add(self): self.assertIs(self.table + 0, self.table) self.assertIs(0 + self.table, self.table) new_table = self.table + self.table self.assertEqual(new_table.fields, 
self.table.fields) self.assertEqual(len(new_table), 2 * len(self.table)) self.assertEqual(list(new_table), list(self.table) * 2) def test_table_add_error(self): with self.assertRaises(ValueError): self.table + 1 with self.assertRaises(ValueError): 1 + self.table def test_table_order_by(self): with self.assertRaises(ValueError): self.table.order_by('doesnt_exist') before = [row.birthdate for row in self.table] self.table.order_by('birthdate') after = [row.birthdate for row in self.table] self.assertNotEqual(before, after) self.assertEqual(sorted(before), after) self.table.order_by('-birthdate') final = [row.birthdate for row in self.table] self.assertEqual(final, list(reversed(after))) self.table.order_by('name') expected_rows = [ {'name': 'Douglas Adams', 'birthdate': datetime.date(1952, 3, 11)}, {'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1)}, {'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)}] for expected_row, row in zip(expected_rows, self.table): self.assertEqual(expected_row, dict(row._asdict())) def test_table_repr(self): expected = '<rows.Table 2 fields, 3 rows>' self.assertEqual(expected, repr(self.table)) def test_table_add_time(self): '''rows.Table.__add__ should be constant time To test it we double table size for each round and then compare the standard deviation to the mean (it will be almost the mean if the algorithm is not fast enough and almost 10% of the mean if it's good). ''' rounds = [] table = utils.table for _ in range(10): start = time.time() table = table + table end = time.time() rounds.append(end - start) mean = sum(rounds) / len(rounds) stdev = math.sqrt((1.0 / (len(rounds) - 1)) * sum((value - mean) ** 2 for value in rounds)) self.assertTrue(0.2 * mean > stdev)
class TableTestCase(unittest.TestCase): def setUp(self): self.table = Table( fields={"name": rows.fields.TextField, "birthdate": rows.fields.DateField} ) self.first_row = { "name": "Álvaro Justen", "birthdate": datetime.date(1987, 4, 29), } self.table.append(self.first_row) self.table.append({"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)}) self.table.append({"name": "Douglas Adams", "birthdate": "1952-03-11"}) def test_table_init_slug_creation_on_fields(self): table = rows.Table( fields=collections.OrderedDict( [('Query Occurrence"( % ),"First Seen', rows.fields.FloatField)] ) ) self.assertIn("query_occurrence_first_seen", table.fields) def test_Table_is_present_on_main_namespace(self): self.assertIn("Table", dir(rows)) self.assertIs(Table, rows.Table) def test_table_iteration(self): # TODO: may test with all field types (using tests.utils.table) table_rows = [row for row in self.table] self.assertEqual(len(table_rows), 3) self.assertEqual(table_rows[0].name, "Álvaro Justen") self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29)) self.assertEqual(table_rows[1].name, "Somebody") self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1)) self.assertEqual(table_rows[2].name, "Douglas Adams") self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11)) def test_table_slicing(self): self.assertEqual(len(self.table[::2]), 2) self.assertEqual(self.table[::2][0].name, "Álvaro Justen") def test_table_slicing_error(self): with self.assertRaises(ValueError) as context_manager: self.table[[1]] self.assertEqual(type(context_manager.exception), ValueError) def test_table_insert_row(self): self.table.insert( 1, {"name": "Grace Hopper", "birthdate": datetime.date(1909, 12, 9)} ) self.assertEqual(self.table[1].name, "Grace Hopper") def test_table_append_error(self): # TODO: may mock these validations and test only on *Field tests with self.assertRaises(ValueError) as context_manager: self.table.append( {"name": "Álvaro 
Justen".encode("utf-8"), "birthdate": "1987-04-29"} ) self.assertEqual(type(context_manager.exception), ValueError) self.assertEqual(context_manager.exception.args[0], "Binary is not supported") with self.assertRaises(ValueError) as context_manager: self.table.append({"name": "Álvaro Justen", "birthdate": "WRONG"}) self.assertEqual(type(context_manager.exception), ValueError) self.assertIn("does not match format", context_manager.exception.args[0]) def test_table_getitem_invalid_type(self): with self.assertRaises(ValueError) as exception_context: self.table[3.14] self.assertEqual( exception_context.exception.args[0], "Unsupported key type: float" ) with self.assertRaises(ValueError) as exception_context: self.table[b"name"] self.assertEqual( exception_context.exception.args[0], "Unsupported key type: {}".format(binary_type_name), ) def test_table_getitem_column_doesnt_exist(self): with self.assertRaises(KeyError) as exception_context: self.table["doesnt-exist"] self.assertEqual(exception_context.exception.args[0], "doesnt-exist") def test_table_getitem_column_happy_path(self): expected_values = ["Álvaro Justen", "Somebody", "Douglas Adams"] self.assertEqual(self.table["name"], expected_values) expected_values = [ datetime.date(1987, 4, 29), datetime.date(1990, 2, 1), datetime.date(1952, 3, 11), ] self.assertEqual(self.table["birthdate"], expected_values) def test_table_setitem_row(self): self.first_row["name"] = "turicas" self.first_row["birthdate"] = datetime.date(2000, 1, 1) self.table[0] = self.first_row self.assertEqual(self.table[0].name, "turicas") self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1)) def test_field_names_and_types(self): self.assertEqual(self.table.field_names, list(self.table.fields.keys())) self.assertEqual(self.table.field_types, list(self.table.fields.values())) def test_table_setitem_column_happy_path_new_column(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) self.table["user_id"] = 
[4, 5, 6] self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields + 1) self.assertIn("user_id", self.table.fields) self.assertIs(self.table.fields["user_id"], rows.fields.IntegerField) self.assertEqual(self.table[0].user_id, 4) self.assertEqual(self.table[1].user_id, 5) self.assertEqual(self.table[2].user_id, 6) def test_table_setitem_column_happy_path_replace_column(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) self.table["name"] = [4, 5, 6] # change values *and* type self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields) self.assertIn("name", self.table.fields) self.assertIs(self.table.fields["name"], rows.fields.IntegerField) self.assertEqual(self.table[0].name, 4) self.assertEqual(self.table[1].name, 5) self.assertEqual(self.table[2].name, 6) def test_table_setitem_column_slug_field_name(self): self.assertNotIn("user_id", self.table.fields) self.table["User ID"] = [4, 5, 6] self.assertIn("user_id", self.table.fields) def test_table_setitem_column_invalid_length(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: self.table["user_id"] = [4, 5] # list len should be 3 self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields) self.assertEqual( exception_context.exception.args[0], "Values length (2) should be the same as Table " "length (3)", ) def test_table_setitem_invalid_type(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: self.table[3.14] = [] self.assertEqual(len(self.table), 3) # should not add any row self.assertDictEqual(fields, self.table.fields) # should not add field self.assertEqual( exception_context.exception.args[0], "Unsupported key type: float" ) with self.assertRaises(ValueError) as exception_context: 
self.table[b"some_value"] = [] self.assertEqual(len(self.table), 3) # should not add any row self.assertDictEqual(fields, self.table.fields) # should not add field self.assertEqual( exception_context.exception.args[0], "Unsupported key type: {}".format(binary_type_name), ) def test_table_delitem_row(self): table_rows = [row for row in self.table] before = len(self.table) del self.table[0] after = len(self.table) self.assertEqual(after, before - 1) for row, expected_row in zip(self.table, table_rows[1:]): self.assertEqual(row, expected_row) def test_table_delitem_column_doesnt_exist(self): with self.assertRaises(KeyError) as exception_context: del self.table["doesnt-exist"] self.assertEqual(exception_context.exception.args[0], "doesnt-exist") def test_table_delitem_column_happy_path(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) del self.table["name"] self.assertEqual(len(self.table), 3) # should not del any row self.assertEqual(len(self.table.fields), len(fields) - 1) self.assertDictEqual( dict(self.table[0]._asdict()), {"birthdate": datetime.date(1987, 4, 29)} ) self.assertDictEqual( dict(self.table[1]._asdict()), {"birthdate": datetime.date(1990, 2, 1)} ) self.assertDictEqual( dict(self.table[2]._asdict()), {"birthdate": datetime.date(1952, 3, 11)} ) def test_table_delitem_column_invalid_type(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: del self.table[3.14] self.assertEqual(len(self.table), 3) # should not del any row self.assertDictEqual(fields, self.table.fields) # should not del field self.assertEqual( exception_context.exception.args[0], "Unsupported key type: float" ) with self.assertRaises(ValueError) as exception_context: self.table[b"name"] = [] # 'name' actually exists self.assertEqual(len(self.table), 3) # should not del any row self.assertDictEqual(fields, self.table.fields) # should not del field self.assertEqual( 
exception_context.exception.args[0], "Unsupported key type: {}".format(binary_type_name), ) def test_table_add(self): self.assertIs(self.table + 0, self.table) self.assertIs(0 + self.table, self.table) new_table = self.table + self.table self.assertEqual(new_table.fields, self.table.fields) self.assertEqual(len(new_table), 2 * len(self.table)) self.assertEqual(list(new_table), list(self.table) * 2) def test_table_add_error(self): with self.assertRaises(ValueError): self.table + 1 with self.assertRaises(ValueError): 1 + self.table def test_table_order_by(self): with self.assertRaises(ValueError): self.table.order_by("doesnt_exist") before = [row.birthdate for row in self.table] self.table.order_by("birthdate") after = [row.birthdate for row in self.table] self.assertNotEqual(before, after) self.assertEqual(sorted(before), after) self.table.order_by("-birthdate") final = [row.birthdate for row in self.table] self.assertEqual(final, list(reversed(after))) self.table.order_by("name") expected_rows = [ {"name": "Douglas Adams", "birthdate": datetime.date(1952, 3, 11)}, {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)}, {"name": "Álvaro Justen", "birthdate": datetime.date(1987, 4, 29)}, ] for expected_row, row in zip(expected_rows, self.table): self.assertEqual(expected_row, dict(row._asdict())) def test_table_repr(self): expected = "<rows.Table 2 fields, 3 rows>" self.assertEqual(expected, repr(self.table)) def test_table_add_should_not_iterate_over_rows(self): table1 = rows.Table( fields={"f1": rows.fields.IntegerField, "f2": rows.fields.FloatField} ) table2 = rows.Table( fields={"f1": rows.fields.IntegerField, "f2": rows.fields.FloatField} ) table1._rows = mock.Mock() table1._rows.__add__ = mock.Mock() table1._rows.__iter__ = mock.Mock() table2._rows = mock.Mock() table2._rows.__add__ = mock.Mock() table2._rows.__iter__ = mock.Mock() self.assertFalse(table1._rows.__add__.called) self.assertFalse(table2._rows.__add__.called) 
self.assertFalse(table1._rows.__iter__.called) self.assertFalse(table2._rows.__iter__.called) table1 + table2 self.assertTrue(table1._rows.__add__.called) self.assertFalse(table2._rows.__add__.called) self.assertFalse(table1._rows.__iter__.called) self.assertFalse(table2._rows.__iter__.called)
"datetime_column": datetime.datetime(2015, 5, 6, 12, 1, 2), "percent_column": Decimal("0.02"), "unicode_column": "test", }, { "float_column": None, "decimal_column": None, "bool_column": None, "integer_column": None, "date_column": None, "datetime_column": None, "percent_column": None, "unicode_column": "", }, ] table = Table(fields=FIELDS) for row in EXPECTED_ROWS: table.append(row) table._meta = {"test": 123} class LazyGenerator(object): def __init__(self, max_number): self.max_number = max_number self.last = None def __iter__(self): yield ["number", "number_sq", "number_double"] for number in range(self.max_number): self.last = number yield [self.last, self.last ** 2, self.last * 2]
class TableTestCase(unittest.TestCase): def setUp(self): self.table = Table(fields={ 'name': rows.fields.TextField, 'birthdate': rows.fields.DateField, }) self.first_row = { 'name': u'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29) } self.table.append(self.first_row) self.table.append({ 'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1) }) self.table.append({ 'name': u'Douglas Adams', 'birthdate': '1952-03-11' }) def test_Table_is_present_on_main_namespace(self): self.assertIn('Table', dir(rows)) self.assertIs(Table, rows.Table) def test_table_iteration(self): # TODO: may test with all field types (using tests.utils.table) table_rows = [row for row in self.table] self.assertEqual(len(table_rows), 3) self.assertEqual(table_rows[0].name, u'Álvaro Justen') self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29)) self.assertEqual(table_rows[1].name, u'Somebody') self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1)) self.assertEqual(table_rows[2].name, u'Douglas Adams') self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11)) def test_table_slicing(self): self.assertEqual(len(self.table[::2]), 2) self.assertEqual(self.table[::2][0].name, u'Álvaro Justen') def test_table_slicing_error(self): with self.assertRaises(ValueError) as context_manager: self.table[[1]] self.assertEqual(type(context_manager.exception), ValueError) def test_table_insert_row(self): self.table.insert(1, { 'name': u'Grace Hopper', 'birthdate': datetime.date(1909, 12, 9) }) self.assertEqual(self.table[1].name, u'Grace Hopper') def test_table_append_error(self): # TODO: may mock these validations and test only on *Field tests with self.assertRaises(ValueError) as context_manager: self.table.append({ 'name': 'Álvaro Justen'.encode('utf-8'), 'birthdate': '1987-04-29' }) self.assertEqual(type(context_manager.exception), UnicodeDecodeError) with self.assertRaises(ValueError) as context_manager: self.table.append({'name': u'Álvaro Justen', 
'birthdate': 'WRONG'}) self.assertEqual(type(context_manager.exception), ValueError) self.assertIn('does not match format', context_manager.exception.message) def test_table_getitem_invalid_type(self): with self.assertRaises(ValueError) as exception_context: self.table[3.14] self.assertEqual(exception_context.exception.message, 'Unsupported key type: float') with self.assertRaises(ValueError) as exception_context: self.table[b'name'] self.assertEqual(exception_context.exception.message, 'Unsupported key type: str') # TODO: should change to 'bytes' on Python3 def test_table_getitem_column_doesnt_exist(self): with self.assertRaises(KeyError) as exception_context: self.table['doesnt-exist'] self.assertEqual(exception_context.exception.message, 'doesnt-exist') def test_table_getitem_column_happy_path(self): expected_values = ['Álvaro Justen', 'Somebody', 'Douglas Adams'] self.assertEqual(self.table['name'], expected_values) expected_values = [ datetime.date(1987, 4, 29), datetime.date(1990, 2, 1), datetime.date(1952, 3, 11) ] self.assertEqual(self.table['birthdate'], expected_values) def test_table_setitem_row(self): self.first_row['name'] = 'turicas' self.first_row['birthdate'] = datetime.date(2000, 1, 1) self.table[0] = self.first_row self.assertEqual(self.table[0].name, 'turicas') self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1)) def test_field_names_and_types(self): self.assertEqual(self.table.field_names, self.table.fields.keys()) self.assertEqual(self.table.field_types, self.table.fields.values()) def test_table_setitem_column_happy_path_new_column(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) self.table['user_id'] = [4, 5, 6] self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields + 1) self.assertIn('user_id', self.table.fields) self.assertIs(self.table.fields['user_id'], rows.fields.IntegerField) self.assertEqual(self.table[0].user_id, 4) 
self.assertEqual(self.table[1].user_id, 5) self.assertEqual(self.table[2].user_id, 6) def test_table_setitem_column_happy_path_replace_column(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) self.table['name'] = [4, 5, 6] # change values *and* type self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields) self.assertIn('name', self.table.fields) self.assertIs(self.table.fields['name'], rows.fields.IntegerField) self.assertEqual(self.table[0].name, 4) self.assertEqual(self.table[1].name, 5) self.assertEqual(self.table[2].name, 6) def test_table_setitem_column_slug_field_name(self): self.assertNotIn('user_id', self.table.fields) self.table['User ID'] = [4, 5, 6] self.assertIn('user_id', self.table.fields) def test_table_setitem_column_invalid_length(self): number_of_fields = len(self.table.fields) self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: self.table['user_id'] = [4, 5] # list len should be 3 self.assertEqual(len(self.table), 3) self.assertEqual(len(self.table.fields), number_of_fields) self.assertEqual( exception_context.exception.message, 'Values length (2) should be the same as Table ' 'length (3)') def test_table_setitem_invalid_type(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: self.table[3.14] = [] self.assertEqual(len(self.table), 3) # should not add any row self.assertDictEqual(fields, self.table.fields) # should not add field self.assertEqual(exception_context.exception.message, 'Unsupported key type: float') with self.assertRaises(ValueError) as exception_context: self.table[b'some_value'] = [] self.assertEqual(len(self.table), 3) # should not add any row self.assertDictEqual(fields, self.table.fields) # should not add field self.assertEqual(exception_context.exception.message, 'Unsupported key type: str') # TODO: should change to 'bytes' on 
Python3 def test_table_delitem_row(self): table_rows = [row for row in self.table] before = len(self.table) del self.table[0] after = len(self.table) self.assertEqual(after, before - 1) for row, expected_row in zip(self.table, table_rows[1:]): self.assertEqual(row, expected_row) def test_table_delitem_column_doesnt_exist(self): with self.assertRaises(KeyError) as exception_context: del self.table['doesnt-exist'] self.assertEqual(exception_context.exception.message, 'doesnt-exist') def test_table_delitem_column_happy_path(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) del self.table['name'] self.assertEqual(len(self.table), 3) # should not del any row self.assertEqual(len(self.table.fields), len(fields) - 1) self.assertDictEqual(dict(self.table[0]._asdict()), {'birthdate': datetime.date(1987, 4, 29)}) self.assertDictEqual(dict(self.table[1]._asdict()), {'birthdate': datetime.date(1990, 2, 1)}) self.assertDictEqual(dict(self.table[2]._asdict()), {'birthdate': datetime.date(1952, 3, 11)}) def test_table_delitem_column_invalid_type(self): fields = self.table.fields.copy() self.assertEqual(len(self.table), 3) with self.assertRaises(ValueError) as exception_context: del self.table[3.14] self.assertEqual(len(self.table), 3) # should not del any row self.assertDictEqual(fields, self.table.fields) # should not del field self.assertEqual(exception_context.exception.message, 'Unsupported key type: float') with self.assertRaises(ValueError) as exception_context: self.table[b'name'] = [] # u'name' actually exists self.assertEqual(len(self.table), 3) # should not del any row self.assertDictEqual(fields, self.table.fields) # should not del field self.assertEqual(exception_context.exception.message, 'Unsupported key type: str') # TODO: should change to 'bytes' on Python3 def test_table_add(self): self.assertIs(self.table + 0, self.table) self.assertIs(0 + self.table, self.table) new_table = self.table + self.table self.assertEqual(new_table.fields, 
self.table.fields) self.assertEqual(len(new_table), 2 * len(self.table)) self.assertEqual(list(new_table), list(self.table) * 2) def test_table_add_error(self): with self.assertRaises(ValueError): self.table + 1 with self.assertRaises(ValueError): 1 + self.table def test_table_order_by(self): with self.assertRaises(ValueError): self.table.order_by('doesnt_exist') before = [row.birthdate for row in self.table] self.table.order_by('birthdate') after = [row.birthdate for row in self.table] self.assertNotEqual(before, after) self.assertEqual(sorted(before), after) self.table.order_by('-birthdate') final = [row.birthdate for row in self.table] self.assertEqual(final, list(reversed(after))) self.table.order_by('name') expected_rows = [{ 'name': 'Douglas Adams', 'birthdate': datetime.date(1952, 3, 11) }, { 'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1) }, { 'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29) }] for expected_row, row in zip(expected_rows, self.table): self.assertEqual(expected_row, dict(row._asdict())) def test_table_repr(self): expected = '<rows.Table 2 fields, 3 rows>' self.assertEqual(expected, repr(self.table))
'unicode_column': 'álvaro', 'null_column': 'none'.encode('utf-8') }, { 'float_column': 1.2345, 'decimal_column': 1.2345, 'bool_column': False, 'integer_column': 6, 'date_column': datetime.date(2015, 5, 6), 'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2), 'percent_column': Decimal('0.02'), 'unicode_column': 'test', 'null_column': ''.encode('utf-8') }, ] table = Table(fields=expected_fields) for row in expected_rows: table.append(row) table._meta = {'test': 123} class RowsTestMixIn(object): maxDiff = None def setUp(self): self.files_to_delete = [] def tearDown(self): for filename in self.files_to_delete: os.unlink(filename)
'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2), 'percent_column': Decimal('0.02'), 'unicode_column': 'test', }, { 'float_column': None, 'decimal_column': None, 'bool_column': None, 'integer_column': None, 'date_column': None, 'datetime_column': None, 'percent_column': None, 'unicode_column': '', } ] table = Table(fields=FIELDS) for row in EXPECTED_ROWS: table.append(row) table._meta = {'test': 123} class RowsTestMixIn(object): maxDiff = None override_fields = None def setUp(self): self.files_to_delete = [] def tearDown(self): for filename in self.files_to_delete:
class TableTestCase(unittest.TestCase): def setUp(self): self.table = Table(fields={'name': rows.fields.UnicodeField, 'birthdate': rows.fields.DateField, }) self.first_row = {'name': u'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)} self.table.append(self.first_row) self.table.append({'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1)}) self.table.append({'name': u'Douglas Adams', 'birthdate': '1952-03-11'}) def test_Table_is_present_on_main_namespace(self): self.assertIn('Table', dir(rows)) self.assertIs(Table, rows.Table) def test_table_iteration(self): # TODO: may test with all field types (using tests.utils.table) table_rows = [row for row in self.table] self.assertEqual(len(table_rows), 3) self.assertEqual(table_rows[0].name, u'Álvaro Justen') self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29)) self.assertEqual(table_rows[1].name, u'Somebody') self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1)) self.assertEqual(table_rows[2].name, u'Douglas Adams') self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11)) def test_table_append_error(self): # TODO: may mock these validations and test only on *Field tests with self.assertRaises(ValueError) as context_manager: self.table.append({'name': 'Álvaro Justen'.encode('utf-8'), 'birthdate': '1987-04-29'}) self.assertEqual(type(context_manager.exception), UnicodeDecodeError) with self.assertRaises(ValueError) as context_manager: self.table.append({'name': u'Álvaro Justen', 'birthdate': 'WRONG'}) self.assertEqual(type(context_manager.exception), ValueError) self.assertIn('does not match format', context_manager.exception.message) def test_table_getitem_error(self): with self.assertRaises(ValueError) as context_manager: self.table['test'] def test_table_setitem(self): self.first_row['name'] = 'turicas' self.first_row['birthdate'] = datetime.date(2000, 1, 1) self.table[0] = self.first_row self.assertEqual(self.table[0].name, 'turicas') 
self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1)) def test_table_delitem(self): table_rows = [row for row in self.table] before = len(self.table) del self.table[0] after = len(self.table) self.assertEqual(after, before - 1) for row, expected_row in zip(self.table, table_rows[1:]): self.assertEqual(row, expected_row) def test_table_add(self): self.assertIs(self.table + 0, self.table) self.assertIs(0 + self.table, self.table) new_table = self.table + self.table self.assertEqual(new_table.fields, self.table.fields) self.assertEqual(len(new_table), 2 * len(self.table)) self.assertEqual(list(new_table), list(self.table) * 2) def test_table_add_error(self): with self.assertRaises(ValueError): self.table + 1 with self.assertRaises(ValueError): 1 + self.table def test_table_order_by(self): with self.assertRaises(ValueError): self.table.order_by('doesnt_exist') before = [row.birthdate for row in self.table] self.table.order_by('birthdate') after = [row.birthdate for row in self.table] self.assertNotEqual(before, after) self.assertEqual(sorted(before), after) self.table.order_by('-birthdate') final = [row.birthdate for row in self.table] self.assertEqual(final, list(reversed(after))) self.table.order_by('name') expected_rows = [ {'name': 'Douglas Adams', 'birthdate': datetime.date(1952, 3, 11)}, {'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1)}, {'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)}] for expected_row, row in zip(expected_rows, self.table): self.assertEqual(expected_row, dict(row._asdict()))
def create_table(data, meta=None, fields=None, skip_header=True,
                 import_fields=None, samples=None, force_types=None,
                 *args, **kwargs):
    """Create a rows.Table object based on data rows and some configurations

    - `skip_header` is only used if `fields` is set
    - `samples` is only used if `fields` is `None`. If samples=None, all data
      is filled in memory - use with caution.
    - `force_types` is only used if `fields` is `None`
    - `import_fields` can be used either if `fields` is set or not, the
      resulting fields will seek its order
    - `fields` must always be in the same order as the data

    Extra positional/keyword arguments are forwarded to `detect_types`.

    Raises `ValueError` if `fields` is given but is not an `OrderedDict`, or
    if `import_fields` names a field that is not in the header.
    """

    table_rows = iter(data)
    force_types = force_types or {}
    if import_fields is not None:
        import_fields = make_header(import_fields)

    if fields is None:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        #       (plugins may specify some possible field types).
        # The first row is the header; an empty `data` lets `StopIteration`
        # propagate from here.
        header = make_header(next(table_rows))

        if samples is not None:
            # Consume only the sample and put it back in front of the rest.
            sample_rows = list(islice(table_rows, 0, samples))
            table_rows = chain(sample_rows, table_rows)
        else:
            sample_rows = table_rows = list(table_rows)

        # Detect field types using only the desired columns: forced types
        # and not-imported columns are skipped. The set is built once so the
        # membership test is not a list scan per column.
        desired = set(import_fields or header)
        skip_indexes = [
            index
            for index, field in enumerate(header)
            if field in force_types or field not in desired
        ]
        detected_fields = detect_types(
            header, sample_rows, skip_indexes=skip_indexes, *args, **kwargs)

        # Check if any field was added during detecting process
        new_fields = [
            field_name
            for field_name in detected_fields.keys()
            if field_name not in header
        ]
        # Finally create the `fields` with both header and new field names,
        # based on detected fields `and force_types`
        fields = OrderedDict([(field_name,
                               detected_fields.get(field_name, TextField))
                              for field_name in header + new_fields])
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields.keys())
        if import_fields is None:
            import_fields = header

    else:  # using provided field types
        if not isinstance(fields, OrderedDict):
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # If we're skipping the header probably this row is not trustable
            # (can be data or garbage). The default keeps an empty `data`
            # from raising `StopIteration`: an empty table is returned.
            next(table_rows, None)

        header = make_header(list(fields.keys()))
        if import_fields is None:
            import_fields = header

        # Re-key the provided types by their normalized header names.
        fields = OrderedDict([(field_name, fields[key])
                              for field_name, key in zip(header, fields)])

    diff = set(import_fields) - set(header)
    if diff:
        # Sorted so the message does not depend on set iteration order.
        field_names = ", ".join('"{}"'.format(field)
                                for field in sorted(diff))
        raise ValueError("Invalid field names: {}".format(field_names))
    fields = OrderedDict([(field_name, fields[field_name])
                          for field_name in import_fields])

    # Pick, from each data row, only the imported columns (in import order).
    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=fields, meta=meta)
    table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)

    return table
def create_table(
    data,
    meta=None,
    fields=None,
    skip_header=True,
    import_fields=None,
    samples=None,
    force_types=None,
    *args,
    **kwargs
):
    """Create a rows.Table object based on data rows and some configurations

    - `skip_header` is only used if `fields` is set
    - `samples` is only used if `fields` is `None`. If samples=None, all data
      is filled in memory - use with caution.
    - `force_types` is only used if `fields` is `None`
    - `import_fields` can be used either if `fields` is set or not, the
      resulting fields will seek its order
    - `fields` must always be in the same order as the data

    Any extra positional/keyword arguments are passed through to
    `detect_types`.

    Raises `ValueError` when `fields` is provided but is not an `OrderedDict`,
    or when `import_fields` lists a name missing from the header.

    With `fields=None` the first row of `data` is read as the header, so empty
    `data` lets `StopIteration` propagate from `next()`. With `fields`
    provided, empty `data` yields a table without rows.
    """
    table_rows = iter(data)
    force_types = force_types or {}
    if import_fields is not None:
        import_fields = make_header(import_fields)

    if fields is None:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        #       (plugins may specify some possible field types).
        header = make_header(next(table_rows))

        if samples is None:
            # No sampling: everything is loaded in memory and used both for
            # detection and as the table data.
            sample_rows = table_rows = list(table_rows)
        else:
            # Sampled rows are consumed from the iterator, so they are
            # chained back in front of the remaining rows.
            sample_rows = list(islice(table_rows, 0, samples))
            table_rows = chain(sample_rows, table_rows)

        # Detect field types using only the desired columns (those neither
        # forced through `force_types` nor left out of the import).
        columns_to_import = set(import_fields or header)
        ignored_indexes = [
            index
            for index, field in enumerate(header)
            if field in force_types or field not in columns_to_import
        ]
        detected_fields = detect_types(
            header, sample_rows, skip_indexes=ignored_indexes, *args, **kwargs
        )

        # Check if any field was added during detecting process
        new_fields = [
            field_name
            for field_name in detected_fields.keys()
            if field_name not in header
        ]

        # Finally create the `fields` with both header and new field names,
        # based on detected fields and `force_types`
        fields = OrderedDict(
            (field_name, detected_fields.get(field_name, TextField))
            for field_name in header + new_fields
        )
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields)
        if import_fields is None:
            import_fields = header

    else:  # using provided field types
        if not isinstance(fields, OrderedDict):
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # If we're skipping the header probably this row is not trustable
            # (can be data or garbage). Passing a default keeps an empty
            # `data` from raising `StopIteration` here.
            next(table_rows, None)

        header = make_header(list(fields))
        if import_fields is None:
            import_fields = header

        # Map each normalized header name to the type given for its
        # original key.
        fields = OrderedDict(
            (field_name, fields[key]) for field_name, key in zip(header, fields)
        )

    unknown = set(import_fields) - set(header)
    if unknown:
        # Sorting makes the reported names stable across runs.
        field_names = ", ".join('"{}"'.format(field) for field in sorted(unknown))
        raise ValueError("Invalid field names: {}".format(field_names))

    # Restrict and reorder `fields` to follow `import_fields`.
    fields = OrderedDict(
        (field_name, fields[field_name]) for field_name in import_fields
    )

    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=fields, meta=meta)
    table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)

    return table