示例#1
0
def join(keys, tables):
    """Merge a list of `Table` objects using `keys` to group rows.

    Rows (from any table) that share the same values for `keys` are folded
    into one output row. Values from rows seen later overwrite the ones set
    by earlier rows; fields no row provided stay `None`.

    :param keys: iterable of field names used to group rows
    :param tables: iterable of `Table` objects to merge
    :return: a new `Table` whose fields are the union of all tables' fields
    :raises ValueError: if a key is not a field of the merged table
    """

    # Both arguments are walked more than once below, so materialize them
    # (this also lets callers pass one-shot iterators/generators).
    keys = tuple(keys)
    tables = list(tables)

    # Make new (merged) Table fields
    fields = OrderedDict()
    for table in tables:
        fields.update(table.fields)
    # TODO: may raise an error if a same field is different in some tables

    # Check if all keys are inside merged Table's fields
    for key in keys:
        if key not in fields:
            raise ValueError('Invalid key: "{}"'.format(key))

    # Group rows by key, keeping first-seen row order and merged field order
    data = OrderedDict()
    for table in tables:
        for row in table:
            row_key = tuple(getattr(row, key) for key in keys)
            if row_key not in data:
                # `fromkeys` builds the all-None row in `fields` order; going
                # through a plain dict first could scramble that order on
                # interpreters where dicts are unordered.
                data[row_key] = OrderedDict.fromkeys(fields)
            data[row_key].update(row._asdict())

    merged = Table(fields=fields)
    merged.extend(data.values())
    return merged
示例#2
0
def join(keys, tables):
    """Merge a list of `Table` objects using `keys` to group rows.

    Rows (from any table) that share the same values for `keys` are folded
    into one output row. Values from rows seen later overwrite the ones set
    by earlier rows; fields no row provided stay `None`.

    :param keys: iterable of field names used to group rows
    :param tables: iterable of `Table` objects to merge
    :return: a new `Table` whose fields are the union of all tables' fields
    :raises ValueError: if a key is not a field of the merged table
    """

    # Both arguments are walked more than once below, so materialize them
    # (this also lets callers pass one-shot iterators/generators).
    keys = tuple(keys)
    tables = list(tables)

    # Make new (merged) Table fields
    fields = OrderedDict()
    for table in tables:
        fields.update(table.fields)
    # TODO: may raise an error if a same field is different in some tables

    # Check if all keys are inside merged Table's fields
    for key in keys:
        if key not in fields:
            raise ValueError('Invalid key: "{}"'.format(key))

    # Group rows by key, keeping first-seen row order and merged field order
    data = OrderedDict()
    for table in tables:
        for row in table:
            row_key = tuple(getattr(row, key) for key in keys)
            if row_key not in data:
                # `fromkeys` builds the all-None row in `fields` order; going
                # through a plain dict first could scramble that order on
                # interpreters where dicts are unordered.
                data[row_key] = OrderedDict.fromkeys(fields)
            data[row_key].update(row._asdict())

    merged = Table(fields=fields)
    merged.extend(data.values())
    return merged
示例#3
0
def create_table(data, meta=None, force_headers=None, fields=None,
                 skip_header=True, *args, **kwargs):
    """Build a `Table` from an iterable of rows.

    :param data: iterable of rows (each row an iterable of values)
    :param meta: passed through to `Table`
    :param force_headers: header to use instead of the first row of `data`
        (only consulted when `fields` is None)
    :param fields: mapping of field name -> field type; when given, type
        detection is skipped. Must be a `collections.OrderedDict` when
        `skip_header` is true.
    :param skip_header: when `fields` is given, drop the first row of `data`
    :param args: extra positional arguments forwarded to `detect_types`
    :param kwargs: extra keyword arguments forwarded to `detect_types`
    :return: the populated `Table`
    :raises AssertionError: if `fields` has the wrong type or its length does
        not match the widest data row
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = list(data)

    if fields is None:
        if force_headers is None:
            # First row is the header; the remaining rows are the data
            header = make_header(table_rows[0])
            table_rows = table_rows[1:]
        else:
            header = force_headers
        fields = detect_types(header, table_rows, *args, **kwargs)
    else:
        header = make_header(fields.keys())
        if skip_header:
            assert type(fields) is collections.OrderedDict, \
                '`fields` must be an `OrderedDict`'
            table_rows = table_rows[1:]
            # Re-key the field types with the names produced by `make_header`.
            # An ordered mapping is used so the column order of the caller's
            # `fields` survives (a plain dict may not keep it).
            fields = collections.OrderedDict(
                (field_name, fields[key])
                for field_name, key in zip(header, fields))

        # TODO: may reuse max_columns from html
        # With no data rows there is nothing to compare against (and `max`
        # would fail on an empty sequence), so only check non-empty input.
        if table_rows:
            max_columns = max(len(row) for row in table_rows)
            assert len(fields) == max_columns, \
                'number of fields differs from number of columns'

    # TODO: put this inside Table.__init__
    table = Table(fields=fields, meta=meta)
    for row in table_rows:
        table.append({field_name: value
                      for field_name, value in zip(header, row)})

    return table
示例#4
0
def transform(fields, function, *tables):
    """Return a new table based on other tables and a transformation function

    `function` is called as `function(row, table)` for every row of every
    table; each truthy result is appended to the new table, falsy results
    are dropped.
    """

    result = Table(fields=fields)

    for source in tables:
        for original_row in source:
            converted = function(original_row, source)
            if not converted:
                # Falsy return value means "skip this row"
                continue
            result.append(converted)

    return result
示例#5
0
def transform(fields, function, *tables):
    """Return a new table based on other tables and a transformation function

    Every row of every input table is passed, together with its table, to
    `function`; only truthy return values end up in the resulting table.
    """

    output = Table(fields=fields)

    for current_table in tables:
        transformed_rows = (function(item, current_table)
                            for item in current_table)
        for new_row in transformed_rows:
            if new_row:
                output.append(new_row)

    return output
示例#6
0
 def setUp(self):
     """Create a table with `name`/`birthdate` fields and three rows."""
     field_types = {'name': rows.fields.UnicodeField,
                    'birthdate': rows.fields.DateField, }
     self.table = Table(fields=field_types)

     # The first row is also kept on the instance
     self.first_row = {'name': u'Álvaro Justen',
                       'birthdate': datetime.date(1987, 4, 29)}
     remaining_rows = [
         {'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
         # This birthdate is given as text instead of a `date` object
         {'name': u'Douglas Adams', 'birthdate': '1952-03-11'},
     ]
     for row in [self.first_row] + remaining_rows:
         self.table.append(row)
示例#7
0
文件: utils.py 项目: berinhard/rows
def create_table(data, meta=None, fields=None, skip_header=True,
                 import_fields=None, samples=None, force_types=None,
                 *args, **kwargs):
    """Create a `Table` from an iterable of rows.

    :param data: iterable of rows; its first row is used as the header when
        `fields` is None
    :param meta: passed through to `Table`
    :param fields: `OrderedDict` of field name -> field type; when None the
        types are detected from the data by `detect_types`
    :param skip_header: when `fields` is given, discard the first row of
        `data`
    :param import_fields: names of the only fields the table should have
    :param samples: maximum number of rows handed to `detect_types` (None
        means every row)
    :param force_types: mapping of field name -> field type overriding the
        detected types (only used when `fields` is None)
    :param args: extra positional arguments forwarded to `detect_types`
    :param kwargs: extra keyword arguments forwarded to `detect_types`
    :return: the populated `Table`
    :raises ValueError: if `fields` is not an `OrderedDict`, or if
        `import_fields` names a field that is not in the header
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = iter(data)
    sample_rows = []

    if fields is None:
        # NOTE(review): with empty `data` this `next` raises StopIteration
        header = make_header(next(table_rows))

        if samples is not None:
            sample_rows = list(islice(table_rows, 0, samples))
        else:
            sample_rows = list(table_rows)

        fields = detect_types(header, sample_rows, *args, **kwargs)

        if force_types is not None:
            # TODO: optimize field detection (ignore fields on `force_types`)
            for field_name, field_type in force_types.items():
                fields[field_name] = field_type
    else:
        if not isinstance(fields, OrderedDict):
            raise ValueError('`fields` must be an `OrderedDict`')

        if skip_header:
            # Discard the header row; the default keeps empty `data` from
            # raising StopIteration when the fields are already known.
            next(table_rows, None)

        # Re-key the field types with the names produced by `make_header`
        header = make_header(list(fields.keys()))
        fields = OrderedDict([(field_name, fields[key])
                              for field_name, key in zip(header, fields)])

    if import_fields is not None:
        # TODO: can optimize if import_fields is not None.
        #       Example: do not detect all columns
        import_fields = make_header(import_fields)

        # Sorted so the error message does not depend on set iteration order
        diff = sorted(set(import_fields) - set(header))
        if diff:
            field_names = ', '.join('"{}"'.format(field) for field in diff)
            raise ValueError("Invalid field names: {}".format(field_names))

        new_fields = OrderedDict()
        for field_name in import_fields:
            new_fields[field_name] = fields[field_name]
        fields = new_fields

    table = Table(fields=fields, meta=meta)
    # TODO: put this inside Table.__init__
    # Rows consumed for type detection come first, then the unread remainder
    for row in chain(sample_rows, table_rows):
        table.append({field_name: value
                      for field_name, value in zip(header, row)})

    return table
示例#8
0
def import_from_uwsgi_log(filename):
    """Read the log file `filename` into a `Table`.

    Each line matched by `REGEXP_UWSGI_LOG` becomes one row whose fields are
    named after `UWSGI_FIELDS`; lines without a match are skipped.
    """
    field_names = UWSGI_FIELDS.keys()
    table = Table(fields=UWSGI_FIELDS)
    with open(filename) as fobj:
        for line in fobj:
            matches = REGEXP_UWSGI_LOG.findall(line)
            if not matches:
                # No match on this line: nothing to import
                continue

            values = list(matches[0])
            # Convert datetime
            values[2] = strptime(values[2], UWSGI_DATETIME_FORMAT)
            # Convert generation time (micros -> seconds)
            values[5] = float(values[5]) / 1000000
            table.append(dict(zip(field_names, values)))
    return table
示例#9
0
 def setUp(self):
     """Create a table with `name`/`birthdate` fields and three rows."""
     field_types = {
         "name": rows.fields.TextField,
         "birthdate": rows.fields.DateField
     }
     self.table = Table(fields=field_types)

     # The first row is also kept on the instance
     self.first_row = {
         "name": "Álvaro Justen",
         "birthdate": datetime.date(1987, 4, 29),
     }
     self.table.append(self.first_row)

     second_row = {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)}
     self.table.append(second_row)

     # This birthdate is given as text instead of a `date` object
     third_row = {"name": "Douglas Adams", "birthdate": "1952-03-11"}
     self.table.append(third_row)
示例#10
0
def import_from_uwsgi_log(filename):
    """Build a `Table` from the log file at `filename`.

    Only lines matched by `REGEXP_UWSGI_LOG` produce a row; the captured
    values are paired, in order, with the names in `UWSGI_FIELDS`.
    """
    names = UWSGI_FIELDS.keys()
    table = Table(fields=UWSGI_FIELDS)
    with open(filename) as fobj:
        for line in fobj:
            found = REGEXP_UWSGI_LOG.findall(line)
            if found:
                captured = found[0]
                # Convert datetime
                when = strptime(captured[2], UWSGI_DATETIME_FORMAT)
                # Convert generation time (micros -> seconds)
                seconds = float(captured[5]) / 1000000

                values = list(captured)
                values[2] = when
                values[5] = seconds

                row = {}
                for name, value in zip(names, values):
                    row[name] = value
                table.append(row)
    return table
示例#11
0
 def setUp(self):
     """Create a table with `name`/`birthdate` fields and three rows."""
     schema = {"name": rows.fields.TextField, "birthdate": rows.fields.DateField}
     self.table = Table(fields=schema)

     # The first row is also kept on the instance
     self.first_row = {
         "name": "Álvaro Justen",
         "birthdate": datetime.date(1987, 4, 29),
     }
     later_rows = (
         {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)},
         # This birthdate is given as text instead of a `date` object
         {"name": "Douglas Adams", "birthdate": "1952-03-11"},
     )

     self.table.append(self.first_row)
     for entry in later_rows:
         self.table.append(entry)
示例#12
0
class TableTestCase(unittest.TestCase):
    """Tests for `rows.Table`: iteration, item access, addition, ordering."""

    def setUp(self):
        """Create a table with `name`/`birthdate` fields and three rows."""
        self.table = Table(fields={
            'name': rows.fields.TextField,
            'birthdate': rows.fields.DateField,
        })
        self.first_row = {
            'name': 'Álvaro Justen',
            'birthdate': datetime.date(1987, 4, 29)
        }
        self.table.append(self.first_row)
        self.table.append({
            'name': 'Somebody',
            'birthdate': datetime.date(1990, 2, 1)
        })
        # This birthdate is given as text instead of a `date` object
        self.table.append({'name': 'Douglas Adams', 'birthdate': '1952-03-11'})

    def test_Table_is_present_on_main_namespace(self):
        self.assertIn('Table', dir(rows))
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        # TODO: may test with all field types (using tests.utils.table)

        table_rows = [row for row in self.table]
        self.assertEqual(len(table_rows), 3)
        self.assertEqual(table_rows[0].name, 'Álvaro Justen')
        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
        self.assertEqual(table_rows[1].name, 'Somebody')
        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
        self.assertEqual(table_rows[2].name, 'Douglas Adams')
        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))

    def test_table_slicing(self):
        self.assertEqual(len(self.table[::2]), 2)
        self.assertEqual(self.table[::2][0].name, 'Álvaro Justen')

    def test_table_slicing_error(self):
        with self.assertRaises(ValueError) as context_manager:
            self.table[[1]]
        # Stricter than `assertRaises`: rejects subclasses of ValueError
        self.assertEqual(type(context_manager.exception), ValueError)

    def test_table_insert_row(self):
        self.table.insert(1, {
            'name': 'Grace Hopper',
            'birthdate': datetime.date(1909, 12, 9)
        })
        self.assertEqual(self.table[1].name, 'Grace Hopper')

    def test_table_append_error(self):
        # TODO: may mock these validations and test only on *Field tests
        with self.assertRaises(ValueError) as context_manager:
            self.table.append({
                'name': 'Álvaro Justen'.encode('utf-8'),
                'birthdate': '1987-04-29'
            })
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertEqual(context_manager.exception.args[0],
                         'Binary is not supported')

        with self.assertRaises(ValueError) as context_manager:
            self.table.append({'name': 'Álvaro Justen', 'birthdate': 'WRONG'})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertIn('does not match format',
                      context_manager.exception.args[0])

    def test_table_getitem_invalid_type(self):
        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14]
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            self.table[b'name']
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_getitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            self.table['doesnt-exist']

        self.assertEqual(exception_context.exception.args[0], 'doesnt-exist')

    def test_table_getitem_column_happy_path(self):
        expected_values = ['Álvaro Justen', 'Somebody', 'Douglas Adams']
        self.assertEqual(self.table['name'], expected_values)

        expected_values = [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11)
        ]
        self.assertEqual(self.table['birthdate'], expected_values)

    def test_table_setitem_row(self):
        self.first_row['name'] = 'turicas'
        self.first_row['birthdate'] = datetime.date(2000, 1, 1)
        self.table[0] = self.first_row
        self.assertEqual(self.table[0].name, 'turicas')
        self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        self.assertEqual(self.table.field_names,
                         list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types,
                         list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['user_id'] = [4, 5, 6]

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields + 1)

        self.assertIn('user_id', self.table.fields)
        self.assertIs(self.table.fields['user_id'], rows.fields.IntegerField)
        self.assertEqual(self.table[0].user_id, 4)
        self.assertEqual(self.table[1].user_id, 5)
        self.assertEqual(self.table[2].user_id, 6)

    def test_table_setitem_column_happy_path_replace_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['name'] = [4, 5, 6]  # change values *and* type

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)

        self.assertIn('name', self.table.fields)
        self.assertIs(self.table.fields['name'], rows.fields.IntegerField)
        self.assertEqual(self.table[0].name, 4)
        self.assertEqual(self.table[1].name, 5)
        self.assertEqual(self.table[2].name, 6)

    def test_table_setitem_column_slug_field_name(self):
        self.assertNotIn('user_id', self.table.fields)
        self.table['User ID'] = [4, 5, 6]
        self.assertIn('user_id', self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table['user_id'] = [4, 5]  # list len should be 3

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)
        self.assertEqual(
            exception_context.exception.args[0],
            'Values length (2) should be the same as Table '
            'length (3)')

    def test_table_setitem_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            self.table[b'some_value'] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_delitem_row(self):
        table_rows = [row for row in self.table]
        before = len(self.table)
        del self.table[0]
        after = len(self.table)
        self.assertEqual(after, before - 1)
        for row, expected_row in zip(self.table, table_rows[1:]):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            del self.table['doesnt-exist']

        self.assertEqual(exception_context.exception.args[0], 'doesnt-exist')

    def test_table_delitem_column_happy_path(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table['name']

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        self.assertDictEqual(dict(self.table[0]._asdict()),
                             {'birthdate': datetime.date(1987, 4, 29)})
        self.assertDictEqual(dict(self.table[1]._asdict()),
                             {'birthdate': datetime.date(1990, 2, 1)})
        self.assertDictEqual(dict(self.table[2]._asdict()),
                             {'birthdate': datetime.date(1952, 3, 11)})

    def test_table_delitem_column_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        # This must be a deletion: an item assignment here would only repeat
        # test_table_setitem_invalid_type and leave the binary-key deletion
        # path untested.
        with self.assertRaises(ValueError) as exception_context:
            del self.table[b'name']  # 'name' actually exists

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_add(self):
        self.assertIs(self.table + 0, self.table)
        self.assertIs(0 + self.table, self.table)

        new_table = self.table + self.table
        self.assertEqual(new_table.fields, self.table.fields)
        self.assertEqual(len(new_table), 2 * len(self.table))
        self.assertEqual(list(new_table), list(self.table) * 2)

    def test_table_add_error(self):
        with self.assertRaises(ValueError):
            self.table + 1
        with self.assertRaises(ValueError):
            1 + self.table

    def test_table_order_by(self):
        with self.assertRaises(ValueError):
            self.table.order_by('doesnt_exist')

        before = [row.birthdate for row in self.table]
        self.table.order_by('birthdate')
        after = [row.birthdate for row in self.table]
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        self.table.order_by('-birthdate')
        final = [row.birthdate for row in self.table]
        self.assertEqual(final, list(reversed(after)))

        self.table.order_by('name')
        expected_rows = [{
            'name': 'Douglas Adams',
            'birthdate': datetime.date(1952, 3, 11)
        }, {
            'name': 'Somebody',
            'birthdate': datetime.date(1990, 2, 1)
        }, {
            'name': 'Álvaro Justen',
            'birthdate': datetime.date(1987, 4, 29)
        }]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        expected = '<rows.Table 2 fields, 3 rows>'
        self.assertEqual(expected, repr(self.table))

    def test_table_add_time(self):
        '''rows.Table.__add__ should be constant time

        To test it we double table size for each round and then compare the
        standard deviation to the mean (it will be almost the mean if the
        algorithm is not fast enough and almost 10% of the mean if it's good).
        '''
        # NOTE(review): this relies on wall-clock measurements, so it may be
        # sensitive to machine load.
        rounds = []
        table = utils.table
        for _ in range(5):
            start = time.time()
            table = table + table
            end = time.time()
            rounds.append(end - start)

        mean = sum(rounds) / len(rounds)
        # Sample standard deviation (n - 1 in the denominator)
        stdev = math.sqrt((1.0 / (len(rounds) - 1)) * sum(
            (value - mean)**2 for value in rounds))
        self.assertTrue(0.2 * mean > stdev)
示例#13
0
class TableTestCase(unittest.TestCase):
    """Tests for `rows.Table`: creation, item access, addition, ordering."""

    def setUp(self):
        """Create a table with `name`/`birthdate` fields and three rows."""
        self.table = Table(fields={
            "name": rows.fields.TextField,
            "birthdate": rows.fields.DateField
        })
        self.first_row = {
            "name": "Álvaro Justen",
            "birthdate": datetime.date(1987, 4, 29),
        }
        self.table.append(self.first_row)
        self.table.append({
            "name": "Somebody",
            "birthdate": datetime.date(1990, 2, 1)
        })
        # This birthdate is given as text instead of a `date` object
        self.table.append({"name": "Douglas Adams", "birthdate": "1952-03-11"})

    def test_table_init_slug_creation_on_fields(self):
        table = rows.Table(fields=collections.OrderedDict([(
            'Query Occurrence"( % ),"First Seen', rows.fields.FloatField)]))

        self.assertIn("query_occurrence_first_seen", table.fields)

    def test_Table_is_present_on_main_namespace(self):
        self.assertIn("Table", dir(rows))
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        # TODO: may test with all field types (using tests.utils.table)

        table_rows = [row for row in self.table]
        self.assertEqual(len(table_rows), 3)
        self.assertEqual(table_rows[0].name, "Álvaro Justen")
        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
        self.assertEqual(table_rows[1].name, "Somebody")
        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
        self.assertEqual(table_rows[2].name, "Douglas Adams")
        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))

    def test_table_slicing(self):
        self.assertEqual(len(self.table[::2]), 2)
        self.assertEqual(self.table[::2][0].name, "Álvaro Justen")

    def test_table_slicing_error(self):
        with self.assertRaises(ValueError) as context_manager:
            self.table[[1]]
        # Stricter than `assertRaises`: rejects subclasses of ValueError
        self.assertEqual(type(context_manager.exception), ValueError)

    def test_table_insert_row(self):
        self.table.insert(1, {
            "name": "Grace Hopper",
            "birthdate": datetime.date(1909, 12, 9)
        })
        self.assertEqual(self.table[1].name, "Grace Hopper")

    def test_table_append_error(self):
        # TODO: may mock these validations and test only on *Field tests
        with self.assertRaises(ValueError) as context_manager:
            self.table.append({
                "name": "Álvaro Justen".encode("utf-8"),
                "birthdate": "1987-04-29"
            })
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertEqual(context_manager.exception.args[0],
                         "Binary is not supported")

        with self.assertRaises(ValueError) as context_manager:
            self.table.append({"name": "Álvaro Justen", "birthdate": "WRONG"})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertIn("does not match format",
                      context_manager.exception.args[0])

    def test_table_getitem_invalid_type(self):
        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14]
        self.assertEqual(exception_context.exception.args[0],
                         "Unsupported key type: float")

        with self.assertRaises(ValueError) as exception_context:
            self.table[b"name"]
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_getitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            self.table["doesnt-exist"]

        self.assertEqual(exception_context.exception.args[0], "doesnt-exist")

    def test_table_getitem_column_happy_path(self):
        expected_values = ["Álvaro Justen", "Somebody", "Douglas Adams"]
        self.assertEqual(self.table["name"], expected_values)

        expected_values = [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11),
        ]
        self.assertEqual(self.table["birthdate"], expected_values)

    def test_table_setitem_row(self):
        self.first_row["name"] = "turicas"
        self.first_row["birthdate"] = datetime.date(2000, 1, 1)
        self.table[0] = self.first_row
        self.assertEqual(self.table[0].name, "turicas")
        self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        self.assertEqual(self.table.field_names,
                         list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types,
                         list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table["user_id"] = [4, 5, 6]

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields + 1)

        self.assertIn("user_id", self.table.fields)
        self.assertIs(self.table.fields["user_id"], rows.fields.IntegerField)
        self.assertEqual(self.table[0].user_id, 4)
        self.assertEqual(self.table[1].user_id, 5)
        self.assertEqual(self.table[2].user_id, 6)

    def test_table_setitem_column_happy_path_replace_column(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table["name"] = [4, 5, 6]  # change values *and* type

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)

        self.assertIn("name", self.table.fields)
        self.assertIs(self.table.fields["name"], rows.fields.IntegerField)
        self.assertEqual(self.table[0].name, 4)
        self.assertEqual(self.table[1].name, 5)
        self.assertEqual(self.table[2].name, 6)

    def test_table_setitem_column_slug_field_name(self):
        self.assertNotIn("user_id", self.table.fields)
        self.table["User ID"] = [4, 5, 6]
        self.assertIn("user_id", self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table["user_id"] = [4, 5]  # list len should be 3

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)
        self.assertEqual(
            exception_context.exception.args[0],
            "Values length (2) should be the same as Table "
            "length (3)",
        )

    def test_table_setitem_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         "Unsupported key type: float")

        with self.assertRaises(ValueError) as exception_context:
            self.table[b"some_value"] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_delitem_row(self):
        table_rows = [row for row in self.table]
        before = len(self.table)
        del self.table[0]
        after = len(self.table)
        self.assertEqual(after, before - 1)
        for row, expected_row in zip(self.table, table_rows[1:]):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        with self.assertRaises(KeyError) as exception_context:
            del self.table["doesnt-exist"]

        self.assertEqual(exception_context.exception.args[0], "doesnt-exist")

    def test_table_delitem_column_happy_path(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table["name"]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        self.assertDictEqual(dict(self.table[0]._asdict()),
                             {"birthdate": datetime.date(1987, 4, 29)})
        self.assertDictEqual(dict(self.table[1]._asdict()),
                             {"birthdate": datetime.date(1990, 2, 1)})
        self.assertDictEqual(dict(self.table[2]._asdict()),
                             {"birthdate": datetime.date(1952, 3, 11)})

    def test_table_delitem_column_invalid_type(self):
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         "Unsupported key type: float")

        # This must be a deletion: an item assignment here would only repeat
        # test_table_setitem_invalid_type and leave the binary-key deletion
        # path untested.
        with self.assertRaises(ValueError) as exception_context:
            del self.table[b"name"]  # 'name' actually exists

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_add(self):
        self.assertIs(self.table + 0, self.table)
        self.assertIs(0 + self.table, self.table)

        new_table = self.table + self.table
        self.assertEqual(new_table.fields, self.table.fields)
        self.assertEqual(len(new_table), 2 * len(self.table))
        self.assertEqual(list(new_table), list(self.table) * 2)

    def test_table_add_error(self):
        with self.assertRaises(ValueError):
            self.table + 1
        with self.assertRaises(ValueError):
            1 + self.table

    def test_table_order_by(self):
        with self.assertRaises(ValueError):
            self.table.order_by("doesnt_exist")

        before = [row.birthdate for row in self.table]
        self.table.order_by("birthdate")
        after = [row.birthdate for row in self.table]
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        self.table.order_by("-birthdate")
        final = [row.birthdate for row in self.table]
        self.assertEqual(final, list(reversed(after)))

        self.table.order_by("name")
        expected_rows = [
            {
                "name": "Douglas Adams",
                "birthdate": datetime.date(1952, 3, 11)
            },
            {
                "name": "Somebody",
                "birthdate": datetime.date(1990, 2, 1)
            },
            {
                "name": "Álvaro Justen",
                "birthdate": datetime.date(1987, 4, 29)
            },
        ]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        expected = "<rows.Table 2 fields, 3 rows>"
        self.assertEqual(expected, repr(self.table))

    def test_table_add_should_not_iterate_over_rows(self):
        table1 = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        table2 = rows.Table(fields={
            "f1": rows.fields.IntegerField,
            "f2": rows.fields.FloatField
        })
        # Replace the row storage with mocks so calls to `__add__` and
        # `__iter__` can be observed.
        table1._rows = mock.Mock()
        table1._rows.__add__ = mock.Mock()
        table1._rows.__iter__ = mock.Mock()
        table2._rows = mock.Mock()
        table2._rows.__add__ = mock.Mock()
        table2._rows.__iter__ = mock.Mock()

        self.assertFalse(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
        table1 + table2
        # Only the left operand's storage may be added; nothing is iterated
        self.assertTrue(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
示例#14
0
def create_table(data,
                 meta=None,
                 fields=None,
                 skip_header=True,
                 import_fields=None,
                 samples=None,
                 force_types=None,
                 *args,
                 **kwargs):
    """Create a `Table` from an iterable of rows.

    When `fields` is None, the first row of `data` is used as the header and
    the field types come from `detect_types`, which receives either the first
    `samples` data rows or, when `samples` is None, every remaining row.
    Entries of `force_types` (field name -> field type) then override the
    detected types.

    When `fields` is given it must be an `OrderedDict`. Its keys are
    normalized with `make_header`, no detection is done (`force_types` is
    not consulted) and, if `skip_header` is true, the first row of `data`
    is discarded. Empty `data` produces an empty table in this mode.

    `import_fields`, when given, is normalized with `make_header` and
    restricts the resulting table to those fields, in that order.

    `meta` is passed to `Table`; extra positional and keyword arguments are
    forwarded to `detect_types`.

    Raises:
        ValueError: if `fields` is not an `OrderedDict`, or if
            `import_fields` names a field that is not in the header.
        StopIteration: if `fields` is None and `data` is empty, since there
            is no header row to read.
    """
    # TODO: add auto_detect_types=True parameter
    table_rows = iter(data)
    sample_rows = []

    if fields is None:
        header = make_header(next(table_rows))

        if samples is not None:
            sample_rows = list(islice(table_rows, 0, samples))
        else:
            sample_rows = list(table_rows)

        fields = detect_types(header, sample_rows, *args, **kwargs)

        if force_types is not None:
            # TODO: optimize field detection (ignore fields on `force_types`)
            for field_name, field_type in force_types.items():
                fields[field_name] = field_type
    else:
        if not isinstance(fields, OrderedDict):
            raise ValueError('`fields` must be an `OrderedDict`')

        if skip_header:
            # The default keeps an empty `data` from leaking StopIteration:
            # with a known schema, no rows simply means an empty table.
            next(table_rows, None)

        header = make_header(list(fields.keys()))
        # Same types, re-keyed by the normalized field names.
        fields = OrderedDict(zip(header, fields.values()))

    if import_fields is not None:
        # TODO: can optimize if import_fields is not None.
        #       Example: do not detect all columns
        import_fields = make_header(import_fields)

        diff = set(import_fields) - set(header)
        if diff:
            # Sorted so the error message does not depend on set ordering.
            field_names = ', '.join('"{}"'.format(field)
                                    for field in sorted(diff))
            raise ValueError("Invalid field names: {}".format(field_names))

        fields = OrderedDict((field_name, fields[field_name])
                             for field_name in import_fields)

    table = Table(fields=fields, meta=meta)
    # TODO: put this inside Table.__init__
    # Rows consumed for type detection are replayed before the remainder.
    for row in chain(sample_rows, table_rows):
        table.append(dict(zip(header, row)))

    return table
示例#15
0
class TableTestCase(unittest.TestCase):
    '''Tests for row/column access and mutation on `rows.Table`.'''

    def setUp(self):
        '''Create a three-row table with `name` and `birthdate` fields.'''
        self.table = Table(fields={'name': rows.fields.TextField,
                                   'birthdate': rows.fields.DateField, })
        self.first_row = {'name': 'Álvaro Justen',
                          'birthdate': datetime.date(1987, 4, 29)}
        self.table.append(self.first_row)
        self.table.append({'name': 'Somebody',
                           'birthdate': datetime.date(1990, 2, 1)})
        # Birthdate given as text; test_table_iteration expects a date back.
        self.table.append({'name': 'Douglas Adams',
                           'birthdate': '1952-03-11'})

    def test_Table_is_present_on_main_namespace(self):
        '''`rows.Table` is the same object as the imported `Table`.'''
        self.assertIn('Table', dir(rows))
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        '''Iteration yields the rows in insertion order, dates as dates.'''
        # TODO: may test with all field types (using tests.utils.table)

        table_rows = [row for row in self.table]
        self.assertEqual(len(table_rows), 3)
        self.assertEqual(table_rows[0].name, 'Álvaro Justen')
        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
        self.assertEqual(table_rows[1].name, 'Somebody')
        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
        self.assertEqual(table_rows[2].name, 'Douglas Adams')
        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))

    def test_table_slicing(self):
        '''A stepped slice has the expected length and first row.'''
        self.assertEqual(len(self.table[::2]), 2)
        self.assertEqual(self.table[::2][0].name, 'Álvaro Justen')

    def test_table_slicing_error(self):
        '''Indexing with a list raises ValueError.'''
        with self.assertRaises(ValueError) as context_manager:
            self.table[[1]]
        self.assertEqual(type(context_manager.exception), ValueError)

    def test_table_insert_row(self):
        '''`insert` places the new row at the requested position.'''
        self.table.insert(1, {'name': 'Grace Hopper',
                              'birthdate': datetime.date(1909, 12, 9)})
        self.assertEqual(self.table[1].name, 'Grace Hopper')

    def test_table_append_error(self):
        '''Appending binary text or an unparsable date raises ValueError.'''
        # TODO: may mock these validations and test only on *Field tests
        with self.assertRaises(ValueError) as context_manager:
            self.table.append({'name': 'Álvaro Justen'.encode('utf-8'),
                               'birthdate': '1987-04-29'})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertEqual(context_manager.exception.args[0],
                         'Binary is not supported')

        with self.assertRaises(ValueError) as context_manager:
            self.table.append({'name': 'Álvaro Justen', 'birthdate': 'WRONG'})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertIn('does not match format',
                      context_manager.exception.args[0])

    def test_table_getitem_invalid_type(self):
        '''Float and binary keys are rejected on item access.'''
        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14]
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            self.table[b'name']
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_getitem_column_doesnt_exist(self):
        '''An unknown column name raises KeyError carrying that name.'''
        with self.assertRaises(KeyError) as exception_context:
            self.table['doesnt-exist']

        self.assertEqual(exception_context.exception.args[0],
                         'doesnt-exist')

    def test_table_getitem_column_happy_path(self):
        '''Indexing by field name returns that column's values as a list.'''
        expected_values = ['Álvaro Justen', 'Somebody', 'Douglas Adams']
        self.assertEqual(self.table['name'], expected_values)

        expected_values = [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11)]
        self.assertEqual(self.table['birthdate'], expected_values)

    def test_table_setitem_row(self):
        '''Assigning a dict to an integer index replaces that row.'''
        self.first_row['name'] = 'turicas'
        self.first_row['birthdate'] = datetime.date(2000, 1, 1)
        self.table[0] = self.first_row
        self.assertEqual(self.table[0].name, 'turicas')
        self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        '''`field_names`/`field_types` mirror the keys/values of `fields`.'''
        self.assertEqual(self.table.field_names,
                         list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types,
                         list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        '''Assigning a list to a new name adds an integer-typed column.'''
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['user_id'] = [4, 5, 6]

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields + 1)

        self.assertIn('user_id', self.table.fields)
        self.assertIs(self.table.fields['user_id'], rows.fields.IntegerField)
        self.assertEqual(self.table[0].user_id, 4)
        self.assertEqual(self.table[1].user_id, 5)
        self.assertEqual(self.table[2].user_id, 6)

    def test_table_setitem_column_happy_path_replace_column(self):
        '''Assigning to an existing name replaces its values and type.'''
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['name'] = [4, 5, 6]  # change values *and* type

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)

        self.assertIn('name', self.table.fields)
        self.assertIs(self.table.fields['name'], rows.fields.IntegerField)
        self.assertEqual(self.table[0].name, 4)
        self.assertEqual(self.table[1].name, 5)
        self.assertEqual(self.table[2].name, 6)

    def test_table_setitem_column_slug_field_name(self):
        '''A column name such as "User ID" is stored as `user_id`.'''
        self.assertNotIn('user_id', self.table.fields)
        self.table['User ID'] = [4, 5, 6]
        self.assertIn('user_id', self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        '''A column whose length differs from the table is rejected.'''
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table['user_id'] = [4, 5]  # list len should be 3

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)
        self.assertEqual(exception_context.exception.args[0],
                         'Values length (2) should be the same as Table '
                         'length (3)')

    def test_table_setitem_invalid_type(self):
        '''Float and binary keys are rejected on assignment, table intact.'''
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            self.table[b'some_value'] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_delitem_row(self):
        '''Deleting index 0 removes only the first row.'''
        table_rows = [row for row in self.table]
        before = len(self.table)
        del self.table[0]
        after = len(self.table)
        self.assertEqual(after, before - 1)
        for row, expected_row in zip(self.table, table_rows[1:]):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        '''Deleting an unknown column raises KeyError carrying that name.'''
        with self.assertRaises(KeyError) as exception_context:
            del self.table['doesnt-exist']

        self.assertEqual(exception_context.exception.args[0],
                         'doesnt-exist')

    def test_table_delitem_column_happy_path(self):
        '''Deleting a column drops the field and keeps every row.'''
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table['name']

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        self.assertDictEqual(dict(self.table[0]._asdict()),
                             {'birthdate': datetime.date(1987, 4, 29)})
        self.assertDictEqual(dict(self.table[1]._asdict()),
                             {'birthdate': datetime.date(1990, 2, 1)})
        self.assertDictEqual(dict(self.table[2]._asdict()),
                             {'birthdate': datetime.date(1952, 3, 11)})

    def test_table_delitem_column_invalid_type(self):
        '''Float and binary keys are rejected on deletion, table intact.'''
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: float')

        # This used to be a `__setitem__` call (copy-paste from the setitem
        # test), so the binary-key deletion path was never exercised.
        with self.assertRaises(ValueError) as exception_context:
            del self.table[b'name']  # 'name' actually exists (as text)

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.args[0],
                         'Unsupported key type: {}'.format(binary_type_name))

    def test_table_add(self):
        '''Adding 0 returns the same table; adding tables joins the rows.'''
        self.assertIs(self.table + 0, self.table)
        self.assertIs(0 + self.table, self.table)

        new_table = self.table + self.table
        self.assertEqual(new_table.fields, self.table.fields)
        self.assertEqual(len(new_table), 2 * len(self.table))
        self.assertEqual(list(new_table), list(self.table) * 2)

    def test_table_add_error(self):
        '''Adding a non-zero integer raises ValueError on either side.'''
        with self.assertRaises(ValueError):
            self.table + 1
        with self.assertRaises(ValueError):
            1 + self.table

    def test_table_order_by(self):
        '''`order_by` sorts in place; a leading "-" reverses the order.'''
        with self.assertRaises(ValueError):
            self.table.order_by('doesnt_exist')

        before = [row.birthdate for row in self.table]
        self.table.order_by('birthdate')
        after = [row.birthdate for row in self.table]
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        self.table.order_by('-birthdate')
        final = [row.birthdate for row in self.table]
        self.assertEqual(final, list(reversed(after)))

        self.table.order_by('name')
        expected_rows = [
            {'name': 'Douglas Adams', 'birthdate': datetime.date(1952, 3, 11)},
            {'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
            {'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)}]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        '''`repr` reports the number of fields and rows.'''
        expected = '<rows.Table 2 fields, 3 rows>'
        self.assertEqual(expected, repr(self.table))

    def test_table_add_time(self):
        '''rows.Table.__add__ should be constant time

        To test it we double table size for each round and then compare the
        standard deviation to the mean (it will be almost the mean if the
        algorithm is not fast enough and almost 10% of the mean if it's good).
        '''
        # NOTE(review): wall-clock based, so this can fail spuriously on a
        # loaded machine; the threshold asserted below is 20% of the mean.
        rounds = []
        table = utils.table
        for _ in range(10):
            start = time.time()
            table = table + table
            end = time.time()
            rounds.append(end - start)

        mean = sum(rounds) / len(rounds)
        # Sample standard deviation (n - 1 in the denominator).
        stdev = math.sqrt((1.0 / (len(rounds) - 1)) *
                          sum((value - mean) ** 2 for value in rounds))
        self.assertTrue(0.2 * mean > stdev)
示例#16
0
class TableTestCase(unittest.TestCase):
    """Tests for row/column access and mutation on `rows.Table`."""

    def setUp(self):
        """Create a three-row table with `name` and `birthdate` fields."""
        self.table = Table(
            fields={"name": rows.fields.TextField, "birthdate": rows.fields.DateField}
        )
        self.first_row = {
            "name": "Álvaro Justen",
            "birthdate": datetime.date(1987, 4, 29),
        }
        self.table.append(self.first_row)
        self.table.append({"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)})
        # Birthdate given as text; test_table_iteration expects a date back.
        self.table.append({"name": "Douglas Adams", "birthdate": "1952-03-11"})

    def test_table_init_slug_creation_on_fields(self):
        """Field names passed to the constructor are stored as slugs."""
        table = rows.Table(
            fields=collections.OrderedDict(
                [('Query Occurrence"( % ),"First Seen', rows.fields.FloatField)]
            )
        )

        self.assertIn("query_occurrence_first_seen", table.fields)

    def test_Table_is_present_on_main_namespace(self):
        """`rows.Table` is the same object as the imported `Table`."""
        self.assertIn("Table", dir(rows))
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        """Iteration yields the rows in insertion order, dates as dates."""
        # TODO: may test with all field types (using tests.utils.table)

        table_rows = [row for row in self.table]
        self.assertEqual(len(table_rows), 3)
        self.assertEqual(table_rows[0].name, "Álvaro Justen")
        self.assertEqual(table_rows[0].birthdate, datetime.date(1987, 4, 29))
        self.assertEqual(table_rows[1].name, "Somebody")
        self.assertEqual(table_rows[1].birthdate, datetime.date(1990, 2, 1))
        self.assertEqual(table_rows[2].name, "Douglas Adams")
        self.assertEqual(table_rows[2].birthdate, datetime.date(1952, 3, 11))

    def test_table_slicing(self):
        """A stepped slice has the expected length and first row."""
        self.assertEqual(len(self.table[::2]), 2)
        self.assertEqual(self.table[::2][0].name, "Álvaro Justen")

    def test_table_slicing_error(self):
        """Indexing with a list raises ValueError."""
        with self.assertRaises(ValueError) as context_manager:
            self.table[[1]]
        self.assertEqual(type(context_manager.exception), ValueError)

    def test_table_insert_row(self):
        """`insert` places the new row at the requested position."""
        self.table.insert(
            1, {"name": "Grace Hopper", "birthdate": datetime.date(1909, 12, 9)}
        )
        self.assertEqual(self.table[1].name, "Grace Hopper")

    def test_table_append_error(self):
        """Appending binary text or an unparsable date raises ValueError."""
        # TODO: may mock these validations and test only on *Field tests
        with self.assertRaises(ValueError) as context_manager:
            self.table.append(
                {"name": "Álvaro Justen".encode("utf-8"), "birthdate": "1987-04-29"}
            )
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertEqual(context_manager.exception.args[0], "Binary is not supported")

        with self.assertRaises(ValueError) as context_manager:
            self.table.append({"name": "Álvaro Justen", "birthdate": "WRONG"})
        self.assertEqual(type(context_manager.exception), ValueError)
        self.assertIn("does not match format", context_manager.exception.args[0])

    def test_table_getitem_invalid_type(self):
        """Float and binary keys are rejected on item access."""
        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14]
        self.assertEqual(
            exception_context.exception.args[0], "Unsupported key type: float"
        )

        with self.assertRaises(ValueError) as exception_context:
            self.table[b"name"]
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_getitem_column_doesnt_exist(self):
        """An unknown column name raises KeyError carrying that name."""
        with self.assertRaises(KeyError) as exception_context:
            self.table["doesnt-exist"]

        self.assertEqual(exception_context.exception.args[0], "doesnt-exist")

    def test_table_getitem_column_happy_path(self):
        """Indexing by field name returns that column's values as a list."""
        expected_values = ["Álvaro Justen", "Somebody", "Douglas Adams"]
        self.assertEqual(self.table["name"], expected_values)

        expected_values = [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11),
        ]
        self.assertEqual(self.table["birthdate"], expected_values)

    def test_table_setitem_row(self):
        """Assigning a dict to an integer index replaces that row."""
        self.first_row["name"] = "turicas"
        self.first_row["birthdate"] = datetime.date(2000, 1, 1)
        self.table[0] = self.first_row
        self.assertEqual(self.table[0].name, "turicas")
        self.assertEqual(self.table[0].birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        """`field_names`/`field_types` mirror the keys/values of `fields`."""
        self.assertEqual(self.table.field_names, list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types, list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        """Assigning a list to a new name adds an integer-typed column."""
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table["user_id"] = [4, 5, 6]

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields + 1)

        self.assertIn("user_id", self.table.fields)
        self.assertIs(self.table.fields["user_id"], rows.fields.IntegerField)
        self.assertEqual(self.table[0].user_id, 4)
        self.assertEqual(self.table[1].user_id, 5)
        self.assertEqual(self.table[2].user_id, 6)

    def test_table_setitem_column_happy_path_replace_column(self):
        """Assigning to an existing name replaces its values and type."""
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table["name"] = [4, 5, 6]  # change values *and* type

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)

        self.assertIn("name", self.table.fields)
        self.assertIs(self.table.fields["name"], rows.fields.IntegerField)
        self.assertEqual(self.table[0].name, 4)
        self.assertEqual(self.table[1].name, 5)
        self.assertEqual(self.table[2].name, 6)

    def test_table_setitem_column_slug_field_name(self):
        """A column name such as "User ID" is stored as `user_id`."""
        self.assertNotIn("user_id", self.table.fields)
        self.table["User ID"] = [4, 5, 6]
        self.assertIn("user_id", self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        """A column whose length differs from the table is rejected."""
        number_of_fields = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table["user_id"] = [4, 5]  # list len should be 3

        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), number_of_fields)
        self.assertEqual(
            exception_context.exception.args[0],
            "Values length (2) should be the same as Table length (3)",
        )

    def test_table_setitem_invalid_type(self):
        """Float and binary keys are rejected on assignment, table intact."""
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            self.table[3.14] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(
            exception_context.exception.args[0], "Unsupported key type: float"
        )

        with self.assertRaises(ValueError) as exception_context:
            self.table[b"some_value"] = []

        self.assertEqual(len(self.table), 3)  # should not add any row
        self.assertDictEqual(fields, self.table.fields)  # should not add field
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_delitem_row(self):
        """Deleting index 0 removes only the first row."""
        table_rows = [row for row in self.table]
        before = len(self.table)
        del self.table[0]
        after = len(self.table)
        self.assertEqual(after, before - 1)
        for row, expected_row in zip(self.table, table_rows[1:]):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        """Deleting an unknown column raises KeyError carrying that name."""
        with self.assertRaises(KeyError) as exception_context:
            del self.table["doesnt-exist"]

        self.assertEqual(exception_context.exception.args[0], "doesnt-exist")

    def test_table_delitem_column_happy_path(self):
        """Deleting a column drops the field and keeps every row."""
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table["name"]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        self.assertDictEqual(
            dict(self.table[0]._asdict()), {"birthdate": datetime.date(1987, 4, 29)}
        )
        self.assertDictEqual(
            dict(self.table[1]._asdict()), {"birthdate": datetime.date(1990, 2, 1)}
        )
        self.assertDictEqual(
            dict(self.table[2]._asdict()), {"birthdate": datetime.date(1952, 3, 11)}
        )

    def test_table_delitem_column_invalid_type(self):
        """Float and binary keys are rejected on deletion, table intact."""
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(
            exception_context.exception.args[0], "Unsupported key type: float"
        )

        # This used to be a `__setitem__` call (copy-paste from the setitem
        # test), so the binary-key deletion path was never exercised.
        with self.assertRaises(ValueError) as exception_context:
            del self.table[b"name"]  # 'name' actually exists (as text)

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(
            exception_context.exception.args[0],
            "Unsupported key type: {}".format(binary_type_name),
        )

    def test_table_add(self):
        """Adding 0 returns the same table; adding tables joins the rows."""
        self.assertIs(self.table + 0, self.table)
        self.assertIs(0 + self.table, self.table)

        new_table = self.table + self.table
        self.assertEqual(new_table.fields, self.table.fields)
        self.assertEqual(len(new_table), 2 * len(self.table))
        self.assertEqual(list(new_table), list(self.table) * 2)

    def test_table_add_error(self):
        """Adding a non-zero integer raises ValueError on either side."""
        with self.assertRaises(ValueError):
            self.table + 1
        with self.assertRaises(ValueError):
            1 + self.table

    def test_table_order_by(self):
        """`order_by` sorts in place; a leading "-" reverses the order."""
        with self.assertRaises(ValueError):
            self.table.order_by("doesnt_exist")

        before = [row.birthdate for row in self.table]
        self.table.order_by("birthdate")
        after = [row.birthdate for row in self.table]
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        self.table.order_by("-birthdate")
        final = [row.birthdate for row in self.table]
        self.assertEqual(final, list(reversed(after)))

        self.table.order_by("name")
        expected_rows = [
            {"name": "Douglas Adams", "birthdate": datetime.date(1952, 3, 11)},
            {"name": "Somebody", "birthdate": datetime.date(1990, 2, 1)},
            {"name": "Álvaro Justen", "birthdate": datetime.date(1987, 4, 29)},
        ]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        """`repr` reports the number of fields and rows."""
        expected = "<rows.Table 2 fields, 3 rows>"
        self.assertEqual(expected, repr(self.table))

    def test_table_add_should_not_iterate_over_rows(self):
        """Adding two tables calls `_rows.__add__` and never iterates rows."""
        table1 = rows.Table(
            fields={"f1": rows.fields.IntegerField, "f2": rows.fields.FloatField}
        )
        table2 = rows.Table(
            fields={"f1": rows.fields.IntegerField, "f2": rows.fields.FloatField}
        )
        # Replace the row storage with mocks so calls to it can be observed.
        table1._rows = mock.Mock()
        table1._rows.__add__ = mock.Mock()
        table1._rows.__iter__ = mock.Mock()
        table2._rows = mock.Mock()
        table2._rows.__add__ = mock.Mock()
        table2._rows.__iter__ = mock.Mock()

        self.assertFalse(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
        table1 + table2
        # Only the left operand's `__add__` ran; no rows were iterated.
        self.assertTrue(table1._rows.__add__.called)
        self.assertFalse(table2._rows.__add__.called)
        self.assertFalse(table1._rows.__iter__.called)
        self.assertFalse(table2._rows.__iter__.called)
示例#17
0
文件: utils.py 项目: turicas/rows
        "datetime_column": datetime.datetime(2015, 5, 6, 12, 1, 2),
        "percent_column": Decimal("0.02"),
        "unicode_column": "test",
    },
    {
        "float_column": None,
        "decimal_column": None,
        "bool_column": None,
        "integer_column": None,
        "date_column": None,
        "datetime_column": None,
        "percent_column": None,
        "unicode_column": "",
    },
]
# Module-level fixture: a Table built with the FIELDS schema and filled, one
# append at a time, with every entry of EXPECTED_ROWS.
table = Table(fields=FIELDS)
for row in EXPECTED_ROWS:
    table.append(row)
# Metadata is set directly on the private attribute.
table._meta = {"test": 123}


class LazyGenerator(object):
    """Iterable producing a header row followed by numeric rows.

    Each iteration yields the header `["number", "number_sq",
    "number_double"]` and then one row per integer in `range(max_number)`.
    `last` holds the most recent number produced (None before the first
    data row), so a caller can tell how far the iterable has been consumed.
    """

    def __init__(self, max_number):
        self.last = None
        self.max_number = max_number

    def __iter__(self):
        header = ["number", "number_sq", "number_double"]
        yield header
        for value in range(self.max_number):
            # Record progress before handing the row to the consumer.
            self.last = value
            data_row = [value, value ** 2, value * 2]
            yield data_row
示例#18
0
class TableTestCase(unittest.TestCase):
    def setUp(self):
        """Build the three-row (name, birthdate) table used by every test."""
        field_types = {
            'name': rows.fields.TextField,
            'birthdate': rows.fields.DateField,
        }
        self.table = Table(fields=field_types)
        self.first_row = {
            'name': u'Álvaro Justen',
            'birthdate': datetime.date(1987, 4, 29),
        }
        other_rows = (
            {'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
            # Birthdate given as text; test_table_iteration expects to read
            # it back as a `datetime.date`.
            {'name': u'Douglas Adams', 'birthdate': '1952-03-11'},
        )
        self.table.append(self.first_row)
        for row in other_rows:
            self.table.append(row)

    def test_Table_is_present_on_main_namespace(self):
        """`Table` is listed by `dir(rows)` and is the very same object."""
        exported_names = dir(rows)
        self.assertIn('Table', exported_names)
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        """Iterating the table yields the rows in insertion order."""
        # TODO: may test with all field types (using tests.utils.table)
        expected = (
            (u'Álvaro Justen', datetime.date(1987, 4, 29)),
            (u'Somebody', datetime.date(1990, 2, 1)),
            (u'Douglas Adams', datetime.date(1952, 3, 11)),
        )

        table_rows = list(self.table)
        self.assertEqual(len(table_rows), 3)
        for row, (name, birthdate) in zip(table_rows, expected):
            self.assertEqual(row.name, name)
            self.assertEqual(row.birthdate, birthdate)

    def test_table_slicing(self):
        """An extended slice returns every selected row, starting at row 0."""
        every_other = self.table[::2]
        self.assertEqual(len(every_other), 2)

        first_selected = self.table[::2][0]
        self.assertEqual(first_selected.name, u'Álvaro Justen')

    def test_table_slicing_error(self):
        """Indexing with a list raises exactly `ValueError`, not a subclass."""
        with self.assertRaises(ValueError) as caught:
            self.table[[1]]
        raised_type = type(caught.exception)
        self.assertEqual(raised_type, ValueError)

    def test_table_insert_row(self):
        """`insert(1, row)` makes the new row readable at index 1."""
        new_row = {
            'name': u'Grace Hopper',
            'birthdate': datetime.date(1909, 12, 9),
        }
        self.table.insert(1, new_row)

        inserted = self.table[1]
        self.assertEqual(inserted.name, u'Grace Hopper')

    def test_table_append_error(self):
        """Appending a row with an invalid value raises a `ValueError`."""
        # TODO: may mock these validations and test only on *Field tests

        # Encoded (bytes) name: expected to fail as a UnicodeDecodeError,
        # which is itself a ValueError subclass. The row is built inside the
        # `with` block so that everything stays under `assertRaises`.
        with self.assertRaises(ValueError) as caught:
            self.table.append({
                'name': 'Álvaro Justen'.encode('utf-8'),
                'birthdate': '1987-04-29',
            })
        self.assertEqual(type(caught.exception), UnicodeDecodeError)

        # Unparseable date text.
        with self.assertRaises(ValueError) as caught:
            self.table.append({'name': u'Álvaro Justen', 'birthdate': 'WRONG'})
        error = caught.exception
        self.assertEqual(type(error), ValueError)
        self.assertIn('does not match format', error.message)

    def test_table_getitem_invalid_type(self):
        """Float and bytes keys are rejected with an explanatory message."""
        cases = (
            (3.14, 'Unsupported key type: float'),
            # TODO: should change to 'bytes' on Python3
            (b'name', 'Unsupported key type: str'),
        )
        for key, expected_message in cases:
            with self.assertRaises(ValueError) as exception_context:
                self.table[key]
            self.assertEqual(exception_context.exception.message,
                             expected_message)

    def test_table_getitem_column_doesnt_exist(self):
        """Reading an unknown column raises `KeyError` carrying its name."""
        with self.assertRaises(KeyError) as caught:
            self.table['doesnt-exist']

        error = caught.exception
        self.assertEqual(error.message, 'doesnt-exist')

    def test_table_getitem_column_happy_path(self):
        """Indexing by column name returns that column's values as a list."""
        names = self.table['name']
        self.assertEqual(names, ['Álvaro Justen', 'Somebody', 'Douglas Adams'])

        birthdates = self.table['birthdate']
        self.assertEqual(birthdates, [
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11),
        ])

    def test_table_setitem_row(self):
        """Assigning a dict to an integer index replaces that row."""
        replacement = self.first_row
        replacement['name'] = 'turicas'
        replacement['birthdate'] = datetime.date(2000, 1, 1)

        self.table[0] = replacement

        updated = self.table[0]
        self.assertEqual(updated.name, 'turicas')
        self.assertEqual(updated.birthdate, datetime.date(2000, 1, 1))

    def test_field_names_and_types(self):
        """`field_names` / `field_types` mirror the keys / values of `fields`."""
        # `dict.keys()` and `dict.values()` are views on Python 3 and never
        # compare equal to a list, so materialize them before comparing.
        # On Python 2 they already are lists and `list()` is a harmless copy.
        self.assertEqual(self.table.field_names,
                         list(self.table.fields.keys()))
        self.assertEqual(self.table.field_types,
                         list(self.table.fields.values()))

    def test_table_setitem_column_happy_path_new_column(self):
        """Assigning a list to a new name adds a column with a detected type."""
        fields_before = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['user_id'] = [4, 5, 6]

        # Same number of rows, exactly one more field.
        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), fields_before + 1)

        self.assertIn('user_id', self.table.fields)
        self.assertIs(self.table.fields['user_id'], rows.fields.IntegerField)
        for index, expected in enumerate((4, 5, 6)):
            self.assertEqual(self.table[index].user_id, expected)

    def test_table_setitem_column_happy_path_replace_column(self):
        """Assigning a list to an existing column replaces values and type."""
        fields_before = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        self.table['name'] = [4, 5, 6]  # change values *and* type

        # Neither the row count nor the field count may change.
        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), fields_before)

        self.assertIn('name', self.table.fields)
        self.assertIs(self.table.fields['name'], rows.fields.IntegerField)
        for index, expected in enumerate((4, 5, 6)):
            self.assertEqual(self.table[index].name, expected)

    def test_table_setitem_column_slug_field_name(self):
        """A column assigned as 'User ID' is stored under the name 'user_id'."""
        slug = 'user_id'
        self.assertNotIn(slug, self.table.fields)

        self.table['User ID'] = [4, 5, 6]

        self.assertIn(slug, self.table.fields)

    def test_table_setitem_column_invalid_length(self):
        """A column whose length differs from the table's is rejected."""
        fields_before = len(self.table.fields)
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as caught:
            self.table['user_id'] = [4, 5]  # list len should be 3

        # The failed assignment must leave rows and fields untouched.
        self.assertEqual(len(self.table), 3)
        self.assertEqual(len(self.table.fields), fields_before)

        expected_message = ('Values length (2) should be the same as Table '
                            'length (3)')
        self.assertEqual(caught.exception.message, expected_message)

    def test_table_setitem_invalid_type(self):
        """Assigning through a float or bytes key raises and changes nothing."""
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        cases = (
            (3.14, 'Unsupported key type: float'),
            # TODO: should change to 'bytes' on Python3
            (b'some_value', 'Unsupported key type: str'),
        )
        for key, expected_message in cases:
            with self.assertRaises(ValueError) as exception_context:
                self.table[key] = []

            self.assertEqual(len(self.table), 3)  # should not add any row
            # should not add field
            self.assertDictEqual(fields, self.table.fields)
            self.assertEqual(exception_context.exception.message,
                             expected_message)

    def test_table_delitem_row(self):
        """`del table[0]` drops the first row and keeps the others in order."""
        snapshot = list(self.table)
        size_before = len(self.table)

        del self.table[0]

        size_after = len(self.table)
        self.assertEqual(size_after, size_before - 1)
        remaining_expected = snapshot[1:]
        for row, expected_row in zip(self.table, remaining_expected):
            self.assertEqual(row, expected_row)

    def test_table_delitem_column_doesnt_exist(self):
        """Deleting an unknown column raises `KeyError` carrying its name."""
        with self.assertRaises(KeyError) as caught:
            del self.table['doesnt-exist']

        error = caught.exception
        self.assertEqual(error.message, 'doesnt-exist')

    def test_table_delitem_column_happy_path(self):
        """Deleting a column removes it from the fields and from every row."""
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        del self.table['name']

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertEqual(len(self.table.fields), len(fields) - 1)

        # Only the birthdate is left in each row.
        remaining_birthdates = (
            datetime.date(1987, 4, 29),
            datetime.date(1990, 2, 1),
            datetime.date(1952, 3, 11),
        )
        for index, birthdate in enumerate(remaining_birthdates):
            self.assertDictEqual(dict(self.table[index]._asdict()),
                                 {'birthdate': birthdate})

    def test_table_delitem_column_invalid_type(self):
        """Deleting through a float or bytes key raises and changes nothing."""
        fields = self.table.fields.copy()
        self.assertEqual(len(self.table), 3)

        with self.assertRaises(ValueError) as exception_context:
            del self.table[3.14]

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.message,
                         'Unsupported key type: float')

        with self.assertRaises(ValueError) as exception_context:
            # This must be a deletion. The statement used to be an
            # assignment (`self.table[b'name'] = []`), which only repeated
            # the __setitem__ check and never exercised __delitem__ with a
            # bytes key.
            del self.table[b'name']  # u'name' actually exists

        self.assertEqual(len(self.table), 3)  # should not del any row
        self.assertDictEqual(fields, self.table.fields)  # should not del field
        self.assertEqual(exception_context.exception.message,
                         'Unsupported key type: str')
        # TODO: should change to 'bytes' on Python3

    def test_table_add(self):
        """Adding 0 returns the table itself; adding a table concatenates."""
        table = self.table
        # 0 on either side acts as the identity and returns the same object.
        self.assertIs(table + 0, table)
        self.assertIs(0 + table, table)

        doubled = table + table
        self.assertEqual(doubled.fields, table.fields)
        self.assertEqual(len(doubled), 2 * len(table))
        self.assertEqual(list(doubled), list(table) * 2)

    def test_table_add_error(self):
        """Adding a non-zero integer on either side raises `ValueError`."""
        self.assertRaises(ValueError, lambda: self.table + 1)
        self.assertRaises(ValueError, lambda: 1 + self.table)

    def test_table_order_by(self):
        """`order_by` sorts in place; a leading '-' reverses the order."""

        def current_birthdates():
            return [row.birthdate for row in self.table]

        with self.assertRaises(ValueError):
            self.table.order_by('doesnt_exist')

        # Ascending by birthdate.
        before = current_birthdates()
        self.table.order_by('birthdate')
        after = current_birthdates()
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        # Descending by birthdate.
        self.table.order_by('-birthdate')
        final = current_birthdates()
        self.assertEqual(final, after[::-1])

        # Ascending by name.
        self.table.order_by('name')
        expected_rows = [
            {'name': 'Douglas Adams', 'birthdate': datetime.date(1952, 3, 11)},
            {'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
            {'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)},
        ]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))

    def test_table_repr(self):
        """`repr` reports the number of fields and the number of rows."""
        self.assertEqual('<rows.Table 2 fields, 3 rows>', repr(self.table))
示例#19
0
文件: utils.py 项目: tilacog/rows
        'unicode_column': 'álvaro',
        'null_column': 'none'.encode('utf-8')
    },
    {
        'float_column': 1.2345,
        'decimal_column': 1.2345,
        'bool_column': False,
        'integer_column': 6,
        'date_column': datetime.date(2015, 5, 6),
        'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2),
        'percent_column': Decimal('0.02'),
        'unicode_column': 'test',
        'null_column': ''.encode('utf-8')
    },
]
# Module-level fixture: a Table declared with `expected_fields` and filled,
# one row at a time, from `expected_rows`.
table = Table(fields=expected_fields)
for row in expected_rows:
    table.append(row)
# Sample metadata, written straight to the private `_meta` attribute.
table._meta = {'test': 123}


class RowsTestMixIn(object):
    """Test mix-in that removes registered files once a test finishes.

    Tests add paths to `self.files_to_delete`; `tearDown` unlinks each one.
    """

    # Read by unittest: `None` disables truncation of assertion diffs.
    maxDiff = None

    def setUp(self):
        """Start every test with an empty clean-up list."""
        self.files_to_delete = []

    def tearDown(self):
        """Unlink every registered path, in registration order."""
        pending = self.files_to_delete
        for path in pending:
            os.unlink(path)
示例#20
0
            'datetime_column': datetime.datetime(2015, 5, 6, 12, 1, 2),
            'percent_column': Decimal('0.02'),
            'unicode_column': 'test',
        },
        {
            'float_column': None,
            'decimal_column': None,
            'bool_column': None,
            'integer_column': None,
            'date_column': None,
            'datetime_column': None,
            'percent_column': None,
            'unicode_column': '',
        }
]
# Module-level fixture: a Table declared with FIELDS and filled, one row at a
# time, from EXPECTED_ROWS.
table = Table(fields=FIELDS)
for row in EXPECTED_ROWS:
    table.append(row)
# Sample metadata, written straight to the private `_meta` attribute.
table._meta = {'test': 123}


class RowsTestMixIn(object):

    maxDiff = None
    override_fields = None

    def setUp(self):
        self.files_to_delete = []

    def tearDown(self):
        for filename in self.files_to_delete:
示例#21
0
class TableTestCase(unittest.TestCase):
    """Exercise `Table` iteration, item access, addition and ordering."""

    def setUp(self):
        """Build the three-row (name, birthdate) table used by every test."""
        field_types = {'name': rows.fields.UnicodeField,
                       'birthdate': rows.fields.DateField}
        self.table = Table(fields=field_types)
        self.first_row = {'name': u'Álvaro Justen',
                          'birthdate': datetime.date(1987, 4, 29)}
        other_rows = (
            {'name': u'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
            # Birthdate given as text; test_table_iteration expects to read
            # it back as a `datetime.date`.
            {'name': u'Douglas Adams', 'birthdate': '1952-03-11'},
        )
        self.table.append(self.first_row)
        for row in other_rows:
            self.table.append(row)

    def test_Table_is_present_on_main_namespace(self):
        """`Table` is listed by `dir(rows)` and is the very same object."""
        exported_names = dir(rows)
        self.assertIn('Table', exported_names)
        self.assertIs(Table, rows.Table)

    def test_table_iteration(self):
        """Iterating the table yields the rows in insertion order."""
        # TODO: may test with all field types (using tests.utils.table)
        expected = (
            (u'Álvaro Justen', datetime.date(1987, 4, 29)),
            (u'Somebody', datetime.date(1990, 2, 1)),
            (u'Douglas Adams', datetime.date(1952, 3, 11)),
        )

        table_rows = list(self.table)
        self.assertEqual(len(table_rows), 3)
        for row, (name, birthdate) in zip(table_rows, expected):
            self.assertEqual(row.name, name)
            self.assertEqual(row.birthdate, birthdate)

    def test_table_append_error(self):
        """Appending a row with an invalid value raises a `ValueError`."""
        # TODO: may mock these validations and test only on *Field tests

        # Encoded (bytes) name: expected to fail as a UnicodeDecodeError,
        # which is itself a ValueError subclass. The row is built inside the
        # `with` block so that everything stays under `assertRaises`.
        with self.assertRaises(ValueError) as caught:
            self.table.append({
                'name': 'Álvaro Justen'.encode('utf-8'),
                'birthdate': '1987-04-29',
            })
        self.assertEqual(type(caught.exception), UnicodeDecodeError)

        # Unparseable date text.
        with self.assertRaises(ValueError) as caught:
            self.table.append({'name': u'Álvaro Justen', 'birthdate': 'WRONG'})
        error = caught.exception
        self.assertEqual(type(error), ValueError)
        self.assertIn('does not match format', error.message)

    def test_table_getitem_error(self):
        """Indexing the table with a text key raises `ValueError`."""
        with self.assertRaises(ValueError):
            self.table['test']

    def test_table_setitem(self):
        """Assigning a dict to an integer index replaces that row."""
        replacement = self.first_row
        replacement['name'] = 'turicas'
        replacement['birthdate'] = datetime.date(2000, 1, 1)

        self.table[0] = replacement

        updated = self.table[0]
        self.assertEqual(updated.name, 'turicas')
        self.assertEqual(updated.birthdate, datetime.date(2000, 1, 1))

    def test_table_delitem(self):
        """`del table[0]` drops the first row and keeps the others in order."""
        snapshot = list(self.table)
        size_before = len(self.table)

        del self.table[0]

        size_after = len(self.table)
        self.assertEqual(size_after, size_before - 1)
        remaining_expected = snapshot[1:]
        for row, expected_row in zip(self.table, remaining_expected):
            self.assertEqual(row, expected_row)

    def test_table_add(self):
        """Adding 0 returns the table itself; adding a table concatenates."""
        table = self.table
        # 0 on either side acts as the identity and returns the same object.
        self.assertIs(table + 0, table)
        self.assertIs(0 + table, table)

        doubled = table + table
        self.assertEqual(doubled.fields, table.fields)
        self.assertEqual(len(doubled), 2 * len(table))
        self.assertEqual(list(doubled), list(table) * 2)

    def test_table_add_error(self):
        """Adding a non-zero integer on either side raises `ValueError`."""
        self.assertRaises(ValueError, lambda: self.table + 1)
        self.assertRaises(ValueError, lambda: 1 + self.table)

    def test_table_order_by(self):
        """`order_by` sorts in place; a leading '-' reverses the order."""

        def current_birthdates():
            return [row.birthdate for row in self.table]

        with self.assertRaises(ValueError):
            self.table.order_by('doesnt_exist')

        # Ascending by birthdate.
        before = current_birthdates()
        self.table.order_by('birthdate')
        after = current_birthdates()
        self.assertNotEqual(before, after)
        self.assertEqual(sorted(before), after)

        # Descending by birthdate.
        self.table.order_by('-birthdate')
        final = current_birthdates()
        self.assertEqual(final, after[::-1])

        # Ascending by name.
        self.table.order_by('name')
        expected_rows = [
            {'name': 'Douglas Adams', 'birthdate': datetime.date(1952, 3, 11)},
            {'name': 'Somebody', 'birthdate': datetime.date(1990, 2, 1)},
            {'name': 'Álvaro Justen', 'birthdate': datetime.date(1987, 4, 29)},
        ]
        for expected_row, row in zip(expected_rows, self.table):
            self.assertEqual(expected_row, dict(row._asdict()))
示例#22
0
def create_table(data,
                 meta=None,
                 fields=None,
                 skip_header=True,
                 import_fields=None,
                 samples=None,
                 force_types=None,
                 *args,
                 **kwargs):
    """Build a `rows.Table` from an iterable of rows.

    - `data`: iterable of rows. When `fields` is `None` its first row is
      consumed as the header.
    - `meta`: handed unchanged to `Table(meta=...)`.
    - `fields`: `OrderedDict` mapping field name to field type, in the same
      order as the columns of `data`. When `None`, types are detected with
      `detect_types`.
    - `skip_header`: only consulted when `fields` is given; if true, the
      first row of `data` is discarded.
    - `import_fields`: names of the columns to keep (normalized with
      `make_header`); the resulting table follows this order. Defaults to
      every column.
    - `samples`: only consulted when `fields` is `None`. Number of rows
      handed to type detection; with `None`, every row is loaded into
      memory first - use with caution.
    - `force_types`: only consulted when `fields` is `None`. Mapping of
      field name to field type that overrides detection for those columns.
    - `*args`, `**kwargs`: forwarded to `detect_types`.

    Raises `ValueError` when `fields` is given but is not an `OrderedDict`,
    or when `import_fields` names a column that is not in the header.
    """

    row_source = iter(data)
    if not force_types:
        force_types = {}
    if import_fields is not None:
        import_fields = make_header(import_fields)

    if fields is None:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        #       (plugins may specify some possible field types).
        header = make_header(next(row_source))

        if samples is None:
            # No sample limit: materialize everything and detect on all rows.
            sample_rows = row_source = list(row_source)
        else:
            sample_rows = list(islice(row_source, 0, samples))
            # Put the consumed samples back in front of the remaining rows.
            row_source = chain(sample_rows, row_source)

        # Columns with a forced type, or that were not requested, are not
        # inspected by the detection step.
        requested = import_fields or header
        ignored_indexes = [
            index for index, field in enumerate(header)
            if field in force_types or field not in requested
        ]
        detected_fields = detect_types(header,
                                       sample_rows,
                                       skip_indexes=ignored_indexes,
                                       *args,
                                       **kwargs)

        # Detection may report names that are absent from the header; they
        # are appended after the header's own names.
        extra_names = [
            name for name in detected_fields.keys() if name not in header
        ]
        fields = OrderedDict()
        for name in header + extra_names:
            fields[name] = detected_fields.get(name, TextField)
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields.keys())
        if import_fields is None:
            import_fields = header

    else:  # using provided field types
        if not isinstance(fields, OrderedDict):
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # The first row cannot be trusted (it may be data or garbage),
            # so it is dropped without being looked at.
            next(row_source)

        header = make_header(list(fields.keys()))
        if import_fields is None:
            import_fields = header

        # Re-key the provided types by their normalized header names.
        renamed = OrderedDict()
        for name, key in zip(header, fields):
            renamed[name] = fields[key]
        fields = renamed

    unknown = set(import_fields) - set(header)
    if unknown:
        field_names = ", ".join('"{}"'.format(field) for field in unknown)
        raise ValueError("Invalid field names: {}".format(field_names))

    # Keep only the requested columns, in the requested order.
    selected = OrderedDict()
    for name in import_fields:
        selected[name] = fields[name]

    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=selected, meta=meta)
    table.extend(dict(zip(import_fields, get_row(row))) for row in row_source)

    return table
示例#23
0
文件: utils.py 项目: turicas/rows
def create_table(
    data,
    meta=None,
    fields=None,
    skip_header=True,
    import_fields=None,
    samples=None,
    force_types=None,
    *args,
    **kwargs
):
    """Build a `rows.Table` from an iterable of rows.

    - `data`: iterable of rows. When `fields` is `None` its first row is
      consumed as the header.
    - `meta`: handed unchanged to `Table(meta=...)`.
    - `fields`: `OrderedDict` mapping field name to field type, in the same
      order as the columns of `data`. When `None`, types are detected with
      `detect_types`.
    - `skip_header`: only consulted when `fields` is given; if true, the
      first row of `data` is discarded.
    - `import_fields`: names of the columns to keep (normalized with
      `make_header`); the resulting table follows this order. Defaults to
      every column.
    - `samples`: only consulted when `fields` is `None`. Number of rows
      handed to type detection; with `None`, every row is loaded into
      memory first - use with caution.
    - `force_types`: only consulted when `fields` is `None`. Mapping of
      field name to field type that overrides detection for those columns.
    - `*args`, `**kwargs`: forwarded to `detect_types`.

    Raises `ValueError` when `fields` is given but is not an `OrderedDict`,
    or when `import_fields` names a column that is not in the header.
    """

    pending_rows = iter(data)
    if not force_types:
        force_types = {}
    if import_fields is not None:
        import_fields = make_header(import_fields)

    if fields is not None:  # using provided field types
        if not isinstance(fields, OrderedDict):
            raise ValueError("`fields` must be an `OrderedDict`")

        if skip_header:
            # The first row cannot be trusted (it may be data or garbage),
            # so it is dropped without being looked at.
            next(pending_rows)

        header = make_header(list(fields.keys()))
        if import_fields is None:
            import_fields = header

        # Re-key the provided types by their normalized header names.
        fields = OrderedDict(
            (name, fields[key]) for name, key in zip(header, fields)
        )

    else:  # autodetect field types
        # TODO: may add `type_hints` parameter so autodetection can be easier
        #       (plugins may specify some possible field types).
        header = make_header(next(pending_rows))

        if samples is None:
            # No sample limit: materialize everything and detect on all rows.
            sample_rows = pending_rows = list(pending_rows)
        else:
            sample_rows = list(islice(pending_rows, 0, samples))
            # Put the consumed samples back in front of the remaining rows.
            pending_rows = chain(sample_rows, pending_rows)

        # Columns with a forced type, or that were not requested, are not
        # inspected by the detection step.
        wanted = import_fields or header
        skipped = [
            position
            for position, name in enumerate(header)
            if name in force_types or name not in wanted
        ]
        detected_fields = detect_types(
            header, sample_rows, skip_indexes=skipped, *args, **kwargs
        )

        # Detection may report names that are absent from the header; they
        # are appended after the header's own names.
        added_names = [
            name for name in detected_fields.keys() if name not in header
        ]
        fields = OrderedDict(
            (name, detected_fields.get(name, TextField))
            for name in header + added_names
        )
        fields.update(force_types)

        # Update `header` and `import_fields` based on new `fields`
        header = list(fields.keys())
        if import_fields is None:
            import_fields = header

    invalid = set(import_fields) - set(header)
    if invalid:
        field_names = ", ".join('"{}"'.format(field) for field in invalid)
        raise ValueError("Invalid field names: {}".format(field_names))

    # Keep only the requested columns, in the requested order.
    fields = OrderedDict((name, fields[name]) for name in import_fields)

    get_row = get_items(*map(header.index, import_fields))
    table = Table(fields=fields, meta=meta)
    table.extend(
        dict(zip(import_fields, get_row(row))) for row in pending_rows
    )

    return table