def test_get_entity_order(self):
    """Check the entity order computed from a row with one nested entity.

    Registers ``foo`` as the primary entity and ``bar`` as a nested entity
    found under the ``baz`` key, runs ``_get_entity_order`` on a single row,
    and expects ``entity_order`` to list only ``'bar'``.
    """
    row = {'id': 1, 'title': 'One', 'baz': {'id': 1}}
    expected_order = ['bar']

    normalizer = Normalize()
    normalizer.define_primary('foo')
    normalizer.define_nested_entity('bar', 'baz')
    normalizer._get_entity_order('foo', row)

    self.assertEqual(normalizer.entity_order, expected_order)
def test_process_data_changes(self):
    """Check ``_process_data_changes`` before and after a field rename.

    With no field changes registered, processing the row for ``baz`` must
    give back something equal to the input row. After ``title`` is renamed
    to ``heading`` for ``foo``, processing the row for ``foo`` must yield
    the row with that key renamed.
    """
    row = {'id': 1, 'title': 'One', 'baz': {'id': 1}}

    normalizer = Normalize()
    normalizer.define_primary('foo')
    normalizer.define_nested_entity('bar', 'baz')

    # Nothing has been renamed yet: output is expected to equal the input.
    untouched = normalizer._process_data_changes('baz', row)
    self.assertEqual(untouched, row)

    # Register the rename, then process the same row for the 'foo' entity.
    normalizer.rename_flds('foo', 'title', 'heading')
    renamed = normalizer._process_data_changes('foo', row)
    expected = {
        'baz': {'id': 1},
        'id': 1,
        'heading': 'One',
    }
    self.assertEqual(renamed, expected)
def test_define_nested_entity(self):
    """Check how ``define_nested_entity`` populates ``Normalize.entities``.

    Covers three expectations:

    * defining a nested entity before any primary entity raises
      ``ValueError``;
    * after ``define_primary('foo')``, a nested entity is recorded under
      the primary's ``'entities'`` mapping with ``'id'`` as its id field;
    * a second nested entity, given an explicit id field, is added next
      to the first one.
    """
    norm = Normalize()

    # No primary entity has been defined yet, so this call must fail.
    with self.assertRaises(ValueError):
        norm.define_nested_entity('foo', 'bar')

    norm.define_primary('foo')
    norm.define_nested_entity('bar', 'foo')
    self.assertEqual(
        norm.entities,
        {
            'foo': {
                'entities': {
                    'bar': {'id': 'id', 'key': 'foo'},
                },
                'id': 'id',
            },
        },
    )

    # Third positional argument is the id field of the nested entity.
    norm.define_nested_entity('name', 'key', 'id')
    self.assertEqual(
        norm.entities,
        {
            'foo': {
                'entities': {
                    'bar': {'id': 'id', 'key': 'foo'},
                    'name': {'id': 'id', 'key': 'key'},
                },
                'id': 'id',
            },
        },
    )
'id': 3, 'name': 'Ben' } }, {'id': 4, 'title': 'Some Other Article'} ] # init norm = Normalize() # set the top level name norm.define_primary('articles') # define an entity and key to flatten. Optionally pass the id field as the third # positional argument, or 'id' will be used instead. Multiple nested entities are # recursively searched for. norm.define_nested_entity('users', 'author') norm.define_nested_entity('addresses', 'address') # rename fields for a given entity name # norm.rename_flds('addresses', 'street', 'road') # remove fields for a given entity name # norm.remove_flds('addresses', 'city') # entities nested multiple times require the depth to be defined so there is no # data loss. If not set, the code will dynamically determine the depth, but only # by using the first entry in the data set. If it's missing an entity it won't # parse correctly. If your data set is all structured the same (no rows are missing # an entity), you can skip this step. Otherwise, entities should be listed in order # of the most deeply nested to the least.
def test_parse(self):
    """Exercise ``Normalize.parse`` across several configurations.

    Scenarios, in order:

    1. primary entity whose id field (``'ID'``) is absent from the rows:
       ``ValueError`` is expected;
    2. flat rows with only a primary entity;
    3. empty input (``None`` is expected) and rows holding a single nested
       dict, with one registered nested entity never appearing in the data;
    4. rows holding a list of nested dicts;
    5. the same rows after ``swap_primary('test')``;
    6. the same again with a one-to-many key added via
       ``add_one_to_many_key``;
    7. rows holding a one-element list of nested dicts.

    The input literal is rebuilt for every scenario, as in the original
    test, so each ``Normalize`` instance is handed a fresh structure.
    """
    # 1. The configured id field 'ID' does not exist in the row.
    norm = Normalize()
    norm.define_primary('test', 'ID')
    with self.assertRaises(ValueError):
        norm.parse([{'id': 1, 'title': 'Some Article'}])

    # 2. Primary entity only, default id field.
    norm = Normalize()
    norm.define_primary('test')
    self.assertEqual(
        norm.parse([{'id': 1, 'title': 'Some Article'}]),
        {
            'entities': {
                'test': {
                    1: {'id': 1, 'title': 'Some Article'},
                },
            },
            'results': [1],
        },
    )

    # 3. Two nested entities registered; only 'baz' occurs in the rows,
    #    so 'asdf' is expected as an empty mapping.
    norm = Normalize()
    norm.define_primary('foo')
    norm.define_nested_entity('bar', 'baz')
    norm.define_nested_entity('asdf', 'qwer')
    self.assertIsNone(norm.parse([]))
    self.assertEqual(
        norm.parse([
            {'id': 1, 'title': 'One', 'baz': {'id': 1}},
            {'id': 2, 'title': 'Two', 'baz': {'id': 2}},
        ]),
        {
            'entities': {
                'foo': {
                    1: {'baz': [1], 'id': 1, 'title': 'One'},
                    2: {'baz': [2], 'id': 2, 'title': 'Two'},
                },
                'asdf': {},
                'bar': {
                    1: {'id': 1},
                    2: {'id': 2},
                },
            },
            'results': [1, 2],
        },
    )

    # 4. Nested entity supplied as a list of dicts.
    data = [{
        'id': 1,
        'title': 'One',
        'baz': [{'id': 2}, {'id': 1, 'bar': {'id': 1}}],
    }]
    norm = Normalize()
    norm.define_primary('foo')
    norm.define_nested_entity('test', 'baz')
    self.assertEqual(
        norm.parse(data),
        {
            'entities': {
                'test': {
                    1: {'bar': {'id': 1}, 'id': 1},
                    2: {'id': 2},
                },
                'foo': {
                    1: {'baz': [2, 1], 'id': 1, 'title': 'One'},
                },
            },
            'results': [1],
        },
    )

    # 5. Same rows, with 'test' swapped in as primary: 'results' is
    #    expected to list the ids of 'test'.
    data = [{
        'id': 1,
        'title': 'One',
        'baz': [{'id': 2}, {'id': 1, 'bar': {'id': 1}}],
    }]
    norm = Normalize()
    norm.define_primary('foo')
    norm.define_nested_entity('test', 'baz')
    norm.swap_primary('test')
    self.assertEqual(
        norm.parse(data),
        {
            'entities': {
                'test': {
                    1: {'bar': {'id': 1}, 'id': 1},
                    2: {'id': 2},
                },
                'foo': {
                    1: {'baz': [2, 1], 'id': 1, 'title': 'One'},
                },
            },
            'results': [1, 2],
        },
    )

    # 6. Same as above, plus a one-to-many key 'foo_ids' on 'test'.
    data = [{
        'id': 1,
        'title': 'One',
        'baz': [{'id': 2}, {'id': 1, 'bar': {'id': 1}}],
    }]
    norm = Normalize()
    norm.define_primary('foo')
    norm.define_nested_entity('test', 'baz')
    norm.swap_primary('test')
    norm.add_one_to_many_key('foo_ids', 'id', 'test', 'foo')
    self.assertEqual(
        norm.parse(data),
        {
            'entities': {
                'test': {
                    1: {'foo_ids': [1], 'bar': {'id': 1}, 'id': 1},
                    2: {'foo_ids': [], 'id': 2},
                },
                'foo': {
                    1: {'baz': [2, 1], 'id': 1, 'title': 'One'},
                },
            },
            'results': [1, 2],
        },
    )

    # 7. Nested list containing a single dict.
    data = [{
        'id': 1,
        'title': 'One',
        'baz': [{'id': 1, 'bar': {'id': 1}}],
    }]
    norm = Normalize()
    norm.define_primary('foo')
    norm.define_nested_entity('test', 'baz')
    self.assertEqual(
        norm.parse(data),
        {
            'entities': {
                'test': {
                    1: {'bar': {'id': 1}, 'id': 1},
                },
                'foo': {
                    1: {'baz': [1], 'id': 1, 'title': 'One'},
                },
            },
            'results': [1],
        },
    )
} }, { 'id': 4, 'title': 'Some Other Article' }] # init norm = Normalize() # set the top level name norm.define_primary('articles') # define an entity and key to flatten. Optionally pass the id field as the third # positional argument, or 'id' will be used instead. Multiple nested entities are # recursively searched for. norm.define_nested_entity('users', 'author') norm.define_nested_entity('addresses', 'address') # rename fields for a given entity name # norm.rename_flds('addresses', 'street', 'road') # remove fields for a given entity name # norm.remove_flds('addresses', 'city') # entities nested multiple times require the depth to be defined so there is no # data loss. If not set, the code will dynamically determine the depth, but only # by using the first entry in the data set. If it's missing an entity it won't # parse correctly. If your data set is all structured the same (no rows are missing # an entity), you can skip this step. Otherwise, entities should be listed in order # of the most deeply nested to the least. #norm.set_entity_order(('addresses', 'users'))