def test_trims_seen_ids(self):
    # If a relationship tries to follow to an ID we've already seen we
    # should stop it. One owner and one pet are linked bidirectionally, so
    # a dump that did not trim seen IDs would keep bouncing between the
    # two tables.
    self.create_owner(1, 'Bob')
    self.create_pet(1, 'Ginger', parent_id=None, owner_id=1)
    relations = [
        From('pet', 'owner_id').to('owner', 'id').bidirectional(),
    ]

    # Wrap Dumper._get_table so we can track how often it is called
    original_get_table = dumper.Dumper._get_table

    def mock_get_table(*args, **kwargs):
        mock_get_table.call_count += 1
        # NOTE(review): the wrapped call's return value is dropped, as in
        # the original test -- confirm _get_table's callers do not use it.
        original_get_table(*args, **kwargs)
    mock_get_table.call_count = 0

    dumper.Dumper._get_table = mock_get_table
    try:
        self.do_partial_dump(relations, 'owner', '1=1')
        # Only two tables, should only get called twice!
        self.assertEquals(2, mock_get_table.call_count)
    finally:
        # Restore the attribute that was actually patched. The previous
        # code assigned to dumper.get_table, which left the counting
        # wrapper installed on Dumper for every test that ran afterwards.
        dumper.Dumper._get_table = original_get_table
def test_lots_of_references(self):
    # A dump that has to follow a large number of references (200
    # owner/pet pairs) should still round-trip every row.
    for row_id in xrange(1, 201):
        self.create_owner(row_id, 'Bob')
        self.create_pet(row_id, 'Ginger', parent_id=None, owner_id=row_id)

    pet_owner_link = From('pet', 'owner_id').to('owner', 'id').bidirectional()
    self.do_partial_dump([pet_owner_link], 'owner', '1=1')
    self.import_dump()

    # Every owner and every pet must have made it through the round trip
    owner_count = len(self.get_owners())
    self.assertEquals(200, owner_count)
    pet_count = len(self.get_pets())
    self.assertEquals(200, pet_count)
def test_unidirectional_link_forwards(self):
    # Unidirectional links should only work one way. This is to account for
    # tables not having indexes when following the link backwards and hence
    # making the dump really slow.
    #
    # Here the dump starts at 'pet' and follows the link in its declared
    # direction (pet -> owner), so both rows are expected in the result.
    self.create_owner(1, 'Bob')
    self.create_pet(1, 'Ginger', parent_id=None, owner_id=1)

    one_way_link = From('pet', 'owner_id').to('owner', 'id')
    self.do_partial_dump([one_way_link], 'pet', '1=1')
    self.import_dump()

    owners = self.get_owners()
    self.assertEquals(1, len(owners))
    pets = self.get_pets()
    self.assertEquals(1, len(pets))
def test_batch_size(self):
    # The owner primary key is configured with in_batches(1); the dump
    # file is then expected to contain 1 + 2 INSERT statements.
    self.create_owner(1, 'Alan')
    self.create_owner(2, 'Bob')
    self.create_pet(1, 'Ginger', parent_id=None, owner_id=1)
    self.create_pet(2, 'Tabby', parent_id=None, owner_id=2)
    pks = {
        'owner': Pk(['id']).in_batches(1),
        'pet': Pk(['id']),
    }
    relations = [
        From('owner', 'id').to('pet', 'owner_id'),
    ]
    self.do_partial_dump(relations, 'owner', '1=1', pks=pks)
    self.import_dump()

    # Read the first (and only) chunk file. try/finally guarantees the
    # handle is closed even if the read itself fails.
    f = open("%s.%d" % (TEST_OUTPUT_PREFIX, 0), 'r')
    try:
        result = f.read()
    finally:
        f.close()

    # Each owner should result in a distinct insert into the pet table
    self.assertEquals(1 + 2, result.count('INSERT'))
def test_many_rows_two_chunks(self):
    # Creating two chunks and importing them should work fine
    for x in xrange(1, 201):
        self.create_owner(x, 'Bob')
        self.create_pet(x, 'Ginger', parent_id=None, owner_id=x)
    relations = [
        From('pet', 'owner_id').to('owner', 'id').bidirectional(),
    ]
    self.do_partial_dump(relations, 'owner', '1=1', chunks=2)
    self.import_dump(chunks=2)
    self.assertEquals(200, len(self.get_owners()))
    self.assertEquals(200, len(self.get_pets()))

    # The two files should be fairly sizable. Check BOTH chunk files: the
    # previous version measured chunk 0 twice, so an empty or missing
    # second chunk went unnoticed.
    for chunk in (0, 1):
        size = os.path.getsize("%s.%d" % (TEST_OUTPUT_PREFIX, chunk))
        self.assertTrue(size > 1000)
def test_back_reference(self):
    # A reference from X to Y should cause X to be pulled in if Y is
    # pulled in: the dump starts at 'owner' and the bidirectional link
    # must bring the owner's pet along.
    self.create_owner(1, 'Bob')
    self.create_pet(1, 'Ginger', parent_id=None, owner_id=1)

    two_way_link = From('pet', 'owner_id').to('owner', 'id').bidirectional()
    self.do_partial_dump([two_way_link], 'owner', '1=1')

    # Reimporting the result should give one owner and one pet, with the
    # pet identical to the original input
    self.import_dump()
    self.assertEquals(1, len(self.get_owners()))

    pets = self.get_pets()
    self.assertEquals(1, len(pets))
    # presumably get_pets() returns a mapping keyed by pet id -- verify
    ginger = pets[1]
    self.assertEquals('Ginger', ginger['name'])
    self.assertEquals(None, ginger['parent_id'])
    self.assertEquals(1, ginger['owner_id'])
def test_multiple_value_reference(self):
    # A reference that pulls in multiple rows should work: one owner has
    # two pets and both must be followed from the owner row.
    self.create_owner(1, 'Bob')
    self.create_pet(1, 'Ginger', parent_id=None, owner_id=1)
    self.create_pet(2, 'Tabby', parent_id=None, owner_id=1)

    owner_to_pets = From('owner', 'id').to('pet', 'owner_id')
    self.do_partial_dump([owner_to_pets], 'owner', '1=1')

    # Reimporting the result should give both pet rows, each the same as
    # the original input
    self.import_dump()
    pets = self.get_pets()
    self.assertEquals(2, len(pets))

    # presumably get_pets() returns a mapping keyed by pet id -- verify
    for pet_id, pet_name in ((1, 'Ginger'), (2, 'Tabby')):
        pet = pets[pet_id]
        self.assertEquals(pet_name, pet['name'])
        self.assertEquals(None, pet['parent_id'])
        self.assertEquals(1, pet['owner_id'])
# Settings for a partial dump that starts from every Product row.
# NOTE(review): presumably mysqlpartialdump loads this module and reads the
# module-level names below as its configuration -- confirm against the tool.
from mysqlpartialdump import Pk, From, NO_KEY_CACHE, ALLOW_DUPLICATES


def clean_email(row):
    # Row callback: overwrite the 'email' column with the first three
    # characters of the original value followed by hash() of the full
    # value, then hand the (mutated) row back.
    # NOTE(review): hash() output is not guaranteed to be stable across
    # Python versions or builds.
    original = row['email']
    prefix = original[:3]
    row['email'] = "%s%d" % (prefix, hash(original))
    return row


# Primary key definition for each table
pks = {
    'Customer': Pk(['id'], NO_KEY_CACHE, ALLOW_DUPLICATES),
    'Order': Pk(['id']).in_batches(1),
    'OrderLine': Pk(['id']),
    'Product': Pk(['id']),
}

# Links between tables; every link is declared bidirectional
relationships = [
    From('Customer', 'id').to('Order', 'customer_id').bidirectional(),
    From('Order', 'id').to('OrderLine', 'order_id').bidirectional(),
    From('OrderLine', 'product_id').to('Product', 'id').bidirectional(),
]

# Callbacks keyed by table name
callbacks = {'Customer': clean_email}

end_sql = ""

# Starting point of the dump: all rows of Product ('1=1' matches every row)
start_table = 'Product'
start_where = '1=1'
start_args = []
# Settings for a partial dump that starts from the Customer row with id 1.
# NOTE(review): presumably mysqlpartialdump loads this module and reads the
# module-level names below as its configuration -- confirm against the tool.
from mysqlpartialdump import Pk, From

# Every table uses a single-column 'id' primary key. A generator expression
# is used so that no loop variable leaks into the module namespace.
pks = dict(
    (table_name, Pk(['id']))
    for table_name in ('Customer', 'Order', 'OrderLine', 'Product')
)

# One-way links: Customer -> Order -> OrderLine -> Product
customer_orders = From('Customer', 'id').to('Order', 'customer_id')
order_lines = From('Order', 'id').to('OrderLine', 'order_id')
line_product = From('OrderLine', 'product_id').to('Product', 'id')
relationships = [customer_orders, order_lines, line_product]

# No callbacks and no trailing SQL
callbacks = {}
end_sql = ""

# Starting point of the dump; start_args supplies the value for the %s
# placeholder in start_where
start_table = 'Customer'
start_where = 'id=%s'
start_args = ['1']