def test_find_moved_partitions(self):
    """Check that find_moved_partitions returns every relocated partition.

    Each partition returned by the helper is registered in Glue with a
    location built from ``old_location``; the test then expects
    ``find_moved_partitions`` to return exactly those partitions.
    """
    old_location = "s3://old-bucket/table/"
    self.s3.create_bucket(Bucket=self.bucket)
    self.helper.make_database_and_table()
    expected = sorted(self.helper.create_many_partitions(count=15))

    # Register every partition in Glue under the old location.
    glue_inputs = [
        {
            "Values": item.values,
            "StorageDescriptor": {"Location": f"{old_location}/data/"},
        }
        for item in expected
    ]
    self.glue.batch_create_partition(
        DatabaseName=self.database,
        TableName=self.table,
        PartitionInputList=glue_inputs,
    )

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    moved = partitioner.find_moved_partitions()

    moved.should.have.length_of(len(expected))

    # Order both sides the same way before comparing pairwise.
    moved.sort()
    expected.sort()
    for found, wanted in zip(moved, expected):
        found.should.equal(wanted)
def test_update_partition_locations(self):
    """Check that update_partition_locations updates each moved partition.

    Partitions returned by the helper are registered in Glue with a
    location built from ``old_location``.  ``glue.update_partition`` is
    replaced by a mock so the test can verify one call per partition
    (in any order) and that no errors are reported.
    """
    old_location = "s3://old-bucket/table/"
    self.s3.create_bucket(Bucket=self.bucket)
    self.helper.make_database_and_table()
    partitions = sorted(self.helper.create_many_partitions(count=15))

    table_ref = {"DatabaseName": self.database, "TableName": self.table}

    # Glue entries pointing at the old location.
    glue_inputs = [
        {
            "Values": item.values,
            "StorageDescriptor": {"Location": f"{old_location}/data/"},
        }
        for item in partitions
    ]
    # One update_partition call is expected per partition; the exact
    # PartitionInput payload is not checked.
    expected_calls = [
        call(PartitionValueList=item.values, PartitionInput=ANY, **table_ref)
        for item in partitions
    ]

    self.glue.batch_create_partition(
        PartitionInputList=glue_inputs, **table_ref)

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    update_stub = MagicMock()
    partitioner.glue.update_partition = update_stub

    moved = partitioner.find_moved_partitions()
    errors = partitioner.update_partition_locations(moved)

    errors.should.be.empty
    update_stub.assert_has_calls(expected_calls, any_order=True)
def test_find_moved_partitions_with_missing_partitions(self):
    """Check that a Glue-only partition is not reported as moved.

    A single partition is created directly through the Glue client (the
    helper creates none here), and ``find_moved_partitions`` is expected
    to return an empty result.
    """
    old_location = "s3://old-bucket/table/"
    self.s3.create_bucket(Bucket=self.bucket)
    self.helper.make_database_and_table()

    glue_only_partition = {
        "Values": ["2019", "01", "01", "01"],
        "StorageDescriptor": {"Location": f"{old_location}/data/"},
    }
    self.glue.create_partition(
        DatabaseName=self.database,
        TableName=self.table,
        PartitionInput=glue_only_partition,
    )

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    # Stub out update_partition on the partitioner's Glue client.
    partitioner.glue.update_partition = MagicMock()

    result = partitioner.find_moved_partitions()
    result.should.be.empty