def monitor_migration_status(migration_status, migration_history_obj):
    """Update a MigrationHistory entity to reflect a pipeline's status.

    migration_history_obj must be pickled!  Takes a pipeline status result
    and uses it to update MigrationHistory status as the migration runs.
    Calling forget() on this result will free up the worker to work on
    other tasks.

    Args:
        migration_status: str, a Dataflow pipeline state name (e.g. 'DONE',
            'RUNNING', 'FAILED').
        migration_history_obj: the MigrationHistory Datastore entity whose
            'status' property is updated in place and then written back.
    """
    # Set up a datastore client.
    project = PROJECT or 'meridianedit-staging'
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                  overload_ratio=1.25)

    # Map each known pipeline state onto our MigrationHistory status.
    status_for_state = {
        'DONE': Status.success,
        'FAILED': Status.failed,
        'CANCELLED': Status.failed,
        'CANCELLING': Status.failed,
        'STARTING': Status.running,
        'RUNNING': Status.running,
        'UPDATED': Status.running,
        'DRAINING': Status.running,
        'DRAINED': Status.running,
        'PENDING': Status.waiting,
        'STOPPED': Status.waiting,
        'UNKNOWN': Status.unknown,
    }
    # Sometimes migration status equals none of these things. Just assume
    # success so we can kick off post-migration work. This is based on
    # observation in the wild.
    new_status = status_for_state.get(migration_status, Status.success)
    migration_history_obj.properties.get('status').string_value = new_status

    # Write the mutated entity to the datastore.
    mutations = [Mutation(update=migration_history_obj)]
    apache_helper.write_mutations(client, project, mutations, throttler,
                                  rpc_stats_callback)
def test_throttling_after_errors(self, mock_random):
    """With only ~1/3 of requests succeeding, throttling settles near 1/3."""
    T = AdaptiveThrottlerTest
    # Deterministic "random" draws: 0.0, 0.1, ..., 0.9, twice over.
    mock_random().uniform.side_effect = [i / 10.0 for i in range(10)] * 2
    self._throttler = AdaptiveThrottler(T.SAMPLE_PERIOD, T.BUCKET,
                                        T.OVERLOAD_RATIO)
    for tick in range(T.START_TIME, T.START_TIME + 20):
        throttled = self._throttler.throttle_request(tick)
        # Report success for every third request only.
        if tick % 3 == 1:
            self._throttler.successful_request(tick)
        if tick > T.START_TIME + 10:
            # Roughly 1/3rd succeeding, 1/3rd failing, 1/3rd throttled.
            self.assertAlmostEqual(
                0.33, self._throttler._throttling_probability(tick), delta=0.1)
        # Given the mocked random numbers, we expect ticks 10..13 to be
        # throttled and 14+ to be unthrottled.
        self.assertEqual(tick < T.START_TIME + 14, throttled)
def __init__(self, project):
    """
    Args:
      project: (str) cloud project id
    """
    self._project = project
    self._client = None
    # Per-bundle RPC outcome metrics, all scoped to _Mutate.DatastoreMutateFn.
    counter = Metrics.counter
    self._rpc_successes = counter(
        _Mutate.DatastoreMutateFn, "datastoreRpcSuccesses")
    self._rpc_errors = counter(
        _Mutate.DatastoreMutateFn, "datastoreRpcErrors")
    self._throttled_secs = counter(
        _Mutate.DatastoreMutateFn, "cumulativeThrottlingSeconds")
    # Client-side adaptive throttling: 120s window sampled in 1s buckets.
    self._throttler = AdaptiveThrottler(
        window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
def __init__(self, project, fixed_batch_size=None):
    """
    Args:
      project: str, the cloud project id.
      fixed_batch_size: int, for testing only, this forces all batches of
        writes to be a fixed size, for easier unittesting.
    """
    self._project = project
    self._datastore = None
    self._fixed_batch_size = fixed_batch_size
    # Per-bundle RPC outcome metrics, all scoped to _Mutate.DatastoreWriteFn.
    counter = Metrics.counter
    self._rpc_successes = counter(
        _Mutate.DatastoreWriteFn, "datastoreRpcSuccesses")
    self._rpc_errors = counter(
        _Mutate.DatastoreWriteFn, "datastoreRpcErrors")
    self._throttled_secs = counter(
        _Mutate.DatastoreWriteFn, "cumulativeThrottlingSeconds")
    # Client-side adaptive throttling: 120s window sampled in 1s buckets.
    self._throttler = AdaptiveThrottler(
        window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
def setUp(self):
    # Fresh throttler for each test, built from the class-level tuning
    # constants.
    cls = AdaptiveThrottlerTest
    self._throttler = AdaptiveThrottler(cls.SAMPLE_PERIOD, cls.BUCKET,
                                        cls.OVERLOAD_RATIO)
class AdaptiveThrottlerTest(unittest.TestCase):
    """Unit tests for AdaptiveThrottler.

    All timestamps below are in milliseconds, matching the throttler API.
    """

    # Arbitrary epoch-ms start time, plus the throttler tuning constants
    # used by every test.
    START_TIME = 1500000000000
    SAMPLE_PERIOD = 60000
    BUCKET = 1000
    OVERLOAD_RATIO = 2

    def setUp(self):
        # A fresh throttler for every test.
        self._throttler = AdaptiveThrottler(
            AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
            AdaptiveThrottlerTest.OVERLOAD_RATIO)

    # As far as practical, keep these tests aligned with
    # AdaptiveThrottlerTest.java.

    def test_no_initial_throttling(self):
        # With no request history, the throttling probability is zero.
        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME))

    def test_no_throttling_if_no_errors(self):
        # Every request succeeds, so nothing is ever throttled.
        for t in range(AdaptiveThrottlerTest.START_TIME,
                       AdaptiveThrottlerTest.START_TIME + 20):
            self.assertFalse(self._throttler.throttle_request(t))
            self._throttler.successful_request(t)
        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME + 20))

    def test_no_throttling_after_errors_expire(self):
        # First sample period: every request fails.
        for t in range(
            AdaptiveThrottlerTest.START_TIME,
            AdaptiveThrottlerTest.START_TIME
            + AdaptiveThrottlerTest.SAMPLE_PERIOD,
            100):
            self._throttler.throttle_request(t)
            # And no successful_request
        # Failures within the sample window raise the throttling probability.
        self.assertLess(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME
                + AdaptiveThrottlerTest.SAMPLE_PERIOD))
        # Second sample period: every request succeeds.
        for t in range(
            AdaptiveThrottlerTest.START_TIME
            + AdaptiveThrottlerTest.SAMPLE_PERIOD,
            AdaptiveThrottlerTest.START_TIME
            + AdaptiveThrottlerTest.SAMPLE_PERIOD * 2,
            100):
            self._throttler.throttle_request(t)
            self._throttler.successful_request(t)
        # The old failures have aged out of the window, so throttling stops.
        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME
                + AdaptiveThrottlerTest.SAMPLE_PERIOD * 2))

    @patch('random.Random')
    def test_throttling_after_errors(self, mock_random):
        # Deterministic "uniform" draws: 0.0, 0.1, ..., 0.9, twice over.
        mock_random().uniform.side_effect = [x / 10.0 for x in range(0, 10)] * 2
        self._throttler = AdaptiveThrottler(
            AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
            AdaptiveThrottlerTest.OVERLOAD_RATIO)
        for t in range(AdaptiveThrottlerTest.START_TIME,
                       AdaptiveThrottlerTest.START_TIME + 20):
            throttled = self._throttler.throttle_request(t)
            # 1/3rd of requests succeeding.
            if t % 3 == 1:
                self._throttler.successful_request(t)
            if t > AdaptiveThrottlerTest.START_TIME + 10:
                # Roughly 1/3rd succeeding, 1/3rd failing, 1/3rd throttled.
                self.assertAlmostEqual(
                    0.33, self._throttler._throttling_probability(t),
                    delta=0.1)
            # Given the mocked random numbers, we expect 10..13 to be
            # throttled and 14+ to be unthrottled.
            self.assertEqual(t < AdaptiveThrottlerTest.START_TIME + 14,
                             throttled)
class AdaptiveThrottlerTest(unittest.TestCase):
    """Tests for AdaptiveThrottler; timestamps are in milliseconds.

    As far as practical, keep these tests aligned with
    AdaptiveThrottlerTest.java.
    """

    START_TIME = 1500000000000
    SAMPLE_PERIOD = 60000
    BUCKET = 1000
    OVERLOAD_RATIO = 2

    def setUp(self):
        AT = AdaptiveThrottlerTest
        self._throttler = AdaptiveThrottler(AT.SAMPLE_PERIOD, AT.BUCKET,
                                            AT.OVERLOAD_RATIO)

    def test_no_initial_throttling(self):
        # With no request history the throttling probability is zero.
        probability = self._throttler._throttling_probability(
            AdaptiveThrottlerTest.START_TIME)
        self.assertEqual(0, probability)

    def test_no_throttling_if_no_errors(self):
        AT = AdaptiveThrottlerTest
        # Every request succeeds, so nothing is ever throttled.
        for ms in range(AT.START_TIME, AT.START_TIME + 20):
            self.assertFalse(self._throttler.throttle_request(ms))
            self._throttler.successful_request(ms)
        self.assertEqual(
            0, self._throttler._throttling_probability(AT.START_TIME + 20))

    def test_no_throttling_after_errors_expire(self):
        AT = AdaptiveThrottlerTest
        # A full sample period of failures (no successful_request calls).
        for ms in range(AT.START_TIME, AT.START_TIME + AT.SAMPLE_PERIOD, 100):
            self._throttler.throttle_request(ms)
        self.assertLess(
            0,
            self._throttler._throttling_probability(
                AT.START_TIME + AT.SAMPLE_PERIOD))
        # A second sample period in which every request succeeds.
        for ms in range(AT.START_TIME + AT.SAMPLE_PERIOD,
                        AT.START_TIME + AT.SAMPLE_PERIOD * 2, 100):
            self._throttler.throttle_request(ms)
            self._throttler.successful_request(ms)
        # The earlier failures have aged out, so throttling stops.
        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AT.START_TIME + AT.SAMPLE_PERIOD * 2))

    @patch('random.Random')
    def test_throttling_after_errors(self, mock_random):
        AT = AdaptiveThrottlerTest
        # Deterministic "uniform" draws: 0.0, 0.1, ..., 0.9, twice over.
        mock_random().uniform.side_effect = [i / 10.0 for i in range(10)] * 2
        self._throttler = AdaptiveThrottler(AT.SAMPLE_PERIOD, AT.BUCKET,
                                            AT.OVERLOAD_RATIO)
        for ms in range(AT.START_TIME, AT.START_TIME + 20):
            throttled = self._throttler.throttle_request(ms)
            # Report success for every third request only.
            if ms % 3 == 1:
                self._throttler.successful_request(ms)
            if ms > AT.START_TIME + 10:
                # Roughly 1/3rd succeeding, 1/3rd failing, 1/3rd throttled.
                self.assertAlmostEqual(
                    0.33, self._throttler._throttling_probability(ms),
                    delta=0.1)
            # Given the mocked random numbers, requests 10..13 are throttled
            # and 14+ are unthrottled.
            self.assertEqual(ms < AT.START_TIME + 14, throttled)
def _json_error(message, status=400):
    """Build a JSON error Response: {"error": message} with the given status."""
    return Response(json.dumps({'error': message}), status=status,
                    mimetype='application/json')


def run_data_migration():
    """Validate the request, record a MigrationHistory entity, and start
    the migration in a celery task.

    Expects a JSON request body with 'name', 'function_kwargs' and 'user'.
    Returns a 200 JSON response containing the new MigrationHistory entity
    id, or a 400 JSON error for missing/invalid fields.
    """
    request_data = json.loads(request.get_data())

    # Some basic validation: required fields present, migration name known.
    for field in ('name', 'function_kwargs', 'user'):
        if field not in request_data:
            return _json_error('The ' + field + ' field is required.')
    if request_data['name'] not in migration.choices:
        return _json_error('The migration name is not valid.')

    migration_name = request_data['name']
    function_kwargs = request_data['function_kwargs'] or {}
    user = request_data['user']
    function_kwargs.update({'name': migration_name})

    # Create a MigrationHistory entity to keep track of the migration status.
    # Set the project.
    project = PROJECT or 'meridianedit-staging'

    # Capture the timestamp once so the entity id and the 'created' property
    # always agree (two datetime.now() calls could straddle a second).
    now = datetime.now()  # NOTE(review): naive local time — confirm UTC is not expected

    # Create entity key: MigrationHistory/<YYYYmmddHHMMSS>, default namespace.
    partition_id = entity_pb2.PartitionId(project_id=project, namespace_id="")
    migration_history_obj_id = now.strftime("%Y%m%d%H%M%S")
    path_element = entity_pb2.Key.PathElement(kind="MigrationHistory",
                                              name=migration_history_obj_id)
    key = entity_pb2.Key(partition_id=partition_id, path=[path_element])

    # Create entity and give it properties.
    entity = entity_pb2.Entity(key=key)
    datastore_helper.add_properties(entity, {
        'name': migration_name,
        'function_kwargs': json.dumps(function_kwargs),
        'started_by': user,
        'status': 'running',
        'created': now,
    })

    # Add entity to datastore.
    mutations = [Mutation(insert=entity)]
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                  overload_ratio=1.25)
    apache_helper.write_mutations(client, project, mutations, throttler,
                                  rpc_stats_callback)

    # Call the migration with any given function kwargs.
    migration_kwargs = {
        'migration_history_obj': migration_history_obj_id,
    }
    migration_kwargs.update(function_kwargs)

    # Run the migration in a celery task worker to prevent it timing out
    # this connection. Also monitor the task so we can update migration
    # status.
    run_dataflow_migration.delay(pickle.dumps(entity), **migration_kwargs)

    resp_data = {
        'migration_history_obj_id': migration_history_obj_id
    }
    # A default 500 error message is returned if any of this breaks.
    return Response(json.dumps(resp_data), status=200,
                    mimetype='application/json')