Пример #1
0
def monitor_migration_status(migration_status, migration_history_obj):
    """
    migration_history_obj must be pickled!

    Takes a pipeline status result and uses it to update MigrationHistory
    status as the migration runs.

    Calling forget() on this result will free up the worker to work on
    other tasks.
    """
    # Set up a datastore client
    project = PROJECT or 'meridianedit-staging'
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                  overload_ratio=1.25)

    # Map each known pipeline state onto the MigrationHistory status it
    # implies.  States not listed here fall back to success so we can kick
    # off post-migration work; this default is based on observation in the
    # wild (sometimes migration status equals none of the known states).
    status_for_state = {
        'DONE': Status.success,
        'FAILED': Status.failed,
        'CANCELLED': Status.failed,
        'CANCELLING': Status.failed,
        'STARTING': Status.running,
        'RUNNING': Status.running,
        'UPDATED': Status.running,
        'DRAINING': Status.running,
        'DRAINED': Status.running,
        'PENDING': Status.waiting,
        'STOPPED': Status.waiting,
        'UNKNOWN': Status.unknown,
    }
    new_status = status_for_state.get(migration_status, Status.success)
    migration_history_obj.properties.get('status').string_value = new_status

    # Write the mutated entity to the datastore
    mutations = [Mutation(update=migration_history_obj)]
    apache_helper.write_mutations(client, project, mutations, throttler,
                                  rpc_stats_callback)
Пример #2
0
  def test_throttling_after_errors(self, mock_random):
    """With a 2/3 failure rate, throttling probability settles near 1/3."""
    # Deterministic "random" stream: 0.0, 0.1, ..., 0.9, repeated twice.
    mock_random().uniform.side_effect = [k / 10.0 for k in range(0, 10)] * 2
    self._throttler = AdaptiveThrottler(
        AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
        AdaptiveThrottlerTest.OVERLOAD_RATIO)
    start = AdaptiveThrottlerTest.START_TIME
    for now in range(start, start + 20):
      was_throttled = self._throttler.throttle_request(now)
      # Only every third request is reported back as successful.
      if now % 3 == 1:
        self._throttler.successful_request(now)

      if now > start + 10:
        # Steady state: roughly 1/3rd succeeding, 1/3rd failing,
        # 1/3rd throttled.
        self.assertAlmostEqual(
            0.33, self._throttler._throttling_probability(now), delta=0.1)
        # Under the mocked random stream, ticks 10..13 are throttled and
        # 14 onwards are not.
        self.assertEqual(now < start + 14, was_throttled)
Пример #3
0
 def __init__(self, project):
   """
   Initialize the mutate fn with its project, metric counters and throttler.

   Args:
     project: (str) cloud project id
   """
   self._project = project
   # Set to None here; presumably the client is created lazily on first
   # use — confirm against the rest of the class.
   self._client = None
   # Counters tracking RPC outcomes, registered under DatastoreMutateFn.
   self._rpc_successes = Metrics.counter(
       _Mutate.DatastoreMutateFn, "datastoreRpcSuccesses")
   self._rpc_errors = Metrics.counter(
       _Mutate.DatastoreMutateFn, "datastoreRpcErrors")
   self._throttled_secs = Metrics.counter(
       _Mutate.DatastoreMutateFn, "cumulativeThrottlingSeconds")
   # Client-side throttling over a 120s sliding window in 1s buckets.
   self._throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                       overload_ratio=1.25)
Пример #4
0
 def __init__(self, project, fixed_batch_size=None):
   """
   Initialize the write fn with its project, metric counters and throttler.

   Args:
     project: str, the cloud project id.
     fixed_batch_size: int, for testing only, this forces all batches of
        writes to be a fixed size, for easier unittesting.
   """
   self._project = project
   # Set to None here; presumably the datastore connection is established
   # later (e.g. in a start_bundle-style hook) — confirm against the class.
   self._datastore = None
   self._fixed_batch_size = fixed_batch_size
   # Counters tracking RPC outcomes, registered under DatastoreWriteFn.
   self._rpc_successes = Metrics.counter(
       _Mutate.DatastoreWriteFn, "datastoreRpcSuccesses")
   self._rpc_errors = Metrics.counter(
       _Mutate.DatastoreWriteFn, "datastoreRpcErrors")
   self._throttled_secs = Metrics.counter(
       _Mutate.DatastoreWriteFn, "cumulativeThrottlingSeconds")
   # Client-side throttling over a 120s sliding window in 1s buckets.
   self._throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                       overload_ratio=1.25)
Пример #5
0
  def test_throttling_after_errors(self, mock_random):
    """After sustained failures, about a third of requests get throttled."""
    # Twenty deterministic draws from the mocked RNG: 0.0 .. 0.9, twice over.
    mock_random().uniform.side_effect = [x / 10.0 for x in range(0, 10)] * 2
    throttler = AdaptiveThrottler(
        AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
        AdaptiveThrottlerTest.OVERLOAD_RATIO)
    self._throttler = throttler
    base = AdaptiveThrottlerTest.START_TIME
    for step in range(20):
      now = base + step
      throttled = throttler.throttle_request(now)
      # Mark success on every third tick only.
      if now % 3 == 1:
        throttler.successful_request(now)

      if step > 10:
        # About a third succeed, a third fail, and a third are throttled.
        self.assertAlmostEqual(
            0.33, throttler._throttling_probability(now), delta=0.1)
        # With the mocked random numbers, steps 10..13 throttle and
        # later steps do not.
        self.assertEqual(step < 14, throttled)
Пример #6
0
 def setUp(self):
     """Build a fresh throttler before each test case."""
     tc = AdaptiveThrottlerTest
     self._throttler = AdaptiveThrottler(
         tc.SAMPLE_PERIOD, tc.BUCKET, tc.OVERLOAD_RATIO)
Пример #7
0
class AdaptiveThrottlerTest(unittest.TestCase):
    """Unit tests for AdaptiveThrottler's throttling probability behavior."""

    # All timestamps/durations below are in milliseconds.
    START_TIME = 1500000000000
    SAMPLE_PERIOD = 60000
    BUCKET = 1000
    OVERLOAD_RATIO = 2

    def setUp(self):
        """Create a fresh throttler for each test."""
        self._throttler = AdaptiveThrottler(
            AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
            AdaptiveThrottlerTest.OVERLOAD_RATIO)

    # As far as practical, keep these tests aligned with
    # AdaptiveThrottlerTest.java.

    def test_no_initial_throttling(self):
        """A brand-new throttler must not throttle anything."""
        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME))

    def test_no_throttling_if_no_errors(self):
        """All-successful traffic keeps the throttling probability at zero."""
        for t in range(AdaptiveThrottlerTest.START_TIME,
                       AdaptiveThrottlerTest.START_TIME + 20):
            self.assertFalse(self._throttler.throttle_request(t))
            self._throttler.successful_request(t)
        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME + 20))

    def test_no_throttling_after_errors_expire(self):
        """Old failures age out of the sample window and stop throttling."""
        # First sample period: every request fails (no successful_request).
        for t in range(
                AdaptiveThrottlerTest.START_TIME,
                AdaptiveThrottlerTest.START_TIME +
                AdaptiveThrottlerTest.SAMPLE_PERIOD, 100):
            self._throttler.throttle_request(t)
            # And no successful_request
        self.assertLess(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME +
                AdaptiveThrottlerTest.SAMPLE_PERIOD))
        # Second sample period: every request succeeds, so the earlier
        # failures fall out of the window.
        for t in range(
                AdaptiveThrottlerTest.START_TIME +
                AdaptiveThrottlerTest.SAMPLE_PERIOD,
                AdaptiveThrottlerTest.START_TIME +
                AdaptiveThrottlerTest.SAMPLE_PERIOD * 2, 100):
            self._throttler.throttle_request(t)
            self._throttler.successful_request(t)

        self.assertEqual(
            0,
            self._throttler._throttling_probability(
                AdaptiveThrottlerTest.START_TIME +
                AdaptiveThrottlerTest.SAMPLE_PERIOD * 2))

    @patch('random.Random')
    def test_throttling_after_errors(self, mock_random):
        """With 2/3 of requests failing, ~1/3 should end up throttled."""
        # Deterministic "random" draws: 0.0, 0.1, ..., 0.9, twice over.
        mock_random().uniform.side_effect = [x / 10.0
                                             for x in range(0, 10)] * 2
        self._throttler = AdaptiveThrottler(
            AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
            AdaptiveThrottlerTest.OVERLOAD_RATIO)
        for t in range(AdaptiveThrottlerTest.START_TIME,
                       AdaptiveThrottlerTest.START_TIME + 20):
            throttled = self._throttler.throttle_request(t)
            # 1/3rd of requests succeeding.
            if t % 3 == 1:
                self._throttler.successful_request(t)

            if t > AdaptiveThrottlerTest.START_TIME + 10:
                # Roughly 1/3rd succeeding, 1/3rd failing, 1/3rd throttled.
                self.assertAlmostEqual(
                    0.33,
                    self._throttler._throttling_probability(t),
                    delta=0.1)
                # Given the mocked random numbers, we expect 10..13 to be throttled and
                # 14+ to be unthrottled.
                self.assertEqual(t < AdaptiveThrottlerTest.START_TIME + 14,
                                 throttled)
Пример #8
0
 def setUp(self):
   """Construct a new AdaptiveThrottler for every test case."""
   cls = AdaptiveThrottlerTest
   self._throttler = AdaptiveThrottler(cls.SAMPLE_PERIOD, cls.BUCKET,
                                       cls.OVERLOAD_RATIO)
Пример #9
0
class AdaptiveThrottlerTest(unittest.TestCase):
  """Unit tests for AdaptiveThrottler's throttling probability behavior."""

  # All timestamps/durations below are in milliseconds.
  START_TIME = 1500000000000
  SAMPLE_PERIOD = 60000
  BUCKET = 1000
  OVERLOAD_RATIO = 2

  def setUp(self):
    """Create a fresh throttler for each test."""
    self._throttler = AdaptiveThrottler(
        AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
        AdaptiveThrottlerTest.OVERLOAD_RATIO)

  # As far as practical, keep these tests aligned with
  # AdaptiveThrottlerTest.java.

  def test_no_initial_throttling(self):
    """A brand-new throttler must not throttle anything."""
    self.assertEqual(0, self._throttler._throttling_probability(
        AdaptiveThrottlerTest.START_TIME))

  def test_no_throttling_if_no_errors(self):
    """All-successful traffic keeps the throttling probability at zero."""
    for t in range(AdaptiveThrottlerTest.START_TIME,
                   AdaptiveThrottlerTest.START_TIME + 20):
      self.assertFalse(self._throttler.throttle_request(t))
      self._throttler.successful_request(t)
    self.assertEqual(0, self._throttler._throttling_probability(
        AdaptiveThrottlerTest.START_TIME + 20))

  def test_no_throttling_after_errors_expire(self):
    """Old failures age out of the sample window and stop throttling."""
    # First sample period: every request fails (no successful_request).
    for t in range(AdaptiveThrottlerTest.START_TIME,
                   AdaptiveThrottlerTest.START_TIME
                   + AdaptiveThrottlerTest.SAMPLE_PERIOD, 100):
      self._throttler.throttle_request(t)
      # And no successful_request
    self.assertLess(0, self._throttler._throttling_probability(
        AdaptiveThrottlerTest.START_TIME + AdaptiveThrottlerTest.SAMPLE_PERIOD
        ))
    # Second sample period: every request succeeds, so the earlier failures
    # fall out of the window.
    for t in range(AdaptiveThrottlerTest.START_TIME
                   + AdaptiveThrottlerTest.SAMPLE_PERIOD,
                   AdaptiveThrottlerTest.START_TIME
                   + AdaptiveThrottlerTest.SAMPLE_PERIOD*2, 100):
      self._throttler.throttle_request(t)
      self._throttler.successful_request(t)

    self.assertEqual(0, self._throttler._throttling_probability(
        AdaptiveThrottlerTest.START_TIME +
        AdaptiveThrottlerTest.SAMPLE_PERIOD*2))

  @patch('random.Random')
  def test_throttling_after_errors(self, mock_random):
    """With 2/3 of requests failing, ~1/3 should end up throttled."""
    # Deterministic "random" draws: 0.0, 0.1, ..., 0.9, twice over.
    mock_random().uniform.side_effect = [x/10.0 for x in range(0, 10)]*2
    self._throttler = AdaptiveThrottler(
        AdaptiveThrottlerTest.SAMPLE_PERIOD, AdaptiveThrottlerTest.BUCKET,
        AdaptiveThrottlerTest.OVERLOAD_RATIO)
    for t in range(AdaptiveThrottlerTest.START_TIME,
                   AdaptiveThrottlerTest.START_TIME + 20):
      throttled = self._throttler.throttle_request(t)
      # 1/3rd of requests succeeding.
      if t % 3 == 1:
        self._throttler.successful_request(t)

      if t > AdaptiveThrottlerTest.START_TIME + 10:
        # Roughly 1/3rd succeeding, 1/3rd failing, 1/3rd throttled.
        self.assertAlmostEqual(
            0.33, self._throttler._throttling_probability(t), delta=0.1)
        # Given the mocked random numbers, we expect 10..13 to be throttled and
        # 14+ to be unthrottled.
        self.assertEqual(t < AdaptiveThrottlerTest.START_TIME + 14, throttled)
Пример #10
0
def _json_error(message):
    """Build a 400 JSON Response carrying an 'error' message."""
    return Response(json.dumps({'error': message}), status=400,
                    mimetype='application/json')


def run_data_migration():
    """Validate the request, record a MigrationHistory entity, and kick off
    the migration in a celery worker.

    Returns:
        Response: 400 with a JSON error body on validation failure,
        otherwise 200 with the new MigrationHistory entity id.
    """
    request_data = json.loads(request.get_data())

    # Some basic validation: every required field must be present.
    for field in ('name', 'function_kwargs', 'user'):
        if field not in request_data:
            return _json_error('The ' + field + ' field is required.')

    if request_data['name'] not in migration.choices:
        return _json_error('The migration name is not valid.')

    migration_name = request_data['name']
    function_kwargs = request_data['function_kwargs'] or {}
    user = request_data['user']

    function_kwargs.update({'name': migration_name})

    # Create a MigrationHistory entity to keep track of the migration status
    # set the project
    project = PROJECT or 'meridianedit-staging'

    # Create entity key; the entity id doubles as a sortable timestamp.
    partition_id = entity_pb2.PartitionId(project_id=project, namespace_id="")
    migration_history_obj_id = datetime.now().strftime("%Y%m%d%H%M%S")
    path_element = entity_pb2.Key.PathElement(
        kind="MigrationHistory", name=migration_history_obj_id)
    key = entity_pb2.Key(partition_id=partition_id, path=[path_element])

    # Create entity and give it properties
    entity = entity_pb2.Entity(key=key)
    property_dict = {
        'name': migration_name,
        'function_kwargs': json.dumps(function_kwargs),
        'started_by': user,
        'status': 'running',
        # NOTE(review): naive local time — confirm whether UTC is expected.
        'created': datetime.now()
    }
    datastore_helper.add_properties(entity, property_dict)

    # Add entity to datastore
    mutations = [Mutation(insert=entity)]
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                  overload_ratio=1.25)
    apache_helper.write_mutations(client, project, mutations, throttler,
                                  rpc_stats_callback)

    # Call the migration with any given function kwargs
    migration_kwargs = {
        'migration_history_obj': migration_history_obj_id,
    }
    migration_kwargs.update(function_kwargs)

    # Run the migration in a celery task worker to prevent it timing
    # out this connection. Also monitor the task so we can update
    # migration status.
    run_dataflow_migration.delay(pickle.dumps(entity), **migration_kwargs)

    resp_data = {
        'migration_history_obj_id': migration_history_obj_id
    }

    # A default 500 error message is returned if any of this breaks
    return Response(json.dumps(resp_data), status=200,
                    mimetype='application/json')