def _flush_batch(self):
  # Flush the current batch of mutations to Cloud Datastore.
  helper.write_mutations(self._datastore, self._project, self._mutations)
  logging.debug("Successfully wrote %d mutations.", len(self._mutations))
  self._mutations = []
def monitor_migration_status(migration_status, migration_history_obj):
    """
    migration_history_obj must be pickled!

    Takes a pipeline status result and uses it to update MigrationHistory
    status as the migration runs. Calling forget() on this result will free
    up the worker to work on other tasks.
    """
    # Set up a datastore client
    project = PROJECT or 'meridianedit-staging'
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)

    if migration_status == 'DONE':
        migration_history_obj.properties.get('status').string_value = Status.success
    elif migration_status in ['FAILED', 'CANCELLED', 'CANCELLING']:
        migration_history_obj.properties.get('status').string_value = Status.failed
    elif migration_status in ['STARTING', 'RUNNING', 'UPDATED', 'DRAINING', 'DRAINED']:
        migration_history_obj.properties.get('status').string_value = Status.running
    elif migration_status in ['PENDING', 'STOPPED']:
        migration_history_obj.properties.get('status').string_value = Status.waiting
    elif migration_status == 'UNKNOWN':
        migration_history_obj.properties.get('status').string_value = Status.unknown
    else:
        # Sometimes migration status equals none of these things. Just assume success
        # so we can kick off post-migration work. This is based on observation in the wild.
        migration_history_obj.properties.get('status').string_value = Status.success

    # Write the mutated entity to the datastore
    mutations = [Mutation(update=migration_history_obj)]
    apache_helper.write_mutations(client, project, mutations, throttler, rpc_stats_callback)
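Both monitor_migration_status above and run_data_migration below hand a rpc_stats_callback to apache_helper.write_mutations without defining it. A minimal sketch of such a callback, assuming it only needs to accept the keyword counters Beam's helper reports (successes, errors, throttled_secs) and log them, could look like the following; the name and the choice to log rather than emit metrics are illustrative:

import logging

def rpc_stats_callback(successes=0, errors=0, throttled_secs=0):
    # Record how each Datastore commit RPC went so throttling and error
    # behaviour shows up in the worker logs.
    logging.info("Datastore RPC stats: successes=%d errors=%d throttled_secs=%s",
                 successes, errors, throttled_secs)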
def _flush_batch(self):
  # Flush the current batch of mutations to Cloud Datastore.
  _, latency_ms = helper.write_mutations(
      self._datastore, self._project, self._mutations,
      self._update_rpc_stats)
  logging.debug("Successfully wrote %d mutations in %dms.",
                len(self._mutations), latency_ms)

  if not self._fixed_batch_size:
    now = time.time()
    self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
    self._target_batch_size = self._batch_sizer.get_batch_size(now)

  self._mutations = []
  self._mutations_size = 0
def _flush_batch(self):
  # Flush the current batch of mutations to Cloud Datastore.
  _, latency_ms = helper.write_mutations(
      self._datastore, self._project, self._mutations, self._throttler,
      self._update_rpc_stats,
      throttle_delay=_Mutate._WRITE_BATCH_TARGET_LATENCY_MS // 1000)
  logging.debug("Successfully wrote %d mutations in %dms.",
                len(self._mutations), latency_ms)

  if not self._fixed_batch_size:
    now = time.time() * 1000
    self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
    self._target_batch_size = self._batch_sizer.get_batch_size(now)

  self._mutations = []
  self._mutations_size = 0
def _flush_batch(self):
  # Flush the current batch of mutations to Cloud Datastore.
  _, latency_ms = helper.write_mutations(
      self._datastore, self._project, self._mutations, self._throttler,
      self._update_rpc_stats,
      throttle_delay=util.WRITE_BATCH_TARGET_LATENCY_MS // 1000)
  _LOGGER.debug("Successfully wrote %d mutations in %dms.",
                len(self._mutations), latency_ms)

  if not self._fixed_batch_size:
    now = time.time() * 1000
    self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
    self._target_batch_size = self._batch_sizer.get_batch_size(now)

  self._mutations = []
  self._mutations_size = 0
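For context, _flush_batch is internal to Beam's DatastoreWriteFn; a pipeline normally reaches it through the public WriteToDatastore transform. A rough sketch under the v1 datastore IO used above, where pipeline_options, rows, and make_entity_pb are placeholders for the pipeline's own configuration, input, and entity-protobuf builder:

import apache_beam as beam
from apache_beam.io.gcp.datastore.v1.datastoreio import WriteToDatastore

with beam.Pipeline(options=pipeline_options) as p:
    (p
     | 'CreateRows' >> beam.Create(rows)            # placeholder input
     | 'MakeEntities' >> beam.Map(make_entity_pb)   # placeholder entity builder
     | 'WriteToDatastore' >> WriteToDatastore(project))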
def run_data_migration():
    request_data = json.loads(request.get_data())

    # Required fields
    fields = [
        'name',
        'function_kwargs',
        'user'
    ]

    # Some basic validation
    for f in fields:
        if f not in request_data:
            resp_data = json.dumps({'error': 'The ' + f + ' field is required.'})
            resp = Response(resp_data, status=400, mimetype='application/json')
            return resp

    if request_data['name'] not in migration.choices:
        resp_data = json.dumps({'error': 'The migration name is not valid.'})
        resp = Response(resp_data, status=400, mimetype='application/json')
        return resp

    migration_name = request_data['name']
    function_kwargs = request_data['function_kwargs'] or {}
    user = request_data['user']
    function_kwargs.update({'name': migration_name})

    # Create a MigrationHistory entity to keep track of the migration status.
    # Set the project
    project = PROJECT or 'meridianedit-staging'

    # Create entity key
    partition_id = entity_pb2.PartitionId(project_id=project, namespace_id="")
    migration_history_obj_id = datetime.now().strftime("%Y%m%d%H%M%S")
    path_element = entity_pb2.Key.PathElement(kind="MigrationHistory",
                                              name=migration_history_obj_id)
    key = entity_pb2.Key(partition_id=partition_id, path=[path_element])

    # Create entity and give it properties
    entity = entity_pb2.Entity(key=key)
    property_dict = {
        'name': migration_name,
        'function_kwargs': json.dumps(function_kwargs),
        'started_by': user,
        'status': 'running',
        'created': datetime.now()
    }
    datastore_helper.add_properties(entity, property_dict)

    # Add entity to datastore
    mutations = [Mutation(insert=entity)]
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
    apache_helper.write_mutations(client, project, mutations, throttler, rpc_stats_callback)

    # Call the migration with any given function kwargs
    migration_kwargs = {
        'migration_history_obj': migration_history_obj_id,
    }
    migration_kwargs.update(function_kwargs)

    # Run the migration in a celery task worker to prevent it timing out this
    # connection. Also monitor the task so we can update migration status.
    run_dataflow_migration.delay(pickle.dumps(entity), **migration_kwargs)

    resp_data = {
        'migration_history_obj_id': migration_history_obj_id
    }

    # A default 500 error message is returned if any of this breaks
    return Response(json.dumps(resp_data), status=200, mimetype='application/json')
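A quick example of how a client might call the view above. The route URL is an assumption (the Flask route decorator isn't shown in this snippet), and the payload simply mirrors the fields the handler validates:

import requests

resp = requests.post(
    'https://example.com/run-data-migration',   # hypothetical route for this view
    json={
        'name': 'some_registered_migration',     # must appear in migration.choices
        'function_kwargs': {'batch_size': 500},
        'user': 'jane@example.com',
    })
print(resp.status_code, resp.json())  # 200 and {'migration_history_obj_id': ...} on success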