def monitor_migration_status(migration_status, migration_history_obj):
    """Update the MigrationHistory entity as the migration runs.

    migration_history_obj must be pickled! Takes a pipeline status result
    and uses it to update MigrationHistory status as the migration runs.
    Calling forget() on this result will free up the worker to work on
    other tasks.

    Args:
        migration_status: Dataflow job state string (e.g. 'DONE', 'RUNNING').
        migration_history_obj: the MigrationHistory datastore entity whose
            'status' property is mutated and written back.
    """
    # Set up a datastore client.
    project = PROJECT or 'meridianedit-staging'
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                  overload_ratio=1.25)

    # Map Dataflow job states onto MigrationHistory statuses.  Sometimes the
    # migration status equals none of these; we then assume success so we can
    # kick off post-migration work. This is based on observation in the wild.
    status_by_state = {
        'DONE': Status.success,
        'FAILED': Status.failed,
        'CANCELLED': Status.failed,
        'CANCELLING': Status.failed,
        'STARTING': Status.running,
        'RUNNING': Status.running,
        'UPDATED': Status.running,
        'DRAINING': Status.running,
        'DRAINED': Status.running,
        'PENDING': Status.waiting,
        'STOPPED': Status.waiting,
        'UNKNOWN': Status.unknown,
    }
    new_status = status_by_state.get(migration_status, Status.success)
    # NOTE(review): assumes the entity always carries a 'status' property;
    # properties.get('status') would return None otherwise — confirm upstream.
    migration_history_obj.properties.get('status').string_value = new_status

    # Write the mutated entity back to the datastore.
    mutations = [Mutation(update=migration_history_obj)]
    apache_helper.write_mutations(client, project, mutations, throttler,
                                  rpc_stats_callback)
def get_namespaces(self):
    """Return datastore namespace names/ids, skipping test namespaces.

    When ``self.argv`` is empty (test operations) a fixed namespace is
    returned without authenticating to the datastore.

    Returns:
        list: namespace names (str) or ids (int), de-duplicated, in the
        order first encountered.
    """
    # Skip auth-ing to db in test operations.
    if not self.argv:
        return ['4952435991248896_1']

    query_pb = query_pb2.Query()
    helper.set_kind(query_pb, "__namespace__")
    client = apache_helper.get_datastore(PROJECT)
    namespace_entities = apache_helper.fetch_entities(
        PROJECT, '', query_pb, client)

    namespaces = []
    seen = set()  # O(1) membership check instead of scanning the list
    for n in namespace_entities:
        # A namespace key's last path element holds either a numeric id
        # or a string name.
        key_path = n.key.path[-1]
        if key_path.HasField('id'):
            name_or_id = key_path.id
        else:
            name_or_id = key_path.name

        # Avoid duplicates and (single-character) test namespaces.
        if len(str(name_or_id)) > 1 and name_or_id not in seen:
            seen.add(name_or_id)
            namespaces.append(name_or_id)
    return namespaces
def start_bundle(self):
    """Reset per-bundle mutation state and open a Datastore client.

    Chooses the write batch size: a configured fixed size wins; otherwise
    a dynamic batch sizer is created and consulted for the current time.
    """
    self._mutations = []
    self._mutations_size = 0
    self._datastore = helper.get_datastore(self._project)

    # Fixed batch size takes precedence when configured.
    if self._fixed_batch_size:
        self._target_batch_size = self._fixed_batch_size
        return

    self._batch_sizer = _Mutate._DynamicBatchSizer()
    self._target_batch_size = self._batch_sizer.get_batch_size(time.time())
def start_bundle(self):
    """Open a Datastore client for the bundle about to be processed."""
    client = helper.get_datastore(self._project)
    self._datastore = client
def start_bundle(self):
    """Prepare per-bundle state: a Datastore client and an empty
    mutation buffer."""
    self._datastore = helper.get_datastore(self._project)
    self._mutations = []
def _json_error(message, status=400):
    """Build a JSON error Response with the given message and status code."""
    resp_data = json.dumps({'error': message})
    return Response(resp_data, status=status, mimetype='application/json')


def run_data_migration():
    """Validate a migration request, record a MigrationHistory entity, and
    kick off the migration in a celery worker.

    Expects a JSON body with 'name', 'function_kwargs' and 'user'.
    Returns a 400 JSON error on validation failure; a default 500 error
    message is returned if any of this breaks.
    """
    request_data = json.loads(request.get_data())

    # Some basic validation of the required fields.
    for field in ('name', 'function_kwargs', 'user'):
        if field not in request_data:
            return _json_error('The ' + field + ' field is required.')
    if request_data['name'] not in migration.choices:
        return _json_error('The migration name is not valid.')

    migration_name = request_data['name']
    function_kwargs = request_data['function_kwargs'] or {}
    user = request_data['user']
    function_kwargs.update({'name': migration_name})

    # Create a MigrationHistory entity to keep track of the migration status.
    # Set the project.
    project = PROJECT or 'meridianedit-staging'

    # Capture the timestamp once so the entity id and the 'created' property
    # cannot straddle a second boundary and disagree.
    now = datetime.now()
    migration_history_obj_id = now.strftime("%Y%m%d%H%M%S")

    # Create the entity key.
    partition_id = entity_pb2.PartitionId(project_id=project, namespace_id="")
    path_element = entity_pb2.Key.PathElement(kind="MigrationHistory",
                                              name=migration_history_obj_id)
    key = entity_pb2.Key(partition_id=partition_id, path=[path_element])

    # Create the entity and give it properties.
    entity = entity_pb2.Entity(key=key)
    property_dict = {
        'name': migration_name,
        'function_kwargs': json.dumps(function_kwargs),
        'started_by': user,
        'status': 'running',
        'created': now,
    }
    datastore_helper.add_properties(entity, property_dict)

    # Add the entity to the datastore.
    mutations = [Mutation(insert=entity)]
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000,
                                  overload_ratio=1.25)
    apache_helper.write_mutations(client, project, mutations, throttler,
                                  rpc_stats_callback)

    # Call the migration with any given function kwargs.
    migration_kwargs = {'migration_history_obj': migration_history_obj_id}
    migration_kwargs.update(function_kwargs)

    # Run the migration in a celery task worker to prevent it timing out
    # this connection. Also monitor the task so we can update migration
    # status.
    run_dataflow_migration.delay(pickle.dumps(entity), **migration_kwargs)

    resp_data = {'migration_history_obj_id': migration_history_obj_id}

    # A default 500 error message is returned if any of this breaks.
    return Response(json.dumps(resp_data), status=200,
                    mimetype='application/json')