Example #1
def ImportCSV(app, kind, key_column, body_file, metadata_entity, user):
    error = False
    reader = csv.DictReader(body_file)

    # Hack to support Python 2.5 / 2.6
    if reader.fieldnames is None:
        reader.fieldnames = reader.reader.next()
    for f in reader.fieldnames:
        if ' ' in f or f[0:1].isdigit() or f[0:1] == '-':
            logging.error('Invalid field name: ' + f)
            error = True

    if key_column and key_column not in reader.fieldnames:
        logging.error('Key column not found in field names: ' + key_column)
        error = True
    if not key_column:
        key_column = reader.fieldnames[0]

    if error:
        return -1

    rows = 0

    for row in reader:
        key = row[key_column]
        if "kind" in row:
            del row["kind"]
        if "app" in row:
            del row["app"]
        if "key" in row:
            del row["key"]

        for r in row:
            assert r is not None, \
                "Could not split CSV row properly: row field contains an extra comma: " + str(row)
            if row[r] == NULL_VALUE:
                row[r] = None

        all_null = True
        for r in row:
            if r == key_column:
                continue
            elif row[r] is not None:
                all_null = False
                break

        if all_null:
            row[key_column] = None

        CleanUpFormats(row)
        datastore.RunInTransaction(store.update_entity, app, kind, key, row,
                                   metadata_entity, user)
        rows += 1

    return rows
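A minimal sketch of driving ImportCSV with an in-memory CSV file. The metadata_entity and user values are stand-ins here; in Example #10 below they come from store.GetMetadataEntity() and store.GetUser().

# Hypothetical invocation; "myapp", "Person" and the stand-in values are illustrative only.
import StringIO

metadata_entity = user = None  # stand-ins for store.GetMetadataEntity(...) / store.GetUser(...)
body = StringIO.StringIO("id,name\n1,alpha\n2,beta\n")
result = ImportCSV("myapp", "Person", "id", body, metadata_entity, user)
if result == -1:
    logging.error("CSV import failed validation")
else:
    logging.info("Imported %d rows", result)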
Example #2
    def RunInTransaction(self, func, *args, **kwds):
        """Run the passed function in a transaction.

        Blocks other changes to the storage.

        Args:
          func: a function reference
          args: the positional arguments list
          kwds: the keyword arguments dict
        Raises:
          score_ranker.TransactionFailedError if transaction failed
        """
        return datastore.RunInTransaction(func, *args, **kwds)
Example #3
    def begin_processing(self, operation, operation_method, entities_per_task, queue):
        BATCH_SIZE = 3

        # Unpickle the source query
        query = cPickle.loads(str(self["query"]))

        def txn():
            try:
                marker = datastore.Get(self.key())
                marker.__class__ = ShardedTaskMarker

                queued_shards = marker[ShardedTaskMarker.QUEUED_KEY]
                processing_shards = marker[ShardedTaskMarker.RUNNING_KEY]
                queued_count = len(queued_shards)

                for _ in xrange(min(BATCH_SIZE, queued_count)):
                    pickled_shard = queued_shards.pop()
                    processing_shards.append(pickled_shard)
                    shard = cPickle.loads(str(pickled_shard))
                    deferred.defer(
                        self.run_shard,
                        query,
                        shard,
                        operation,
                        operation_method,
                        entities_per_task=entities_per_task,
                        # Defer this task onto the correct queue with `_queue`, passing the `queue`
                        # parameter back to the function again so that it can do the same next time
                        queue=queue,
                        _queue=queue,
                        _transactional=True,
                    )

                marker.put()
            except datastore_errors.EntityNotFoundError:
                logging.error(
                    "Unable to start task %s as marker is missing",
                    self.key().id_or_name()
                )
                return

        # Reload the marker (non-transactionally) and defer the shards in batches
        # transactionally. If this task fails somewhere, it will resume where it left off
        marker = datastore.Get(self.key())
        for _ in xrange(0, len(marker[ShardedTaskMarker.QUEUED_KEY]), BATCH_SIZE):
            datastore.RunInTransaction(txn)
Example #4
def GetOrInsert(key, kindName=None, parent=None, **kwargs):
    """
		Either creates a new entity with the given key, or returns the existing one.

		Its guaranteed that there is no race-condition here; it will never overwrite an
		previously created entity. Extra keyword arguments passed to this function will be
		used to populate the entity if it has to be created; otherwise they are ignored.

		:param key: The key which will be fetched or created. \
		If key is a string, it will be used as the name for the new entity, therefore the \
		collectionName is required in this case.
		:type key: server.db.Key | String
		:param kindName: The data kind to use for that entity. Ignored if key is a db.Key.
		:type kindName: str

		:param parent: The parent entity of the entity.
		:type parent: db.Key or None

		:returns: Returns the wanted Entity.
		:rtype: server.db.Entity
	"""
    def txn(key, kwargs):
        try:
            res = datastore.Get(key)
        except datastore_errors.EntityNotFoundError:
            res = Entity(kind=key.kind(),
                         parent=key.parent(),
                         name=key.name(),
                         id=key.id())
            for k, v in kwargs.items():
                res[k] = v
            datastore.Put(res)
        return res

    if not isinstance(key, datastore_types.Key):
        try:
            key = datastore_types.Key(encoded=key)
        except Exception:
            assert kindName
            key = datastore_types.Key.from_path(kindName, key, parent=parent)
    if datastore.IsInTransaction():
        return txn(key, kwargs)

    return datastore.RunInTransaction(txn, key, kwargs)
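A minimal usage sketch for GetOrInsert; the "UserConfig" kind and the theme property are hypothetical, and the keyword arguments only take effect when the entity has to be created.

# Hypothetical: fetch the existing entity, or atomically create it with theme="dark".
config = GetOrInsert("user-123", kindName="UserConfig", theme="dark")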
Example #5
def start_mapping(
    identifier, query, operation, operation_method=None, shard_count=None,
    entities_per_task=None, queue=None
):
    """ This must *transactionally* defer a task which will call `operation._wrapped_map_entity` on
        all entities of the given `kind` in the given `namespace` and will then transactionally
        update the entity of the given `task_marker_key_key` with `is_finished=True` after all
        entities have been mapped.
    """
    shard_count = shard_count or getattr(settings, "DJANGAE_MIGRATION_DEFAULT_SHARD_COUNT", 32)
    shards_to_run = shard_query(query, shard_count)
    queue = queue or getattr(settings, "DJANGAE_MIGRATION_DEFAULT_QUEUE", _DEFAULT_QUEUE)

    def txn(shards):
        marker_key = ShardedTaskMarker.get_key(identifier, query._Query__namespace)
        try:
            datastore.Get(marker_key)

            # If the marker already exists, don't do anything - just return
            return
        except datastore_errors.EntityNotFoundError:
            pass

        marker = ShardedTaskMarker(identifier, query, namespace=query._Query__namespace)

        if shards:
            for shard in shards:
                marker["shards_queued"].append(cPickle.dumps(shard))
        else:
            # No shards means there is nothing to do!
            marker["is_finished"] = True
        marker["time_started"] = datetime.utcnow()
        marker.put()
        if not marker["is_finished"]:
            deferred.defer(
                marker.begin_processing, operation, operation_method, entities_per_task, queue,
                _transactional=True, _queue=queue
            )

        return marker_key

    return datastore.RunInTransaction(txn, shards_to_run)
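A sketch of how a mapping run might be started; the "Person" kind, the backfill_entity callable and the identifier are all hypothetical. operation_method is omitted because the operation here is a plain function rather than an object.

# Hypothetical kick-off of a sharded mapping run over all Person entities.
query = datastore.Query("Person")
marker_key = start_mapping(
    "person-backfill",   # unique identifier for this run
    query,
    backfill_entity,     # assumed callable that takes a single entity
    shard_count=8,
    entities_per_task=200,
)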
Example #6
def ImportSplitFile(app, body_file):
    rows = 0

    segments = body_file.getvalue().split("\n\n")
    for seg in segments:
        buf = StringIO.StringIO(seg)
        header = buf.readline().lstrip("#")
        kind = header.strip()
        reader = csv.DictReader(buf)
        # Hack to support Python 2.5 / 2.6
        if reader.fieldnames is None:
            reader.fieldnames = reader.reader.next()
        for row in reader:
            key = row[reader.fieldnames[0]]
            CleanUpFormats(row)
            datastore.RunInTransaction(store.update_entity, app, kind, key,
                                       row)
            rows += 1

    return rows
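The split-file format is easiest to see by example: segments are separated by a blank line, and each segment starts with a '#'-prefixed kind name followed by an ordinary CSV block. A hypothetical input:

# Hypothetical input covering two kinds; ImportSplitFile returns 2 here,
# one imported row per kind.
body = StringIO.StringIO(
    "#Person\n"
    "id,name\n"
    "1,alpha\n"
    "\n"
    "#Team\n"
    "id,title\n"
    "7,core\n"
)
rows = ImportSplitFile("myapp", body)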
Example #7
    def run_shard(
        self, original_query, shard, operation, operation_method=None, offset=0,
        entities_per_task=None, queue=_DEFAULT_QUEUE
    ):
        """ Given a datastore.Query which does not have any high/low bounds on it, apply the bounds
            of the given shard (which is a pair of keys), and run either the given `operation`
            (if it's a function) or the given method of the given operation (if it's an object) on
            each entity that the query returns, starting at entity `offset`, and redeferring every
            `entities_per_task` entities to avoid hitting DeadlineExceededError.
            Tries (but does not guarantee) to avoid processing the same entity more than once.
        """
        entities_per_task = entities_per_task or getattr(
            settings, "DJANGAE_MIGRATION_DEFAULT_ENTITIES_PER_TASK", 100
        )
        if operation_method:
            function = getattr(operation, operation_method)
        else:
            function = operation

        marker = datastore.Get(self.key())
        if cPickle.dumps(shard) not in marker[ShardedTaskMarker.RUNNING_KEY]:
            return

        # Copy the query so that we can re-defer the original, unadulterated version, because once
        # we've applied limits and ordering to the query it causes pickle errors with defer.
        query = copy.deepcopy(original_query)
        query.Order("__key__")
        query["__key__ >="] = shard[0]
        query["__key__ <"] = shard[1]

        num_entities_processed = 0
        try:
            results = query.Run(offset=offset, limit=entities_per_task)
            for entity in results:
                function(entity)
                num_entities_processed += 1
                if num_entities_processed >= entities_per_task:
                    raise Redefer()
        except (DeadlineExceededError, Redefer):
            # By keeping track of how many entities we've processed, we can (hopefully) avoid
            # re-processing entities if we hit DeadlineExceededError by redeferring with the
            # incremented offset.  But note that if we get crushed by the HARD DeadlineExceededError
            # before we can redefer, then the whole task will retry and so entities will get
            # processed twice.
            deferred.defer(
                self.run_shard,
                original_query,
                shard,
                operation,
                operation_method,
                offset=offset+num_entities_processed,
                entities_per_task=entities_per_task,
                # Defer this task onto the correct queue (with `_queue`), passing the `queue`
                # parameter back to the function again so that it can do the same next time
                queue=queue,
                _queue=queue,
            )
            return  # Important: without this return, the shard would be marked as finished below.

        # Once we've run the operation on all the entities, mark the shard as done
        def txn():
            pickled_shard = cPickle.dumps(shard)
            marker = datastore.Get(self.key())
            marker.__class__ = ShardedTaskMarker
            marker[ShardedTaskMarker.RUNNING_KEY].remove(pickled_shard)
            marker[ShardedTaskMarker.FINISHED_KEY].append(pickled_shard)
            marker.put()

        datastore.RunInTransaction(txn)
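Redefer is raised purely as control flow to jump into the redeferring branch; its definition is not part of this snippet, but a minimal version would just be an empty exception class:

class Redefer(Exception):
    """Raised to stop processing and re-defer the shard with an updated offset."""
    pass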
Example #8
    def run_in_transaction(self, func, *args, **kw):
        return datastore.RunInTransaction(func, *args, **kw)
Example #9
    def transactional_operation(*args, **kwargs):
        return datastore.RunInTransaction(operation, *args, **kwargs)
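This reads like the inner function of a decorator that closes over operation; a hedged sketch of how the full decorator might look and be applied:

# Hypothetical decorator wrapping any function in a datastore transaction.
def transactional(operation):
    def transactional_operation(*args, **kwargs):
        return datastore.RunInTransaction(operation, *args, **kwargs)
    return transactional_operation

@transactional
def transfer_credits(source, target, amount):
    pass  # all datastore reads/writes here would run inside one transaction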
Example #10
    def post(self):
        metadata_entity = store.GetMetadataEntity(self.request)
        auth_level = store.GetAuthLevel(self.request, metadata_entity)
        (app, kind, id) = store.extract_path(self.request.path)

        if not store.IsEncryptionSufficient(self.request, metadata_entity):
            self.response.set_status(403)
            self.response.clear()
            return

        if not store.IsAuthorized(app, kind, id, auth_level, store.WRITE):
            self.response.set_status(401)
            self.response.clear()
            return

        user = store.GetUser(self.request)

        #logging.info("%s, %s, %s" % (app, kind, id))
        data = self.request.body_file.getvalue()
        data_obj = {}
        if not data:
            return

        if app is not None and kind is not None and id is not None:
            try:
                data_obj = json.loads(data, use_decimal=True)
            except json.JSONDecodeError:
                self.response.set_status(500)
                self.response.clear()
                return

            datastore.RunInTransaction(store.update_entity, app, kind, id,
                                       data_obj, metadata_entity, user)

        elif self.request.headers['Content-type'].startswith(
                'text/csv') and id is None:
            key_column = self.request.get('key', None)
            result = csv_import.ImportCSV(app, kind, key_column,
                                          self.request.body_file,
                                          metadata_entity, user)

            if result == -1:
                self.response.set_status(500)
                self.response.clear()
                return

            self.response.out.write(result)
            self.response.out.write("\n")

        elif app is not None and kind is not None and id is None:
            try:
                data_obj = json.loads(data, use_decimal=True)
            except json.JSONDecodeError:
                self.response.set_status(500)
                self.response.clear()
                return
            if not isinstance(data_obj, list):
                self.response.set_status(500)
                self.response.clear()
                return
            for data in data_obj:
                if 'key' not in data:
                    self.response.set_status(500)
                    self.response.clear()
                    return

            count = 0
            for data in data_obj:
                datastore.RunInTransaction(store.update_entity, app, kind,
                                           data['key'], data, metadata_entity,
                                           user)
                count += 1

            self.response.out.write(count)
            self.response.out.write("\n")