示例#1
0
    def enrichEntity(self, entity, decorations, max_iterations=None, timestamp=None):
        """
        Run every applicable source over ``entity``, pass after pass, until a pass changes nothing.

        entity         -- entity schema object to enrich; it is handed to each group's syncFields(),
                          setTimestamp() and setSource()
        decorations    -- output dict of decorations. Its format is opaque to this class; only group
                          objects and sources understand it, and each group's syncDecorations()
                          propagates the source-local decorations into it
        max_iterations -- upper bound on the number of passes over all sources; a falsy value
                          (None or 0) selects the instance default
        timestamp      -- forwarded to setNow() before enrichment begins

        Returns True if at least one group was marked as enriched, False otherwise.
        An exception raised while a source is enriching (or while its results are synced) is
        handed to report() and the loop moves on to the next source.
        """
        self.setNow(timestamp)
        if not max_iterations:
            max_iterations = self.__default_max_iterations
        logs.debug("Begin enrichment: %s (%s)" % (entity.title, entity.entity_id))
        anythingChanged = False

        # Sources are visited repeatedly: data added by one source can make a previously
        # unresolvable source resolvable.  A pass in which nothing changed ends the loop early.
        for _ in range(max_iterations):
            changedThisPass = False
            for source in self.__sources:
                # The source must handle this kind of entity...
                if entity.kind not in source.kinds:
                    continue
                # ...and, when both sides declare types, share at least one of them.
                if entity.types and source.types:
                    if not set(entity.types).intersection(source.types):
                        continue

                targetGroups = set(
                    group for group in source.getGroups(entity)
                    if self.shouldEnrich(group, source.sourceName, entity)
                )
                if not targetGroups:
                    continue

                # The source works on a rebuilt copy of the entity, never on the entity itself.
                workingCopy = buildEntity(entity.dataExport())
                # The source records here (group name -> enrichment time) every group it enriched.
                # When the external data is identical to what we already hold, membership in this
                # mapping is the only sign that fresh data was received.
                # TODO: This is a dictionary for legacy reasons, it should really be a set.
                enrichedAt = {}
                # Opaque, source-local decorations for group object based extensions (i.e. Menus)
                sourceDecorations = {}
                logs.debug("Enriching with '%s' for groups %s" % (source.sourceName, sorted(targetGroups)))
                groupObjs = [self.getGroup(name) for name in targetGroups]
                try:
                    if not source.enrichEntity(workingCopy, groupObjs, self, sourceDecorations, enrichedAt):
                        continue
                    for groupObj in groupObjs:
                        fieldsChanged = groupObj.syncFields(workingCopy, entity)
                        decorationsChanged = groupObj.syncDecorations(sourceDecorations, decorations)
                        if not (fieldsChanged or groupObj.groupName in enrichedAt or decorationsChanged):
                            continue
                        groupObj.setTimestamp(entity, self.now)
                        groupObj.setSource(entity, source.sourceName)
                        changedThisPass = True
                except Exception:
                    report()

            if not changedThisPass:
                break
            anythingChanged = True

        return anythingChanged
示例#2
0
    def _convertFromMongo(self, document):
        if document is None:
            return None

        if 'search_blurb' in document:
            del(document['search_blurb'])

        document = self._upgradeDocument(document)
        
        if '_id' in document and self._primary_key is not None:
            document[self._primary_key] = self._getStringFromObjectId(document['_id'])
            del(document['_id'])

        entityData = document.pop('entity')
        document['entity'] = {'entity_id': entityData['entity_id']}
        
        stamp = self._obj().dataImport(document, overflow=self._overflow)

        try:
            entity = buildEntity(entityData, mini=True)
            stamp.entity = entity
        except Exception as e:
            logs.warning("Unable to upgrade entity embedded within stamp '%s'" % (stamp.stamp_id))

        return stamp 
示例#3
0
    def _convertFromMongo(self, document):
        if document is None:
            return None

        if '_id' in document and self._primary_key is not None:
            document[self._primary_key] = self._getStringFromObjectId(document['_id'])
            del(document['_id'])

        document.pop('titlel')

        entity = buildEntity(document)
        
        return entity
示例#4
0
    def _convertFromMongo(self, document):
        """
        Keep in mind this is returning a RawTodo, which is less-enriched than a Todo
        """
        if document is None:
            return None

        if "_id" in document and self._primary_key is not None:
            document[self._primary_key] = self._getStringFromObjectId(document["_id"])
            del (document["_id"])

        entityData = document.pop("entity")
        entity = buildEntity(entityData, mini=True)
        document["entity"] = {"entity_id": entity.entity_id}

        stampData = document.pop("stamp", None)
        if stampData is not None:
            document["source_stamp_ids"] = [stampData["stamp_id"]]

        rawtodo = self._obj().dataImport(document, overflow=self._overflow)
        rawtodo.entity = entity

        return rawtodo
示例#5
0
    def _convertFromMongo(self, document, mini=False):
        if document is None:
            return None

        if '_id' in document and self._primary_key is not None:
            document[self._primary_key] = self._getStringFromObjectId(document['_id'])
            del(document['_id'])

        ### HACK: Verify that 'created' timestamp exists for entity
        if 'timestamp' not in document or 'created' not in document['timestamp']:
            try:
                created = ObjectId(document[self._primary_key]).generation_time.replace(tzinfo=None)
            except:
                report()
                raise
            document['timestamp'] = { 'created' : created }

        document.pop('titlel', None)
        document.pop('search_tokens', None)

        entity = buildEntity(document, mini=mini)

        return entity
示例#6
0
    def checkIntegrity(self, key, repair=False, api=None):
        """
        Check the stamp stored under ``key`` and optionally repair it.

        The checks run in this order:

        - Document uses the current schema (has 'contents' and 'search_blurb', no 'credit')

        - Linked user exists

        - Every credited user exists

        - Linked entity exists and is not tombstoned

        - Embedded entity mini matches the linked entity

        - Stamp number is unique for the user

        - The user has only one stamp for the entity

        key    -- identifier passed to _getMongoDocumentFromId(); also used in the messages
        repair -- when True, repairable problems are logged and fixed, and the stamp is written
                  back if anything changed; when False they raise StampedDataError instead
        api    -- not used here; forwarded to self.stamp_stats.checkIntegrity()

        Returns True when all checks pass (or were repaired).

        Raises StampedDataError for a repairable problem when repair is False, and regardless of
        repair for: missing user, missing entity, missing tombstone target, duplicate stamp
        number, and multiple stamps for the same user and entity.
        """
        document = self._getMongoDocumentFromId(key)
        
        assert document is not None

        # Set whenever a repair changed something that must be written back
        modified = False

        # Check if old schema version
        if 'contents' not in document or 'credit' in document or 'search_blurb' not in document:
            msg = "%s: Old schema" % key
            if repair:
                logs.info(msg)
                modified = True
            else:
                raise StampedDataError(msg)

        stamp = self._convertFromMongo(document)

        # Verify that user exists (never repairable)
        userId = stamp.user.user_id
        if self._collection._database['users'].find({'_id': self._getObjectIdFromString(userId)}).count() == 0:
            msg = "%s: User not found (%s)" % (key, userId)
            raise StampedDataError(msg)

        # Verify that any credited users exist
        if stamp.credits is not None:
            # Credits whose user was found; the rest are dropped on repair
            credits = []
            for credit in stamp.credits:
                creditedUserId = credit.user.user_id
                query = {'_id' : self._getObjectIdFromString(creditedUserId)}
                if self._collection._database['users'].find(query).count() == 1:
                    credits.append(credit)
                else:
                    msg = "%s: Credited user not found (%s)" % (key, creditedUserId)
                    if repair:
                        logs.info(msg)
                        modified = True
                    else:
                        raise StampedDataError(msg)
            if len(credits) > 0:
                stamp.credits = credits
            else:
                # NOTE(review): this message is logged even when repair is False and nothing is
                # cleaned up
                msg = "%s: Cleaning up credits" % key
                logs.info(msg)
                if repair:
                    del(stamp.credits)
                    modified = True

        # Verify that entity exists (never repairable)
        entityId = stamp.entity.entity_id
        entityDocument = self._collection._database['entities'].find_one({'_id' : self._getObjectIdFromString(entityId)})
        if entityDocument is None:
            msg = "%s: Entity not found (%s)" % (key, entityId)
            raise StampedDataError(msg)
        entity = buildEntity(entityDocument)

        # Check if entity has been tombstoned and update entity if so
        if entity.sources.tombstone_id is not None:
            msg = "%s: Entity tombstoned to new entity" % (key)
            if repair:
                logs.info(msg)
                tombstoneId = entity.sources.tombstone_id
                tombstone = self._collection._database['entities'].find_one({'_id' : self._getObjectIdFromString(tombstoneId)})
                if tombstone is None:
                    # Raised even though repair is True: there is nothing to repair to
                    msg = "%s: New tombstone entity not found (%s)" % (key, tombstoneId)
                    raise StampedDataError(msg)
                stamp.entity = buildEntity(tombstone).minimize()
                modified = True
            else:
                raise StampedDataError(msg)

        # Check if entity stub has been updated
        else:
            if stamp.entity != entity.minimize():
                msg = "%s: Embedded entity is stale" % key
                if repair:
                    logs.info(msg)
                    stamp.entity = entity.minimize()
                    modified = True
                else:
                    raise StampedDataError(msg)

        # Verify that stamp number is unique (never repairable)
        stampNum = stamp.stats.stamp_num
        duplicateStamps = self._collection.find({'user.user_id' : userId, 'stats.stamp_num' : stampNum})
        if duplicateStamps.count() > 1:
            msg = "%s: Multiple stamps exist for userId '%s' and stampNum '%s'" % (key, userId, stampNum)
            raise StampedDataError(msg)

        # Verify that this is the only stamp for this user for this entity (never repairable).
        # stamp.entity may have been replaced by the tombstone repair above.
        if self._collection.find({'user.user_id': userId, 'entity.entity_id': stamp.entity.entity_id}).count() > 1:
            msg = "%s: Multiple stamps exist for user '%s' and entity '%s'" % (key, userId, stamp.entity.entity_id)
            raise StampedDataError(msg)

        ### TODO
        # Check if temp_image_url exists -> kick off async process
        # Check that image[s] have dimensions
        # Verify image url exists?
        # Check if stats need to be updated?

        # NOTE(review): the filter uses key as-is, while the lookups above convert ids with
        # _getObjectIdFromString() - confirm key already has the stored '_id' type
        if modified and repair:
            self._collection.update({'_id' : key}, self._convertToMongo(stamp))

        # Check integrity for stats
        self.stamp_stats.checkIntegrity(key, repair=repair, api=api)

        return True
示例#7
0
    def checkIntegrity(self, key, repair=False, api=None):
        """
        Check the raw todo stored under ``key`` and optionally repair it.

        The checks run in this order:

        - Todo has the proper structure (updated schema, i.e. no embedded 'stamp')

        - Linked user exists

        - Linked entity exists and is not tombstoned

        - Entity mini matches linked entity

        - If associated with source stamps, verify that each stamp still exists

        - Check if it's been stamped, and that at most one such stamp exists

        key    -- identifier passed to _getMongoDocumentFromId(); also used in the messages
        repair -- when True, repairable problems are logged and fixed, and the todo is written
                  back if anything changed; when False they raise StampedDataError instead
        api    -- accepted for signature compatibility; not used by this method

        Returns True when all checks pass (or were repaired).

        Raises StampedDataError for a repairable problem when repair is False, and regardless of
        repair for: missing user, missing entity, missing tombstone target, and multiple stamps
        for the same user and entity.
        """

        document = self._getMongoDocumentFromId(key)

        assert document is not None

        # Set whenever a repair changed something that must be written back
        modified = False

        # Check if old schema version
        if "stamp" in document:
            msg = "%s: Old schema" % key
            if repair:
                logs.info(msg)
                modified = True
            else:
                raise StampedDataError(msg)

        todo = self._convertFromMongo(document)

        # Verify that user exists (never repairable)
        userId = todo.user_id
        if self._collection._database["users"].find({"_id": self._getObjectIdFromString(userId)}).count() == 0:
            msg = "%s: User not found (%s)" % (key, userId)
            raise StampedDataError(msg)

        # Verify that entity exists (never repairable)
        entityId = todo.entity.entity_id
        entityDocument = self._collection._database["entities"].find_one({"_id": self._getObjectIdFromString(entityId)})
        if entityDocument is None:
            msg = "%s: Entity not found (%s)" % (key, entityId)
            raise StampedDataError(msg)
        entity = buildEntity(entityDocument)

        # Check if entity has been tombstoned and update entity if so
        if entity.sources.tombstone_id is not None:
            msg = "%s: Entity tombstoned to new entity" % (key)
            if repair:
                logs.info(msg)
                tombstoneId = entity.sources.tombstone_id
                tombstone = self._collection._database["entities"].find_one(
                    {"_id": self._getObjectIdFromString(tombstoneId)}
                )
                if tombstone is None:
                    # Raised even though repair is True: there is nothing to repair to
                    msg = "%s: New tombstone entity not found (%s)" % (key, tombstoneId)
                    raise StampedDataError(msg)
                todo.entity = buildEntity(tombstone).minimize()
                modified = True
            else:
                raise StampedDataError(msg)

        # Check if entity stub has been updated
        else:
            if todo.entity != entity.minimize():
                msg = "%s: Embedded entity is stale" % key
                if repair:
                    logs.info(msg)
                    todo.entity = entity.minimize()
                    modified = True
                else:
                    raise StampedDataError(msg)

        # Check if source stamps are still valid
        if todo.source_stamp_ids is not None:
            # Source stamp ids that still resolve; the rest are dropped on repair
            stampIds = []
            for stampId in todo.source_stamp_ids:
                query = {"_id": self._getObjectIdFromString(stampId)}
                if self._collection._database["stamps"].find(query).count() == 1:
                    stampIds.append(stampId)
                else:
                    msg = "%s: Sourced stamp not found (%s)" % (key, stampId)
                    if repair:
                        logs.info(msg)
                        modified = True
                    else:
                        raise StampedDataError(msg)
            if len(stampIds) > 0:
                todo.source_stamp_ids = stampIds
            else:
                # NOTE(review): this message is logged even when repair is False and nothing is
                # cleaned up
                msg = "%s: Cleaning up source stamp ids" % key
                logs.info(msg)
                if repair:
                    del (todo.source_stamp_ids)
                    modified = True

        # Check if todo has been stamped and verify only one possible todo exists.
        # todo.entity may have been replaced by the tombstone repair above.
        query = {"user.user_id": todo.user_id, "entity.entity_id": todo.entity.entity_id}
        stamps = self._collection._database["stamps"].find(query, fields=["_id"])
        # Materialize as a list: the result is measured with len() and indexed below, which a
        # lazy map object (Python 3) supports neither of.
        stampIds = [str(stampDocument["_id"]) for stampDocument in stamps]
        if len(stampIds) == 1:
            if todo.stamp_id is None or todo.stamp_id != stampIds[0]:
                msg = "%s: Replacing stamp id" % key
                if repair:
                    logs.info(msg)
                    todo.stamp_id = stampIds[0]
                    modified = True
                else:
                    raise StampedDataError(msg)
        elif len(stampIds) > 1:
            msg = "%s: Multiple stamps exist for user '%s' and entity '%s'" % (key, todo.user_id, todo.entity.entity_id)
            raise StampedDataError(msg)

        # NOTE(review): the filter uses key as-is, while the lookups above convert ids with
        # _getObjectIdFromString() - confirm key already has the stored '_id' type
        if modified and repair:
            self._collection.update({"_id": key}, self._convertToMongo(todo))

        return True
示例#8
0
 def __init__(self, entity):
     """Keep a private entity rebuilt from entity.dataExport(), not the passed-in object itself."""
     exported = entity.dataExport()
     self.__entity = buildEntity(exported)