Exemplo n.º 1
0
    def fixInvalidCollections(self):
        """Repair operations whose collection is the invalid-collection marker.

        Looks up every session that contains at least one operation whose
        "collection" equals constants.INVALID_COLLECTION_MARKER. For each such
        operation, a histogram is built over the catalog collections: a
        collection gets one entry for each of its fields that is also
        referenced by the operation. If the histogram reports exactly one
        max-count collection, the operation is reassigned to it and
        self.fix_ctr is incremented; otherwise a warning is logged and the
        operation is left unchanged.

        A session is saved once, after all of its operations were examined,
        and only if at least one of them was repaired.
        """
        searchKey = {
            "operations.collection": constants.INVALID_COLLECTION_MARKER
        }
        for session in self.metadata_db.Session.find(searchKey):
            # The flag is tracked per session (not per operation): a valid or
            # unfixable operation that follows a repaired one must not make
            # us forget that the session needs to be saved
            dirty = False

            for op in session["operations"]:
                if op["collection"] != constants.INVALID_COLLECTION_MARKER:
                    continue

                if self.debug:
                    LOG.debug("Attempting to fix corrupted Operation:\n%s" %
                              pformat(op))

                # For each field referenced in the query, build a histogram of
                # which collections have a field with the same name
                fields = workload.getReferencedFields(op)
                h = Histogram()
                for c in self.metadata_db.Collection.find():
                    for f in c['fields']:
                        if f in fields:
                            h.put(c['name'])
                    ## FOR
                ## FOR

                matches = h.getMaxCountKeys()
                if len(matches) == 0:
                    LOG.warn(
                        "No matching collection was found for corrupted operation\n%s"
                        % pformat(op))
                    continue
                if len(matches) > 1:
                    # Ambiguous: do not guess between equally good candidates
                    LOG.warn(
                        "More than one matching collection was found for corrupted operation %s\n%s"
                        % (matches, pformat(op)))
                    continue

                # Exactly one candidate collection
                op["collection"] = matches[0]
                dirty = True
                self.fix_ctr += 1
                LOG.info("Fix corrupted collection in operation\n%s" %
                         pformat(op))
            ## FOR (operations)

            if dirty:
                session.save()
        ## FOR (sessions)
Exemplo n.º 2
0
    def fixInvalidCollections(self):
        """Reassign operations tagged with the invalid-collection marker.

        Every session holding an operation whose "collection" equals
        constants.INVALID_COLLECTION_MARKER is scanned. For each marked
        operation the catalog collections are tallied in a histogram, one
        entry per collection field that the operation also references. When
        the histogram yields a single max-count collection the operation is
        pointed at it and self.fix_ctr is bumped; with zero or several
        candidates a warning is logged and the operation stays as it is.

        Each session is saved at most once, after its operations have all been
        inspected, and only when at least one operation was actually changed.
        """
        searchKey = {"operations.collection": constants.INVALID_COLLECTION_MARKER}
        for session in self.metadata_db.Session.find(searchKey):
            # Initialised once per session so that a later operation that is
            # skipped cannot reset the flag set by an earlier repaired one
            dirty = False

            for op in session["operations"]:
                if op["collection"] != constants.INVALID_COLLECTION_MARKER:
                    continue

                if self.debug:
                    LOG.debug("Attempting to fix corrupted Operation:\n%s" % pformat(op))

                # For each field referenced in the query, build a histogram of
                # which collections have a field with the same name
                fields = workload.getReferencedFields(op)
                h = Histogram()
                for c in self.metadata_db.Collection.find():
                    for f in c["fields"]:
                        if f in fields:
                            h.put(c["name"])
                    ## FOR
                ## FOR

                matches = h.getMaxCountKeys()
                if len(matches) == 0:
                    LOG.warn("No matching collection was found for corrupted operation\n%s" % pformat(op))
                    continue
                if len(matches) > 1:
                    # Several equally likely collections: leave the operation alone
                    LOG.warn(
                        "More than one matching collection was found for corrupted operation %s\n%s"
                        % (matches, pformat(op))
                    )
                    continue

                # Unambiguous match
                op["collection"] = matches[0]
                dirty = True
                self.fix_ctr += 1
                LOG.info("Fix corrupted collection in operation\n%s" % pformat(op))
            ## FOR (operations)

            if dirty:
                session.save()
        ## FOR (sessions)
Exemplo n.º 3
0
    def hash(self, op):
        """Compute a deterministic signature for the given operation based on its keys.

        The signature is the built-in hash of the tuple
        (collection, type, fieldsHash, updateHash). fieldsHash is produced by
        self.computeFieldsHash() from the fields picked per operation type:

          QUERY  - the "query" entry (key prefixed with
                   constants.REPLACE_KEY_DOLLAR_PREFIX) of the first
                   query_content element
          UPDATE - the first query_content element (the WHERE clause); the
                   second element (the updated columns) is hashed separately
                   into updateHash
          INSERT - the first-level keys reported as max-count by a histogram
                   over all inserted documents
          DELETE - the first query_content element (the WHERE clause)

        updateHash is None unless the operation has non-empty update fields.
        The resulting hash is also added to self.histogram before it is
        returned.

        Raises Exception if a query operation lacks its query field or if the
        operation type is not one of the four listed above. Errors raised by
        computeFieldsHash() for the main fields are logged and re-raised.
        """

        fields = None
        updateFields = None

        # QUERY
        if op["type"] == constants.OP_TYPE_QUERY:
            # The query field has our where clause. Use the same key for the
            # presence check and for the lookup so the two cannot disagree.
            queryKey = constants.REPLACE_KEY_DOLLAR_PREFIX + "query"
            if queryKey not in op["query_content"][0]:
                msg = "Missing query field in query_content for operation #%d" % op["query_id"]
                if self.debug: LOG.warn(pformat(op))
                raise Exception(msg)

            fields = op["query_content"][0][queryKey]

        # UPDATE
        elif op["type"] == constants.OP_TYPE_UPDATE:
            # The first element in the content field is the WHERE clause
            fields = op["query_content"][0]

            # We use a separate field for the updated columns so that they
            # are hashed independently of the WHERE clause (see updateHash)
            updateFields = op['query_content'][1]

        # INSERT
        elif op["type"] == constants.OP_TYPE_INSERT:
            # They could be inserting more than one document here,
            # which all may have different fields...
            # So we will need to build a histogram for which keys are referenced
            # and use the ones that appear the most
            # XXX: We'll only consider keys in the first-level
            keyHistogram = Histogram()
            for doc in op["query_content"]:
                # isinstance() also accepts dict subclasses
                assert isinstance(doc, dict), "Unexpected insert value:\n%s" % pformat(doc)
                for k in doc:
                    keyHistogram.put(k)
            ## FOR
            if LOG.isEnabledFor(logging.DEBUG):
                LOG.debug("Insert '%s' Keys Histogram:\n%s" % (op["collection"], keyHistogram))
            maxKeys = keyHistogram.getMaxCountKeys()
            assert len(maxKeys) > 0, \
                "No keys were found in %d insert documents?" % len(op["query_content"])

            # Keep only the most common keys; when several documents share a
            # key, the value from the last document is the one retained
            fields = { }
            for doc in op["query_content"]:
                for k, v in doc.iteritems():
                    if k in maxKeys:
                        fields[k] = v
                ## FOR
            ## FOR

        # DELETE
        elif op["type"] == constants.OP_TYPE_DELETE:
            # The first element in the content field is the WHERE clause
            fields = op["query_content"][0]
        # UNKNOWN!
        else:
            raise Exception("Unexpected query type: %s" % op["type"])

        # Extract the list of fields that are used
        try:
            fieldsHash = self.computeFieldsHash(fields)
        except Exception:
            LOG.error("Unexpected error when processing operation %d [fields=%s]" % (op["query_id"], str(fields)))
            raise
        updateHash = self.computeFieldsHash(updateFields) if updateFields else None

        t = (op["collection"], op["type"], fieldsHash, updateHash)
        opHash = long(hash(t))
        # Lazy logger arguments: the message is only formatted when DEBUG is on
        LOG.debug("%s %s => HASH:%d", fields, t, opHash)
        self.histogram.put(opHash)
        return opHash