Exemplo n.º 1
0
    def generateDesignCandidates(self, collections, isShardingEnabled=True, isIndexesEnabled=True, isDenormalizationEnabled=True):
        """Build the DesignCandidates for every collection in `collections`.

        For each collection the candidate shard keys, index keys and
        denormalization parents are computed and registered through
        `DesignCandidates.addCollection`.

        @param collections: mapping whose values are per-collection dicts
            providing at least the 'name', 'interesting' and 'fields' entries.
            Each entry of 'fields' must provide a 'parent_col' value.
        @param isShardingEnabled: when True, every interesting key is a
            shard key candidate.
        @param isIndexesEnabled: when True, every ordered permutation of the
            interesting keys, up to constants.MAX_INDEX_SIZE keys long, is an
            index candidate.
        @param isDenormalizationEnabled: when True, every known parent
            collection referenced by a field is a denormalization candidate.
        @return: the populated DesignCandidates instance.
        """
        dc = DesignCandidates()

        # Register every collection name *before* building the candidates.
        # Filling this set while iterating made the denormalization check
        # depend on the iteration order of `collections`: a parent collection
        # visited after its child was wrongly treated as unknown.
        valid_collection = set(col_info['name'] for col_info in collections.itervalues())

        for col_info in collections.itervalues():
            shardKeys = []
            indexKeys = []
            denorm = []

            interesting = self.__remove_heuristicaly_bad_key__(col_info, col_info['interesting'])

            # Make sure that none of our interesting fields start with
            # the character that we used to convert $ commands
            for key in interesting:
                assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), \
                    "Unexpected candidate key '%s.%s'" % (col_info["name"], key)

            if constants.SKIP_MONGODB_ID_FIELD and "_id" in interesting:
                # Work on a copy so the list we were handed is not mutated
                interesting = interesting[:]
                interesting.remove("_id")

            # deal with shards
            if isShardingEnabled:
                LOG.debug("Sharding is enabled")
                shardKeys = interesting

            # deal with indexes
            if isIndexesEnabled:
                LOG.debug("Indexes is enabled")
                # Never build indexes wider than MAX_INDEX_SIZE keys
                max_width = min(len(interesting), constants.MAX_INDEX_SIZE)
                for width in xrange(1, max_width + 1):
                    indexKeys.extend(itertools.permutations(interesting, width))
                ## FOR
            if len(indexKeys) > 10:
                LOG.warn("Too many index keys: %s", len(indexKeys))

            # deal with de-normalization
            if isDenormalizationEnabled:
                LOG.debug("Denormalization is enabled")
                for field in col_info['fields'].itervalues():
                    parent = field['parent_col']
                    # Keep each known parent once, in first-seen order
                    if parent is not None and parent not in denorm and parent in valid_collection:
                        denorm.append(parent)
                ## FOR

            dc.addCollection(col_info['name'], indexKeys, shardKeys, denorm)
        ## FOR

        return dc
Exemplo n.º 2
0
    def generateDesignCandidates(
        self, collections, isShardingEnabled=True, isIndexesEnabled=True, isDenormalizationEnabled=True
    ):
        """Build the DesignCandidates for every collection in `collections`.

        For each collection the candidate shard keys, index keys and
        denormalization parents are computed and registered through
        `DesignCandidates.addCollection`.

        @param collections: mapping whose values are per-collection dicts
            providing at least the "name", "interesting" and "fields" entries.
            Each entry of "fields" must provide "parent_col"; when sharding
            is enabled, the entries for interesting keys must also provide
            "cardinality" and "query_use_count".
        @param isShardingEnabled: when True, the interesting keys are ranked
            by score and the rarely queried ones are discarded.
        @param isIndexesEnabled: when True, every ordered permutation of the
            interesting keys, up to constants.MAX_INDEX_SIZE keys long, is an
            index candidate.
        @param isDenormalizationEnabled: when True, every known parent
            collection referenced by a field is a denormalization candidate.
        @return: the populated DesignCandidates instance.
        """
        dc = DesignCandidates()

        # All collection names must be known before the main loop so that the
        # denormalization check does not depend on the iteration order.
        valid_collection = set(col_info["name"] for col_info in collections.itervalues())

        for col_info in collections.itervalues():
            shardKeys = []
            indexKeys = []
            denorm = []

            interesting = col_info["interesting"]

            # NOTE: the heuristic key pruning is currently disabled here
            # interesting = self.__remove_heuristicaly_bad_key__(col_info, interesting)

            # Make sure that none of our interesting fields start with
            # the character that we used to convert $ commands
            for key in interesting:
                assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), "Unexpected candidate key '%s.%s'" % (
                    col_info["name"],
                    key,
                )

            if constants.SKIP_MONGODB_ID_FIELD and "_id" in interesting:
                # Work on a copy so the list stored in col_info is not mutated
                interesting = interesting[:]
                interesting.remove("_id")

            # deal with shards
            if isShardingEnabled:
                LOG.debug("Sharding is enabled")
                fields = col_info["fields"]
                max_query_use_count = 1
                interesting_scores = []
                for interesting_key in interesting:
                    field = fields[interesting_key]
                    # Weight each key by log2 of its cardinality; a
                    # non-positive cardinality falls back to a weight of 1.
                    # NOTE(review): a cardinality of exactly 1 yields a weight
                    # (and score) of 0 -- confirm that this is intended.
                    cardinality = field["cardinality"]
                    if cardinality > 0:
                        weight = math.log(cardinality, 2)
                    else:
                        weight = 1
                    query_use_count = field["query_use_count"]
                    max_query_use_count = max(max_query_use_count, query_use_count)
                    interesting_scores.append((interesting_key, weight * query_use_count, query_use_count))
                ## FOR

                # Rank the keys by descending score. This must sort in place:
                # the result of sorted() used to be thrown away, leaving the
                # shard keys in their original, unranked order.
                interesting_scores.sort(key=itemgetter(1), reverse=True)

                # Only keep the keys used by more than 10% as many queries as
                # the most queried key
                threshold = float(max_query_use_count)
                shardKeys = [
                    name for name, _score, use_count in interesting_scores if (use_count / threshold) > 0.1
                ]

            # deal with indexes
            if isIndexesEnabled:
                LOG.debug("Indexes is enabled")
                # Never build indexes wider than MAX_INDEX_SIZE keys
                max_width = min(len(interesting), constants.MAX_INDEX_SIZE)
                for width in xrange(1, max_width + 1):
                    indexKeys.extend(itertools.permutations(interesting, width))
                ## FOR
            if len(indexKeys) > 10:
                LOG.warn("Too many index keys: %s", len(indexKeys))

            # deal with de-normalization
            if isDenormalizationEnabled:
                LOG.debug("Denormalization is enabled")
                for field in col_info["fields"].itervalues():
                    parent = field["parent_col"]
                    # Keep each known parent once, in first-seen order
                    if parent is not None and parent not in denorm and parent in valid_collection:
                        denorm.append(parent)
                ## FOR

            dc.addCollection(col_info["name"], indexKeys, shardKeys, denorm)
        ## FOR

        return dc