Exemplo n.º 1
0
 def remove(self, txn, entity_id):
     """Remove *entity_id* from this index.

     ``add`` stores a mapping ``<prefix>/<entity_id>`` -> ``"<chunk_idx>/<value>"``
     so the chunk containing the entity can be found without scanning.
     This method reads that mapping, drops the id from its chunk list, then
     deletes the mapping entry itself. No-op when the entity is not indexed.
     """
     mapping_key = build_key(self._prefix, entity_id)
     entity_chunk_sub_key = self._graph._get(txn, mapping_key)
     if entity_chunk_sub_key != UNDEFINED:
         # remove entity_id from the chunk list that contains it
         chunk_key = build_key(self._prefix, entity_chunk_sub_key)
         self._graph._remove_from_lst(txn, chunk_key, entity_id)
         # FIX: delete the *mapping key* (prefix/entity_id) — the raw sub key
         # value ("chunk_idx/value") is not a storage key on its own, so the
         # original call removed nothing and leaked the stale mapping entry.
         self._graph._remove(txn, mapping_key)
Exemplo n.º 2
0
    def bulk_add(self, txn, id_iterator):
        """Index every entity id yielded by *id_iterator* in bulk.

        Reads entity data in batches of CHUNK_SIZE, groups entity ids by
        their normalized indexed value, then writes each group out as
        fixed-size chunk lists while maintaining a per-value counter and a
        per-entity "which chunk am I in" mapping (used by ``remove``).

        NOTE(review): ``save_lst`` uses ``_set_lst`` (overwrite), so resuming
        a value whose existing count is not a CHUNK_SIZE multiple appears to
        clobber the partial chunk rather than extend it — presumably bulk_add
        is only used on freshly (re)built indexes; confirm with callers.
        """
        def save_lst(txn, chunk_idx, value, lst):
            # Persist one chunk list under <prefix>/<chunk_idx>/<value>.
            lst_key = build_key(self._prefix, chunk_idx, value)
            self._graph._set_lst(txn, lst_key, lst)

        kind = self._kind
        values_to_id = defaultdict(list)
        # Getting data for all entity_id in chunk of <CHUNK_SIZE>
        filter_length = self._filter_length
        filter_funcs = self._filter_funcs
        # zip_longest over CHUNK_SIZE copies of the same iterator yields
        # batches of CHUNK_SIZE ids, padded with FORBIDDEN_KEY at the tail.
        for entity_ids in zip_longest(*([id_iterator] * CHUNK_SIZE),
                                      fillvalue=FORBIDDEN_KEY):
            entity_datas = self._graph._bulk_get(txn, [
                build_key(kind, entity_id, DATA_SUFFIX)
                for entity_id in entity_ids if entity_id != FORBIDDEN_KEY
            ])
            for entity_key, data in list(entity_datas.items()):
                # Skip entities that don't satisfy the index filters.
                if filter_length and not entity_match(filter_funcs, data):
                    continue
                # Recover the numeric id from "<kind>/<id>/<DATA_SUFFIX>".
                entity_id = int(
                    entity_key.replace(kind + '/',
                                       '').replace('/' + DATA_SUFFIX,
                                                   ''))  # not very clean :(
                lst = [data.get(field, None) for field in self._fields]
                # Completing values_to_id (it's here that the reversal value -> entity_id become possible)
                values_to_id[normalize_value(lst)].append(entity_id)
        # This dic will contain the mapping entity id index key -> value chunk id
        entity_dict = {}
        for value, entity_ids in values_to_id.items():
            lst = []
            value_count_key = build_key(self._prefix, COUNT_SUFFIX, value)
            value_count = self._graph._get(txn, value_count_key)
            if value_count == UNDEFINED:
                value_count = 0
            # Resume chunk numbering from the current per-value count.
            chunk_idx = value_count // CHUNK_SIZE
            for entity_id in entity_ids:
                value_count += 1
                entity_dict[build_key(self._prefix, entity_id)] = build_key(
                    chunk_idx, value
                )  # Need to keep an info of relation between entity and the chunk id that contains it
                lst.append(entity_id)
                # Flush a full chunk and start a fresh one.
                if value_count % CHUNK_SIZE == 0:
                    save_lst(txn, chunk_idx, value, lst)
                    lst = []
                    chunk_idx += 1
            # Flush the trailing, partially-filled chunk (if any).
            if lst:
                save_lst(txn, chunk_idx, value, lst)
            self._graph._set(txn, value_count_key, value_count)
        # Persist all entity -> chunk mappings in one bulk write.
        self._graph._bulk_set(txn, entity_dict)
Exemplo n.º 3
0
 def ids(self, txn, filters):
     """Yield every entity id this index holds for the given *filters*.

     Raises GrapheekIncompetentIndexException when the filters cannot be
     served by this index's fields.
     """
     usable = self._compatible_filters(filters)
     if usable is False:
         raise GrapheekIncompetentIndexException(
             "This index shouldn't have been used")
     for combination in product(*usable):
         normalized = normalize_value(combination)
         count_key = build_key(self._prefix, COUNT_SUFFIX, normalized)
         count = self._graph._get(txn, count_key)
         if count == UNDEFINED:
             # No entity indexed under this value combination.
             continue
         # Walk every chunk list that may contain matching ids.
         last_chunk = int(count) // CHUNK_SIZE
         for chunk_no in range(last_chunk + 1):
             chunk_key = build_key(self._prefix, chunk_no, normalized)
             chunk = self._graph._get_lst(txn, chunk_key)
             if chunk != UNDEFINED:
                 for entity_id in chunk:
                     yield entity_id
Exemplo n.º 4
0
 def estimate(self, txn, filters):
     """Return an upper-bound count of entities matching *filters*.

     Returns -1 as a sentinel when this index is incompatible with the
     filters and must not be used.
     """
     usable = self._compatible_filters(filters)
     if usable is False:
         return -1  # -1 says that this index shouldn't be used
     total = 0
     for combination in product(*usable):
         normalized = normalize_value(combination)
         count_key = build_key(self._prefix, COUNT_SUFFIX, normalized)
         stored_count = self._graph._get(txn, count_key)
         if stored_count == UNDEFINED:
             continue
         total += int(stored_count)
     # NOTE: this is an over-estimation, not an exact match count.
     return total
Exemplo n.º 5
0
 def get_kind_ids(self, txn, kind):
     """Yield every stored entity id of the given *kind* (vertex or edge)."""
     if kind == KIND_VERTEX:
         counter_key = METADATA_VERTEX_COUNTER
         list_prefix = METADATA_VERTEX_ID_LIST_PREFIX
     else:
         counter_key = METADATA_EDGE_COUNTER
         list_prefix = METADATA_EDGE_ID_LIST_PREFIX
     # The global entity counter tells us how many id-list chunks exist.
     last_chunk = int(self._graph._get(None, counter_key)) // CHUNK_SIZE
     chunk_keys = [build_key(list_prefix, idx)
                   for idx in range(last_chunk + 1)]
     for chunk in self._graph._bulk_get_lst(txn, chunk_keys):
         if chunk == UNDEFINED:
             continue
         for entity_id in chunk:
             yield entity_id
Exemplo n.º 6
0
 def add(self, txn, entity_id, data):
     """Index *entity_id* under the normalized value extracted from *data*."""
     if self._filter_length and not entity_match(self._filter_funcs, data):
         # Entity does not satisfy the index filters — nothing to do.
         return
     value = normalize_value(
         [data.get(field, None) for field in self._fields])
     # Read the current per-value counter (value is a string key component).
     count_key = build_key(self._prefix, COUNT_SUFFIX, value)
     stored = self._graph._get(txn, count_key)
     count = 0 if stored == UNDEFINED else int(stored)
     # Bump the counter for this value.
     self._graph._set(txn, count_key, count + 1)
     # Append the entity id to the chunk it belongs to.
     chunk_idx = count // CHUNK_SIZE
     chunk_key = build_key(self._prefix, chunk_idx, value)
     self._graph._append_to_lst(txn, chunk_key, entity_id)
     # Remember which chunk holds this entity, so a later removal can
     # reach the right chunk directly instead of scanning them all.
     self._graph._set(txn, build_key(self._prefix, entity_id),
                      build_key(chunk_idx, value))
Exemplo n.º 7
0
 def test_update_edge_data(self):
     """Corrupt the backend (drop the edge's data key) and check that a
     subsequent attribute update raises GrapheekDataException."""
     # Manually modifying backend
     from grapheekdb.backends.data.keys import KIND_EDGE, DATA_SUFFIX
     from grapheekdb.backends.data.keys import build_key
     txn = self.graph._transaction_begin()
     try:
         self.graph._remove(
             txn, build_key(KIND_EDGE, self.e1.get_id(), DATA_SUFFIX))
         self.graph._transaction_commit(txn)
     except Exception:
         # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
         # narrow to Exception while keeping the best-effort rollback.
         self.graph._transaction_rollback(txn)
     exception_raised = False
     try:
         self.e1.foobar = 3
     except GrapheekDataException:
         exception_raised = True
     assert (exception_raised)
 def test_remove_node(self):
     """Corrupt the backend (drop the node's out-edges key) and check that
     removing the node raises GrapheekDataException."""
     # Manually modifying backend
     from grapheekdb.backends.data.keys import KIND_VERTEX, OUT_EDGES_SUFFIX
     from grapheekdb.backends.data.keys import build_key
     txn = self.graph._transaction_begin()
     try:
         # Manually modifying backend
         self.graph._remove(txn, build_key(KIND_VERTEX, self.n1.get_id(), OUT_EDGES_SUFFIX))
         self.graph._transaction_commit(txn)
     except Exception:
         # FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
         # narrow to Exception while keeping the best-effort rollback.
         self.graph._transaction_rollback(txn)
     exception_raised = False
     try:
         self.n1.remove()
     except GrapheekDataException:
         exception_raised = True
     assert(exception_raised)
Exemplo n.º 9
0
 def save_lst(txn, chunk_idx, value, lst):
     # Persist *lst* as the chunk numbered *chunk_idx* for *value*.
     # NOTE(review): `self` is not a parameter — this reads as a nested
     # helper (cf. bulk_add) relying on the enclosing scope; confirm context.
     key = build_key(self._prefix, chunk_idx, value)
     self._graph._set_lst(txn, key, lst)