def get_jsonldish_entity_parents_db(self, entity_uri):
    """ looks up the parents of an entity with a LinkRecursion
        object that is given this object's in-memory caches, then
        keeps whatever ended up in those caches after the lookup
    """
    recursion = LinkRecursion()
    # share the in-memory caches with the recursion object
    recursion.mem_cache_parents = self.entity_parents
    recursion.mem_cache_entities = self.entities
    parent_list = recursion.get_jsonldish_entity_parents(entity_uri)
    # take back the parent cache as the recursion object left it
    self.entity_parents = recursion.mem_cache_parents
    # add only the entities that are not held here yet
    for ent_key, ent_obj in recursion.mem_cache_entities.items():
        if ent_key in self.entities:
            continue
        self.entities[ent_key] = ent_obj
    return parent_list
def get_entity_parents(self):
    """ appends one dictionary (id, label, slug, href) to
        self.parents for each parent found for self.uri.
        Does nothing if self.uri is not a string or if the
        lookup does not give back a list
    """
    if not isinstance(self.uri, str):
        # no usable uri, so there is nothing to look up
        return
    hierarchy = LinkRecursion().get_jsonldish_entity_parents(self.uri, False)
    if not isinstance(hierarchy, list):
        return
    for par in hierarchy:
        par_dict = LastUpdatedOrderedDict()
        par_dict['id'] = par['id']
        par_dict['label'] = par['label']
        par_dict['slug'] = par['slug']
        # the href is a local url made from the parent id
        par_dict['href'] = self.make_local_url(par['id'])
        self.parents.append(par_dict)
def get_parent_entity_facet_field(self, entity_uri):
    """ makes a facet field name from the slug of the penultimate
        item in the parent list looked up for entity_uri.

        Returns False when the lookup does not give back a list,
        or when that list has fewer than 2 items.
    """
    facet_field = False
    parent_list = LinkRecursion().get_jsonldish_entity_parents(entity_uri)
    if isinstance(parent_list, list) and len(parent_list) > 1:
        # the penultimate item provides the slug for the field
        penultimate_slug = parent_list[-2]['slug']
        facet_field = penultimate_slug.replace('-', '_') + '___pred_id'
    return facet_field
def alter_annotation_hiearchy(self):
    """ replaces the hierarchy annotation of self.uuid with the
        last item of its parent list (SKOS or OWL relations),
        if the item has any parents
    """
    parent_list = LinkRecursion().get_jsonldish_entity_parents(self.uuid, False)
    if not isinstance(parent_list, list) or len(parent_list) < 1:
        # no SKOS / OWL parents, nothing to alter
        return
    last_parent_id = parent_list[-1]['id']
    manager = LinkAnnoManagement()
    # so editorial uuid associated with change
    manager.source_id = self.editorial_uuid
    manager.replace_hierarchy(self.uuid, last_parent_id)
def get_parent_item_type_facet_field(self, category_uri):
    """ Gets the parent facet field for a given category_uri.

        Walks the parent list looked up for category_uri and uses
        the first parent whose slug is one of the values in
        self.TYPE_MAPPINGS.

        :param category_uri: identifier handed to
            LinkRecursion.get_jsonldish_entity_parents()
        :return: the matching slug with '-' replaced by '_' plus
            '___pred_id', or False when no parent matches or when
            the lookup gives back nothing to iterate
    """
    parents = LinkRecursion().get_jsonldish_entity_parents(category_uri)
    if not parents:
        # the lookup can give back False instead of a list (other
        # callers test for it); looping over that would raise a
        # TypeError, so treat it the same as "no matching parent"
        return False
    # the mapped slugs do not change while looping, so read them once
    mapped_slugs = self.TYPE_MAPPINGS.values()
    for par in parents:
        if par['slug'] in mapped_slugs:
            # the parent exists in the Type Mappings
            return par['slug'].replace('-', '_') + '___pred_id'
    return False
def make_root_entity_list(self, entity_list):
    """ builds the list of root entities: the items of entity_list
        for which the parent lookup gives back False, each described
        by a dictionary with id, label, slug, href and children
    """
    roots = []
    for candidate in entity_list:
        hierarchy = LinkRecursion().get_jsonldish_entity_parents(candidate.uri, False)
        if hierarchy is not False:
            # something came back from the parent lookup, not a root
            continue
        root = LastUpdatedOrderedDict()
        root['id'] = candidate.uri
        root['label'] = candidate.label
        root['slug'] = candidate.slug
        root['href'] = self.make_local_url(candidate.uri)
        root['children'] = self.get_uri_children(candidate.uri)
        if len(root['children']) > 0:
            # flag that there is more below this root
            root['more'] = True
        roots.append(root)
    return roots
def make_root_entity_list(self, entity_list):
    """ collects the entities of entity_list that have no parents
        (the parent lookup returns False for them) and describes
        each one with id, label, slug, href and children
    """
    top_level = []
    for item in entity_list:
        recursion = LinkRecursion()
        found_parents = recursion.get_jsonldish_entity_parents(item.uri, False)
        if found_parents is not False:
            # the lookup gave something back, so skip this item
            continue
        item_dict = LastUpdatedOrderedDict()
        item_dict['id'] = item.uri
        item_dict['label'] = item.label
        item_dict['slug'] = item.slug
        item_dict['href'] = self.make_local_url(item.uri)
        item_dict['children'] = self.get_uri_children(item.uri)
        if len(item_dict['children']) > 0:
            # only items with children get the 'more' flag
            item_dict['more'] = True
        top_level.append(item_dict)
    return top_level
def validate_make_eol_hierarchy(self, child_uri, parent_uri):
    """ Makes a child -> parent hierarchy relation for EOL entities,
        unless the child is a known entity that already has parents.
        A child that can not be dereferenced gets its label data
        fetched and saved first, and then gets the relation
    """
    uri_cleaner = LinkEntityGeneration()
    # strip off any cruft in the URIs
    child_uri = uri_cleaner.make_clean_uri(child_uri)
    parent_uri = uri_cleaner.make_clean_uri(parent_uri)
    child_ent = Entity()
    if child_ent.dereference(child_uri):
        hierarchy = LinkRecursion().get_jsonldish_entity_parents(child_uri, False)
        # False or an empty result both mean the child has no parents
        ok_create = hierarchy is False or len(hierarchy) == 0
    else:
        # the child does not yet exist, so OK to make the relation
        ok_create = True
        print('Getting missing data for: ' + child_uri)
        self.get_save_entity_label(child_uri)
    if not ok_create:
        print('Already in hierarchy: ' + child_uri)
        return
    print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
    relation = LinkAnnotation()
    relation.subject = child_uri
    relation.subject_type = 'uri'
    relation.project_uuid = '0'
    relation.source_id = 'manual-eol-manage'
    relation.predicate_uri = self.CHILD_PARENT_REL
    relation.object_uri = parent_uri
    relation.sort = 1
    relation.save()
def validate_make_eol_hierarchy(self, child_uri, parent_uri):
    """ Validates hierarchy relations for EOL entities. A relation
        to parent_uri is saved when the child has no parents yet or
        when the child can not be dereferenced at all (its label
        data is then fetched and saved first). Otherwise nothing
        is created
    """
    cleaner = LinkEntityGeneration()
    child_uri = cleaner.make_clean_uri(child_uri)  # strip off any cruft in the URI
    parent_uri = cleaner.make_clean_uri(parent_uri)
    known_child = Entity().dereference(child_uri)
    if known_child:
        existing = LinkRecursion().get_jsonldish_entity_parents(child_uri, False)
        # no parents (False or empty), so OK to make an assertion
        may_create = existing is False or len(existing) == 0
    else:
        # the child does not yet exist, so OK to make the relation
        may_create = True
        print('Getting missing data for: ' + child_uri)
        self.get_save_entity_label(child_uri)
    if not may_create:
        print('Already in hierarchy: ' + child_uri)
        return
    print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
    anno = LinkAnnotation()
    anno.subject = child_uri
    anno.subject_type = 'uri'
    anno.project_uuid = '0'
    anno.source_id = 'manual-eol-manage'
    anno.predicate_uri = self.CHILD_PARENT_REL
    anno.object_uri = parent_uri
    anno.sort = 1
    anno.save()
def process_prop(self, props):
    """ processes 'prop' (property) parameters
        property parameters are tricky because they
        can come in hierarchies
        that's why there's some complexity to this

        props is handed to self.expand_hierarchy_options(), which
        gives back a list of slug paths. The terms made for one
        path are joined with AND, the paths are joined with OR,
        and the result is added as one item to the 'fq' list.

        Returns the query_dict with the keys 'fq', 'facet.field',
        'stats.field', 'prequery-stats', 'facet.range',
        'hl-queries' and 'ranges'. This method itself only appends
        to 'fq', 'facet.field' and 'hl-queries'; the query_dict is
        also passed through add_math_facet_ranges() and
        add_date_facet_ranges().
    """
    # is the property for the item itself, or for a related item?
    query_dict = {'fq': [],
                  'facet.field': [],
                  'stats.field': [],
                  'prequery-stats': [],
                  'facet.range': [],
                  'hl-queries': [],
                  'ranges': {}}
    fq_terms = []  # one parenthesized AND-group per path
    prop_path_lists = self.expand_hierarchy_options(props)
    for prop_path_list in prop_path_lists:
        # the state below is reset for every path and carried
        # from one path item to the next
        i = 0
        path_list_len = len(prop_path_list)
        fq_path_terms = []
        # solr field that the next filter term is written against
        act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
        # data type expected for the next path item, it selects
        # the branch taken in the loop below
        act_field_data_type = 'id'
        last_field_label = False  # needed for full text highlighting
        predicate_solr_slug = False
        for prop_slug in prop_path_list:
            field_prefix = self.get_related_slug_field_prefix(prop_slug)
            solr_f_prefix = field_prefix.replace('-', '_')
            db_prop_slug = self.clean_related_slug(prop_slug)
            l_prop_entity = False
            pred_prop_entity = False
            require_id_field = False
            if act_field_data_type == 'id':
                # check entity exists, and save to memory
                entity = self.m_cache.get_entity(db_prop_slug)
                if entity:
                    last_field_label = entity.label
                    prop_slug = field_prefix + entity.slug
                    if entity.item_type == 'uri' and not db_prop_slug.startswith('oc-gen'):
                        if entity.entity_type == 'property':
                            pred_prop_entity = True
                            predicate_solr_slug = prop_slug.replace('-', '_')
                            l_prop_entity = True
                            children = LinkRecursion().get_entity_children(entity.uri)
                            if len(children) > 0:
                                # ok, this field has children. require it
                                # to be treated as an ID field
                                require_id_field = True
                    else:
                        if entity.item_type == 'predicates':
                            pred_prop_entity = True
                            predicate_solr_slug = prop_slug.replace('-', '_')
                            children = LinkRecursion().get_entity_children(entity.uri)
                            if len(children) > 0:
                                # ok, this field has children. require it
                                # to be treated as an ID field
                                require_id_field = True
                    if i == 0:
                        # only the first path item chooses the root
                        # field to filter on
                        if db_prop_slug.startswith('oc-gen'):
                            # for open context categories / types
                            act_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                            lr = LinkRecursion()
                            parents = lr.get_jsonldish_entity_parents(entity.uri)
                            if len(parents) > 1:
                                # the penultimate parent, if usable,
                                # overrides the field chosen above
                                try:
                                    p_slug = parents[-2]['slug']
                                    act_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                    act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                                except:
                                    # NOTE(review): bare except catches every
                                    # error type, and the pass is redundant
                                    # next to the print - consider narrowing
                                    pass
                                    print('Predicate Parent exception: '+ str(parents))
                        elif entity.item_type == 'uri':
                            act_field_fq = SolrDocument.ROOT_LINK_DATA_SOLR
                        elif entity.item_type == 'predicates':
                            temp_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                            lr = LinkRecursion()
                            parents = lr.get_jsonldish_entity_parents(entity.uri)
                            if len(parents) > 1:
                                try:
                                    p_slug = parents[-2]['slug']
                                    temp_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                except:
                                    print('Predicate Parent exception: '+ str(parents))
                                    temp_field_fq = False
                            # fall back to the root predicate field
                            # when no parent field could be made
                            if temp_field_fq is not False:
                                act_field_fq = temp_field_fq
                            else:
                                act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                        else:
                            act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                    # ---------------------------------------------------
                    # THIS PART BUILDS THE FACET-QUERY
                    # fq_path_term = fq_field + ':' + self.make_solr_value_from_entity(entity)
                    # the below is a bit of a hack. We should have a query field
                    # as with ___pred_ to query just the slug. But this works for now
                    fq_field = act_field_fq + '_fq'
                    if path_list_len >= 2 and act_field_data_type == 'id':
                        # could be an object deeper in the hierarchy, so allow the obj_all version
                        fq_path_term = '(' + fq_field + ':' + prop_slug
                        fq_path_term += ' OR obj_all___' + fq_field + ':' + prop_slug + ')'
                    else:
                        fq_path_term = fq_field + ':' + prop_slug
                    fq_path_terms.append(fq_path_term)
                    #---------------------------------------------------
                    #
                    #---------------------------------------------------
                    # THIS PART PREPARES FOR LOOPING OR FINAL FACET-FIELDS
                    #
                    # print('pred-solr-slug: ' + predicate_solr_slug)
                    field_parts = self.make_prop_solr_field_parts(entity)
                    # the suffix of this field becomes the data type
                    # expected for the next path item
                    act_field_data_type = field_parts['suffix']
                    if require_id_field:
                        act_field_data_type = 'id'
                        field_parts['suffix'] = 'id'
                    # check if the last or penultimate field has
                    # a different data-type (for linked-data)
                    if i >= (path_list_len - 2) \
                       and l_prop_entity:
                        dtypes = self.s_cache.get_dtypes(entity.uri)
                        if isinstance(dtypes, list):
                            # set the data type and the act-field
                            # (only the first listed type is used)
                            act_field_data_type = self.get_solr_field_type(dtypes[0])
                    if not predicate_solr_slug or pred_prop_entity:
                        act_field_fq = field_parts['prefix'] + '___pred_' + field_parts['suffix']
                        act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                        # get a facet on this field
                        if act_field_data_type != 'string':
                            # adds a prefix for related properties
                            ffield = solr_f_prefix + field_parts['prefix'] + '___pred_' + field_parts['suffix']
                            # a facet field is only added for the
                            # last item of the path, and only once
                            if ffield not in query_dict['facet.field'] and \
                               i >= (path_list_len - 1):
                                query_dict['facet.field'].append(ffield)
                    else:
                        # a predicate slug was set by an earlier path
                        # item and this item is not itself a predicate
                        if act_field_data_type == 'id':
                            act_field_fq = 'obj_all___' + predicate_solr_slug \
                                           + '___pred_' + field_parts['suffix']
                            # get a facet on this field
                            if predicate_solr_slug != field_parts['prefix']:
                                # the predicate_solr_slug is not the
                                # prefix of the current field part, meaning
                                # the field_parts[prefix] is the type, and
                                # we want facets for the predicate -> type
                                ffield = field_parts['prefix'] \
                                         + '___' \
                                         + predicate_solr_slug \
                                         + '___pred_' + field_parts['suffix']
                            else:
                                # get facets for the predicate
                                ffield = field_parts['prefix'] \
                                         + '___pred_' \
                                         + field_parts['suffix']
                            # adds a prefix, in case of a related property
                            ffield = solr_f_prefix + ffield
                            if ffield not in query_dict['facet.field'] \
                               and i >= (path_list_len - 1):
                                query_dict['facet.field'].append(ffield)
                        else:
                            act_field_fq = predicate_solr_slug + '___pred_' + field_parts['suffix']
                    # -------------------------------------------
                    # numeric and date fields get their own field
                    # name and range facets
                    if act_field_data_type == 'numeric':
                        # print('Numeric field: ' + act_field)
                        act_field_fq = field_parts['prefix'] + '___pred_numeric'
                        act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                        query_dict = self.add_math_facet_ranges(query_dict, act_field_fq, entity)
                    elif act_field_data_type == 'date':
                        # print('Date field: ' + act_field)
                        act_field_fq = field_parts['prefix'] + '___pred_date'
                        act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                        query_dict = self.add_date_facet_ranges(query_dict, act_field_fq, entity)
                    # print('Current data type (' + str(i) + '): ' + act_field_data_type)
                    # print('Current field (' + str(i) + '): ' + act_field_fq)
                # NOTE(review): i counts every path item handled in the
                # 'id' branch, whether or not an entity was found -
                # confirm this is intended
                i += 1
            elif act_field_data_type == 'string':
                # case for a text search
                # last_field_label = False  # turn off using the field label for highlighting
                string_terms = self.prep_string_search_term(prop_slug)
                for escaped_term in string_terms:
                    search_term = act_field_fq + ':' + escaped_term
                    # highlighting uses the label of the last found
                    # entity together with the term, when there is one
                    if last_field_label is False:
                        query_dict['hl-queries'].append(escaped_term)
                    else:
                        query_dict['hl-queries'].append(last_field_label + ' ' + escaped_term)
                    fq_path_terms.append(search_term)
            elif act_field_data_type == 'numeric':
                # numeric search. assume it's well formed solr numeric request
                search_term = act_field_fq + ':' + prop_slug
                fq_path_terms.append(search_term)
                # now limit the numeric ranges from query to the range facets
                query_dict = self.add_math_facet_ranges(query_dict, act_field_fq, False, prop_slug)
            elif act_field_data_type == 'date':
                # date search. assume it's well formed solr request
                search_term = act_field_fq + ':' + prop_slug
                fq_path_terms.append(search_term)
                # now limit the date ranges from query to the range facets
                query_dict = self.add_date_facet_ranges(query_dict, act_field_fq, False, prop_slug)
        # all terms of one path have to match together
        final_path_term = ' AND '.join(fq_path_terms)
        final_path_term = '(' + final_path_term + ')'
        fq_terms.append(final_path_term)
    # any one of the paths may match
    fq_final = ' OR '.join(fq_terms)
    fq_final = '(' + fq_final + ')'
    query_dict['fq'].append(fq_final)
    return query_dict
def process_prop(self, props):
    """ processes 'prop' (property) parameters
        property parameters are tricky because they
        can come in hierarchies
        that's why there's some complexity to this

        props is handed to self.expand_hierarchy_options(), which
        gives back a list of slug paths. The terms made for one
        path are joined with AND, the paths are joined with OR,
        and the result is added as one item to the 'fq' list.

        Returns the query_dict with the keys 'fq', 'facet.field',
        'stats.field', 'prequery-stats', 'facet.range',
        'hl-queries' and 'ranges'. This method itself only appends
        to 'fq', 'facet.field' and 'hl-queries'; the query_dict is
        also passed through add_math_facet_ranges() and
        add_date_facet_ranges().
    """
    # is the property for the item itself, or for a related item?
    query_dict = {'fq': [],
                  'facet.field': [],
                  'stats.field': [],
                  'prequery-stats': [],
                  'facet.range': [],
                  'hl-queries': [],
                  'ranges': {}}
    fq_terms = []  # one parenthesized AND-group per path
    prop_path_lists = self.expand_hierarchy_options(props)
    for prop_path_list in prop_path_lists:
        # the state below is reset for every path and carried
        # from one path item to the next
        i = 0
        path_list_len = len(prop_path_list)
        fq_path_terms = []
        # solr field that the next filter term is written against
        act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
        # data type expected for the next path item, it selects
        # the branch taken in the loop below
        act_field_data_type = 'id'
        last_field_label = False  # needed for full text highlighting
        predicate_solr_slug = False
        for prop_slug in prop_path_list:
            field_prefix = self.get_related_slug_field_prefix(prop_slug)
            solr_f_prefix = field_prefix.replace('-', '_')
            db_prop_slug = self.clean_related_slug(prop_slug)
            l_prop_entity = False
            pred_prop_entity = False
            require_id_field = False
            if act_field_data_type == 'id':
                # check entity exists, and save to memory
                found = self.mem_cache_obj.check_entity_found(db_prop_slug, False)
                if found:
                    entity = self.mem_cache_obj.get_entity(db_prop_slug, False)
                    last_field_label = entity.label
                    prop_slug = field_prefix + entity.slug
                    if entity.item_type == 'uri' and 'oc-gen' not in db_prop_slug:
                        if entity.entity_type == 'property':
                            pred_prop_entity = True
                            predicate_solr_slug = prop_slug.replace('-', '_')
                            l_prop_entity = True
                            children = self.mem_cache_obj.get_entity_children(entity.uri)
                            if len(children) > 0:
                                # ok, this field has children. require it
                                # to be treated as an ID field
                                require_id_field = True
                    else:
                        if entity.item_type == 'predicates':
                            pred_prop_entity = True
                            predicate_solr_slug = prop_slug.replace('-', '_')
                            children = self.mem_cache_obj.get_entity_children(entity.uri)
                            if len(children) > 0:
                                # ok, this field has children. require it
                                # to be treated as an ID field
                                require_id_field = True
                    if i == 0:
                        # only the first path item chooses the root
                        # field to filter on
                        if 'oc-gen' in db_prop_slug:
                            # for open context categories / types
                            act_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                            lr = LinkRecursion()
                            parents = lr.get_jsonldish_entity_parents(entity.uri)
                            if len(parents) > 1:
                                # the penultimate parent, if usable,
                                # overrides the field chosen above
                                try:
                                    p_slug = parents[-2]['slug']
                                    act_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                    act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                                except:
                                    # NOTE(review): bare except silently
                                    # drops every error type - consider
                                    # narrowing and logging
                                    pass
                        elif entity.item_type == 'uri':
                            act_field_fq = SolrDocument.ROOT_LINK_DATA_SOLR
                        elif entity.item_type == 'predicates':
                            temp_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                            parents = self.mem_cache_obj.get_jsonldish_entity_parents(entity.uri)
                            if len(parents) > 1:
                                try:
                                    p_slug = parents[-2]['slug']
                                    temp_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                except:
                                    print('Predicate Parent exception: '+ str(parents))
                                    temp_field_fq = False
                            # fall back to the root predicate field
                            # when no parent field could be made
                            if temp_field_fq is not False:
                                act_field_fq = temp_field_fq
                            else:
                                act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                        else:
                            act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                    # ---------------------------------------------------
                    # THIS PART BUILDS THE FACET-QUERY
                    # fq_path_term = fq_field + ':' + self.make_solr_value_from_entity(entity)
                    # the below is a bit of a hack. We should have a query field
                    # as with ___pred_ to query just the slug. But this works for now
                    fq_field = act_field_fq + '_fq'
                    if path_list_len >= 2 and act_field_data_type == 'id':
                        # could be an object deeper in the hierarchy, so allow the obj_all version
                        fq_path_term = '(' + fq_field + ':' + prop_slug
                        fq_path_term += ' OR obj_all___' + fq_field + ':' + prop_slug + ')'
                    else:
                        fq_path_term = fq_field + ':' + prop_slug
                    fq_path_terms.append(fq_path_term)
                    #---------------------------------------------------
                    #
                    #---------------------------------------------------
                    # THIS PART PREPARES FOR LOOPING OR FINAL FACET-FIELDS
                    #
                    # print('pred-solr-slug: ' + predicate_solr_slug)
                    field_parts = self.make_prop_solr_field_parts(entity)
                    # the suffix of this field becomes the data type
                    # expected for the next path item
                    act_field_data_type = field_parts['suffix']
                    if require_id_field:
                        act_field_data_type = 'id'
                        field_parts['suffix'] = 'id'
                    # check if the last or penultimate field has
                    # a different data-type (for linked-data)
                    if i >= (path_list_len - 2) \
                       and l_prop_entity:
                        dtypes = self.mem_cache_obj.get_dtypes(entity.uri)
                        if isinstance(dtypes, list):
                            # set the data type and the act-field
                            # (only the first listed type is used)
                            found = self.mem_cache_obj.check_entity_found(db_prop_slug, False)
                            if found:
                                entity = self.mem_cache_obj.get_entity(db_prop_slug, False)
                                # NOTE(review): the attribute is spelled
                                # date_type, possibly meant data_type - confirm
                                entity.date_type = dtypes[0]  # store for later use
                                self.mem_cache_obj.entities[db_prop_slug] = entity  # store for later use
                            act_field_data_type = self.get_solr_field_type(dtypes[0])
                    if predicate_solr_slug is False or pred_prop_entity:
                        act_field_fq = field_parts['prefix'] + '___pred_' + field_parts['suffix']
                        act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                        # get a facet on this field
                        if act_field_data_type != 'string':
                            # adds a prefix for related properties
                            ffield = solr_f_prefix + field_parts['prefix'] + '___pred_' + field_parts['suffix']
                            # a facet field is only added for the
                            # last item of the path, and only once
                            if ffield not in query_dict['facet.field'] \
                               and i >= (path_list_len - 1):
                                query_dict['facet.field'].append(ffield)
                    else:
                        # a predicate slug was set by an earlier path
                        # item and this item is not itself a predicate
                        if act_field_data_type == 'id':
                            act_field_fq = 'obj_all___' + predicate_solr_slug \
                                           + '___pred_' + field_parts['suffix']
                            # get a facet on this field
                            if predicate_solr_slug != field_parts['prefix']:
                                # the predicate_solr_slug is not the
                                # prefix of the current field part, meaning
                                # the field_parts[prefix] is the type, and
                                # we want facets for the predicate -> type
                                ffield = field_parts['prefix'] \
                                         + '___' \
                                         + predicate_solr_slug \
                                         + '___pred_' + field_parts['suffix']
                            else:
                                # get facets for the predicate
                                ffield = field_parts['prefix'] \
                                         + '___pred_' \
                                         + field_parts['suffix']
                            # adds a prefix, in case of a related property
                            ffield = solr_f_prefix + ffield
                            if ffield not in query_dict['facet.field'] \
                               and i >= (path_list_len - 1):
                                query_dict['facet.field'].append(ffield)
                        else:
                            act_field_fq = predicate_solr_slug + '___pred_' + field_parts['suffix']
                    # -------------------------------------------
                    # numeric and date fields get their own field
                    # name and range facets
                    if act_field_data_type == 'numeric':
                        # print('Numeric field: ' + act_field)
                        act_field_fq = field_parts['prefix'] + '___pred_numeric'
                        act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                        query_dict = self.add_math_facet_ranges(query_dict, act_field_fq, entity)
                    elif act_field_data_type == 'date':
                        # print('Date field: ' + act_field)
                        act_field_fq = field_parts['prefix'] + '___pred_date'
                        act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                        query_dict = self.add_date_facet_ranges(query_dict, act_field_fq, entity)
                    # print('Current data type (' + str(i) + '): ' + act_field_data_type)
                    # print('Current field (' + str(i) + '): ' + act_field_fq)
                # NOTE(review): i counts every path item handled in the
                # 'id' branch, whether or not an entity was found -
                # confirm this is intended
                i += 1
            elif act_field_data_type == 'string':
                # case for a text search
                # last_field_label = False  # turn off using the field label for highlighting
                string_terms = self.prep_string_search_term(prop_slug)
                for escaped_term in string_terms:
                    search_term = act_field_fq + ':' + escaped_term
                    # highlighting uses the label of the last found
                    # entity together with the term, when there is one
                    if last_field_label is False:
                        query_dict['hl-queries'].append(escaped_term)
                    else:
                        query_dict['hl-queries'].append(last_field_label + ' ' + escaped_term)
                    fq_path_terms.append(search_term)
            elif act_field_data_type == 'numeric':
                # numeric search. assume it's well formed solr numeric request
                search_term = act_field_fq + ':' + prop_slug
                fq_path_terms.append(search_term)
                # now limit the numeric ranges from query to the range facets
                query_dict = self.add_math_facet_ranges(query_dict, act_field_fq, False, prop_slug)
            elif act_field_data_type == 'date':
                # date search. assume it's well formed solr request
                search_term = act_field_fq + ':' + prop_slug
                fq_path_terms.append(search_term)
                # now limit the date ranges from query to the range facets
                query_dict = self.add_date_facet_ranges(query_dict, act_field_fq, False, prop_slug)
        # all terms of one path have to match together
        final_path_term = ' AND '.join(fq_path_terms)
        final_path_term = '(' + final_path_term + ')'
        fq_terms.append(final_path_term)
    # any one of the paths may match
    fq_final = ' OR '.join(fq_terms)
    fq_final = '(' + fq_final + ')'
    query_dict['fq'].append(fq_final)
    return query_dict