Пример #1
0
 def add_skos_hierarachy(self, parent_uri, child_uri):
     """ Records that one linked entity is the hierarchic child of
         another.

         Both URIs must already exist as LinkEntity records. Nothing is
         saved when either one is missing, or when the child already has
         at least one parent; a message is printed in every case.
     """
     def lookup(entity_uri):
         # A missing record is reported as None instead of raising.
         try:
             return LinkEntity.objects.get(uri=entity_uri)
         except LinkEntity.DoesNotExist:
             return None

     parent = lookup(parent_uri)
     child = lookup(child_uri)
     if parent is None or child is None:
         print('Cannot find parent or child')
         return
     current_parents = LinkRecursion().get_entity_parents(child_uri)
     if len(current_parents) >= 1:
         print('Child has parents: ' + str(current_parents))
         return
     # The child is not in a hierarchy yet, so it can be placed in one.
     annotation = LinkAnnotation()
     annotation.subject = child.uri  # the subordinate is the subject
     annotation.subject_type = 'uri'
     annotation.project_uuid = self.project_uuid
     annotation.source_id = self.source_id + '-hierarchy'
     annotation.predicate_uri = self.PRED_SBJ_IS_SUB_OF_OBJ
     annotation.object_uri = parent.uri  # the parent is the object
     annotation.save()
     print('Made: ' + child.uri + ' child of: ' + parent.uri)
Пример #2
0
 def add_skos_hierarachy(self, parent_uri, child_uri):
     """ Records that one linked entity is the hierarchic child of
         another.

         Both URIs must already exist as LinkEntity records. Nothing is
         saved when either one is missing, or when the child already has
         at least one parent; a message is printed in every case.
     """
     def lookup(entity_uri):
         # A missing record is reported as None instead of raising.
         try:
             return LinkEntity.objects.get(uri=entity_uri)
         except LinkEntity.DoesNotExist:
             return None

     parent = lookup(parent_uri)
     child = lookup(child_uri)
     if parent is None or child is None:
         print('Cannot find parent or child')
         return
     current_parents = LinkRecursion().get_entity_parents(child_uri)
     if len(current_parents) >= 1:
         print('Child has parents: ' + str(current_parents))
         return
     # The child is not in a hierarchy yet, so it can be placed in one.
     annotation = LinkAnnotation()
     annotation.subject = child.uri  # the subordinate is the subject
     annotation.subject_type = 'uri'
     annotation.project_uuid = self.project_uuid
     annotation.source_id = self.source_id + '-hierarchy'
     annotation.predicate_uri = self.PRED_SBJ_IS_SUB_OF_OBJ
     annotation.object_uri = parent.uri  # the parent is the object
     annotation.save()
     print('Made: ' + child.uri + ' child of: ' + parent.uri)
Пример #3
0
 def get_uri_children(self, uri):
     """ Lists the child entities recorded for a URI.

         Returns a list of dicts with 'id', 'label', 'slug' and 'href'
         keys, ordered by sort, label and uri; the entity whose uri
         equals self.uri is left out. The list is empty when uri is not
         a string or when the child lookup does not yield a dict.
     """
     if not isinstance(uri, str):
         return []
     recursion = LinkRecursion()
     recursion.get_entity_children(uri, False)
     child_map = recursion.child_entities
     if not isinstance(child_map, dict):
         return []
     # Only the entry keyed by the requested URI matters here.
     listed_uris = child_map.get(uri, [])
     # Widen the list to the identifier variants before querying, then
     # let the database put the entities in their proper order.
     uri_variants = LinkEquivalence().get_identifier_list_variants(
         listed_uris)
     ordered_children = (
         LinkEntity.objects
         .filter(uri__in=uri_variants)
         .exclude(uri=self.uri)
         .order_by('sort', 'label', 'uri')
     )
     results = []
     for child in ordered_children:
         entry = LastUpdatedOrderedDict()
         entry['id'] = child.uri
         entry['label'] = child.label
         entry['slug'] = child.slug
         entry['href'] = self.make_local_url(child.uri)
         results.append(entry)
     return results
Пример #4
0
 def get_uri_children(self, uri):
     """ Lists the child entities recorded for a URI.

         Returns a list of dicts with 'id', 'label', 'slug' and 'href'
         keys, ordered by sort, label and uri; the entity whose uri
         equals self.uri is left out. The list is empty when uri is not
         a string or when the child lookup does not yield a dict.
     """
     if not isinstance(uri, str):
         return []
     recursion = LinkRecursion()
     recursion.get_entity_children(uri, False)
     child_map = recursion.child_entities
     if not isinstance(child_map, dict):
         return []
     # Only the entry keyed by the requested URI matters here.
     listed_uris = child_map.get(uri, [])
     # Widen the list to the identifier variants before querying, then
     # let the database put the entities in their proper order.
     uri_variants = LinkEquivalence().get_identifier_list_variants(
         listed_uris)
     ordered_children = (
         LinkEntity.objects
         .filter(uri__in=uri_variants)
         .exclude(uri=self.uri)
         .order_by('sort', 'label', 'uri')
     )
     results = []
     for child in ordered_children:
         entry = LastUpdatedOrderedDict()
         entry['id'] = child.uri
         entry['label'] = child.label
         entry['slug'] = child.slug
         entry['href'] = self.make_local_url(child.uri)
         results.append(entry)
     return results
Пример #5
0
 def make_reconcilation_json(self, search_term, geojson_ld):
     """ Builds reconciliation results from a GeoJSON-LD search response.

         Every option listed under 'oc-api:has-id-options' of each facet
         is scored, and the best scoring options are returned, up to
         five items in total across all facets.

         :param search_term: text handed to self.count_label_matches
         :param geojson_ld: dict with the search response; it is also
             stored on self.geojson_ld
         :return: False when geojson_ld is not a dict, otherwise a list
             of dicts with 'id', 'label', 'rank' and 'related-labels'
     """
     if not isinstance(geojson_ld, dict):
         return False
     self.geojson_ld = geojson_ld
     recon_json = []
     for facet in geojson_ld.get('oc-api:has-facets', []):
         if 'oc-api:has-id-options' not in facet:
             continue
         id_options = facet['oc-api:has-id-options']
         # Largest count in this facet (never below zero); it scales the
         # count part of every rank.
         max_count = max([0] + [opt['count'] for opt in id_options])
         id_ranks = {}
         first_option = {}
         for option in id_options:
             id_uri = option['rdfs:isDefinedBy']
             lr = LinkRecursion()
             lr.get_entity_children(id_uri)
             # More child entries mean a less specific category, which
             # should rank lower.
             levels = len(lr.child_entities) + 1
             if max_count > 0:
                 rank = (option['count'] / levels) / max_count
             else:
                 # No option has a positive count. Dividing by zero
                 # would raise, so the count part contributes nothing.
                 rank = 0
             match_count = self.count_label_matches(search_term, id_uri)
             id_ranks[id_uri] = rank + (match_count / levels)
             # Keep the first option seen per id so the output loop does
             # not have to scan the options again.
             first_option.setdefault(id_uri, option)
         sorted_ids = sorted(id_ranks.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
         for id_key, rank in sorted_ids:
             if len(recon_json) >= 5:
                 break
             rank_item = LastUpdatedOrderedDict()
             rank_item['id'] = id_key
             rank_item['label'] = first_option[id_key]['label']
             rank_item['rank'] = rank
             rank_item['related-labels'] = self.get_related_labels(
                 id_key, True)
             recon_json.append(rank_item)
     return recon_json
Пример #6
0
 def get_children(self, identifier):
     """ Looks up the SKOS or OWL children of an entity.

         self.children is refreshed only when the identifier can be
         dereferenced; otherwise its current value is returned as is.
     """
     if Entity().dereference(identifier):
         # Cleared before the lookup, then replaced with its result.
         self.children = []
         recursion = LinkRecursion()
         recursion.get_entity_children(identifier)
         self.children = recursion.child_entities
     return self.children
Пример #7
0
 def get_children(self, identifier):
     """ Looks up the SKOS or OWL children of an entity.

         self.children is refreshed only when the identifier can be
         dereferenced; otherwise its current value is returned as is.
     """
     if Entity().dereference(identifier):
         # Cleared before the lookup, then replaced with its result.
         self.children = []
         recursion = LinkRecursion()
         recursion.get_entity_children(identifier)
         self.children = recursion.child_entities
     return self.children
Пример #8
0
 def get_jsonldish_entity_parents_db(self, entity_uri):
     """ Returns the parents of an entity, sharing the caches held on
         this object with the LinkRecursion instance doing the lookup.
     """
     recursion = LinkRecursion()
     # Hand over the caches kept on this object before the lookup.
     recursion.mem_cache_parents = self.entity_parents
     recursion.mem_cache_entities = self.entities
     found_parents = recursion.get_jsonldish_entity_parents(entity_uri)
     # Keep what the lookup cached; entities already known here are
     # left untouched.
     self.entity_parents = recursion.mem_cache_parents
     for cache_key, cached_entity in recursion.mem_cache_entities.items():
         self.entities.setdefault(cache_key, cached_entity)
     return found_parents
Пример #9
0
 def get_entity_parents(self):
     """ Appends a summary dict ('id', 'label', 'slug', 'href') to
         self.parents for each parent of self.uri.

         Does nothing unless self.uri is a string and the parent lookup
         returns a list.
     """
     if not isinstance(self.uri, str):
         return
     found = LinkRecursion().get_jsonldish_entity_parents(self.uri, False)
     if not isinstance(found, list):
         return
     for parent in found:
         summary = LastUpdatedOrderedDict()
         summary['id'] = parent['id']
         summary['label'] = parent['label']
         summary['slug'] = parent['slug']
         summary['href'] = self.make_local_url(parent['id'])
         self.parents.append(summary)
Пример #10
0
 def get_parent_entity_facet_field(self, entity_uri):
     """ Works out the parent facet field for an entity URI. This
         assumes the URI belongs to an entity that exists in the
         database.

         Returns the field name built from the slug of the second to
         last entry in the parent list, or False when the lookup gives
         fewer than two entries or no list at all.
     """
     lineage = LinkRecursion().get_jsonldish_entity_parents(entity_uri)
     if isinstance(lineage, list) and len(lineage) > 1:
         # the penultimate entry supplies the field
         return lineage[-2]['slug'].replace('-', '_') + '___pred_id'
     return False
Пример #11
0
 def get_entity_parents(self):
     """ Appends a summary dict ('id', 'label', 'slug', 'href') to
         self.parents for each parent of self.uri.

         Does nothing unless self.uri is a string and the parent lookup
         returns a list.
     """
     if not isinstance(self.uri, str):
         return
     found = LinkRecursion().get_jsonldish_entity_parents(self.uri, False)
     if not isinstance(found, list):
         return
     for parent in found:
         summary = LastUpdatedOrderedDict()
         summary['id'] = parent['id']
         summary['label'] = parent['label']
         summary['slug'] = parent['slug']
         summary['href'] = self.make_local_url(parent['id'])
         self.parents.append(summary)
Пример #12
0
 def get_entity_children_db(self, entity_uri):
     """ Returns the children of an entity, answering straight from
         self.entity_children when the URI is already a key there.

         Otherwise a LinkRecursion lookup is run with this object's
         caches, and the caches are taken back afterwards.
     """
     if entity_uri in self.entity_children:
         return self.entity_children[entity_uri]
     recursion = LinkRecursion()
     recursion.mem_cache_entities = self.entities
     recursion.child_entities = self.entity_children
     found = recursion.get_entity_children(entity_uri)
     self.entities = recursion.mem_cache_entities
     self.entity_children = recursion.child_entities
     return found
Пример #13
0
 def get_parent_item_type_facet_field(self, category_uri):
     """ Gets the parent facet field for a given category_uri. This
         assumes the category_uri is an entity that exists in the
         database.

         :param category_uri: identifier passed to the parent lookup
         :return: the field name built from the first parent whose slug
             is among the values of self.TYPE_MAPPINGS, or False when
             there is no such parent
     """
     parents = LinkRecursion().get_jsonldish_entity_parents(category_uri)
     if not parents:
         # Nothing to iterate (the lookup gave an empty or false
         # result), so there is no field to report.
         return False
     mapped_slugs = self.TYPE_MAPPINGS.values()
     for par in parents:
         if par['slug'] in mapped_slugs:
             # the parent exists in the Type Mappings
             return par['slug'].replace('-', '_') + '___pred_id'
     return False
Пример #14
0
 def alter_annotation_hiearchy(self):
     """ Changes hierarchic annotations expressed in SKOS or OWL
         relations.

         When the parent lookup for self.uuid returns a non-empty list,
         the id of its last entry is passed with self.uuid to
         LinkAnnoManagement.replace_hierarchy.
     """
     lineage = LinkRecursion().get_jsonldish_entity_parents(self.uuid,
                                                            False)
     if not isinstance(lineage, list) or len(lineage) == 0:
         return
     # the item has SKOS / OWL parents
     parent_id = lineage[-1]['id']
     manager = LinkAnnoManagement()
     # so the editorial uuid is associated with the change
     manager.source_id = self.editorial_uuid
     manager.replace_hierarchy(self.uuid, parent_id)
Пример #15
0
 def alter_annotation_hiearchy(self):
     """ Changes hierarchic annotations expressed in SKOS or OWL
         relations.

         When the parent lookup for self.uuid returns a non-empty list,
         the id of its last entry is passed with self.uuid to
         LinkAnnoManagement.replace_hierarchy.
     """
     lineage = LinkRecursion().get_jsonldish_entity_parents(self.uuid,
                                                            False)
     if not isinstance(lineage, list) or len(lineage) == 0:
         return
     # the item has SKOS / OWL parents
     parent_id = lineage[-1]['id']
     manager = LinkAnnoManagement()
     # so the editorial uuid is associated with the change
     manager.source_id = self.editorial_uuid
     manager.replace_hierarchy(self.uuid, parent_id)
Пример #16
0
 def _process_category(self):
     """ Finds category / type data ('class_uri' n the manifest table)
     For indexing as a type of predicate
     """
     if 'category' in self.oc_item.json_ld:
         for category in self.oc_item.json_ld['category']:
             # get the parent entities of the current category
             parents = LinkRecursion(
                 ).get_jsonldish_entity_parents(category)
             item_type_found = False
             active_predicate_field = False
             for index, parent in enumerate(parents):
                 # We're ignoring the 'slug' from the LinkRecursion parents
                 # Gets the last part of the URI
                 ptype = parent['id'].split('/')[-1]
                 # prefix_ptype = 'oc-gen-' + ptype
                 # consistent with other uses of slugs for solr fields
                 prefix_ptype = parent['slug']
                 if item_type_found is False:
                     if ptype == self.oc_item.item_type:
                         item_type_found = True
                 if active_predicate_field is not False:
                     solr_value = self._concat_solr_string_value(
                         prefix_ptype,
                         'id',
                         parent['id'],
                         parent['label']
                         )
                     if active_predicate_field not in self.fields:
                         self.fields[active_predicate_field] = []
                     self.fields[active_predicate_field].append(solr_value)
                 if item_type_found:
                     active_predicate_field = self._convert_slug_to_solr(
                         prefix_ptype) + '___pred_id'
Пример #17
0
 def make_reconcilation_json(self, search_term, geojson_ld):
     """ Builds reconciliation results from a GeoJSON-LD search response.

         Every option listed under 'oc-api:has-id-options' of each facet
         is scored, and the best scoring options are returned, up to
         five items in total across all facets.

         :param search_term: text handed to self.count_label_matches
         :param geojson_ld: dict with the search response; it is also
             stored on self.geojson_ld
         :return: False when geojson_ld is not a dict, otherwise a list
             of dicts with 'id', 'label', 'rank' and 'related-labels'
     """
     if not isinstance(geojson_ld, dict):
         return False
     self.geojson_ld = geojson_ld
     recon_json = []
     for facet in geojson_ld.get('oc-api:has-facets', []):
         if 'oc-api:has-id-options' not in facet:
             continue
         id_options = facet['oc-api:has-id-options']
         # Largest count in this facet (never below zero); it scales the
         # count part of every rank.
         max_count = max([0] + [opt['count'] for opt in id_options])
         id_ranks = {}
         first_option = {}
         for option in id_options:
             id_uri = option['rdfs:isDefinedBy']
             lr = LinkRecursion()
             lr.get_entity_children(id_uri)
             # More child entries mean a less specific category, which
             # should rank lower.
             levels = len(lr.child_entities) + 1
             if max_count > 0:
                 rank = (option['count'] / levels) / max_count
             else:
                 # No option has a positive count. Dividing by zero
                 # would raise, so the count part contributes nothing.
                 rank = 0
             match_count = self.count_label_matches(search_term, id_uri)
             id_ranks[id_uri] = rank + (match_count / levels)
             # Keep the first option seen per id so the output loop does
             # not have to scan the options again.
             first_option.setdefault(id_uri, option)
         sorted_ids = sorted(id_ranks.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
         for id_key, rank in sorted_ids:
             if len(recon_json) >= 5:
                 break
             rank_item = LastUpdatedOrderedDict()
             rank_item['id'] = id_key
             rank_item['label'] = first_option[id_key]['label']
             rank_item['rank'] = rank
             rank_item['related-labels'] = self.get_related_labels(
                 id_key, True)
             recon_json.append(rank_item)
     return recon_json
Пример #18
0
 def get_numeric_range_from_children(self, predicate_uuids):
     """ Gets the numeric range for a list of predicates together with
         the child entities found for each of them.

         A value that is not a list is converted to a string and treated
         as a single predicate.
     """
     if not isinstance(predicate_uuids, list):
         predicate_uuids = [str(predicate_uuids)]
     collected = []
     for predicate_uuid in predicate_uuids:
         collected.append(predicate_uuid)
         recursion = LinkRecursion()
         recursion.get_entity_children(predicate_uuid)
         # Only the keys of the child mapping are needed.
         for child_key in recursion.child_entities.keys():
             if child_key not in collected:
                 collected.append(child_key)
     # Children are already gathered, so the summary itself must not
     # look at children recursively.
     return self.get_numeric_range(collected, False)
Пример #19
0
 def get_numeric_range_from_children(self, predicate_uuids):
     """ Gets the numeric range for a list of predicates together with
         the child entities found for each of them.

         A value that is not a list is converted to a string and treated
         as a single predicate.
     """
     if not isinstance(predicate_uuids, list):
         predicate_uuids = [str(predicate_uuids)]
     collected = []
     for predicate_uuid in predicate_uuids:
         collected.append(predicate_uuid)
         recursion = LinkRecursion()
         recursion.get_entity_children(predicate_uuid)
         # Only the keys of the child mapping are needed.
         for child_key in recursion.child_entities.keys():
             if child_key not in collected:
                 collected.append(child_key)
     # Children are already gathered, so the summary itself must not
     # look at children recursively.
     return self.get_numeric_range(collected, False)
Пример #20
0
 def get_new_hierarchic_types(self, revision_date):
     """ Lists 'types' manifest items revised on or after a date whose
         label contains the default hierarchy delimiter but which have
         no superior concept yet.

         revision_date is a 'YYYY-MM-DD' string.
     """
     revised_since = datetime.strptime(revision_date, '%Y-%m-%d')
     candidates = Manifest.objects.filter(
         item_type='types',
         label__contains=self.HIERARCHY_DELIM,
         revised__gte=revised_since)
     without_parents = []
     for hi_type in candidates:
         recursion = LinkRecursion()
         recursion.get_entity_parents(hi_type.uuid)
         if len(recursion.parent_entities) < 1:
             # no superior parents found
             without_parents.append(hi_type)
     return without_parents
Пример #21
0
 def make_root_entity_list(self, entity_list):
     """ Builds the list of entities that are not children of other
         items in the hierarchy.

         An entity counts as a root when its parent lookup returns
         False. Each root is described by a dict with 'id', 'label',
         'slug', 'href' and 'children', plus 'more' set to True when it
         has children.
     """
     roots = []
     for candidate in entity_list:
         lineage = LinkRecursion().get_jsonldish_entity_parents(
             candidate.uri, False)
         if lineage is not False:
             continue
         node = LastUpdatedOrderedDict()
         node['id'] = candidate.uri
         node['label'] = candidate.label
         node['slug'] = candidate.slug
         node['href'] = self.make_local_url(candidate.uri)
         node['children'] = self.get_uri_children(candidate.uri)
         if len(node['children']) > 0:
             node['more'] = True
         roots.append(node)
     return roots
Пример #22
0
 def get_new_hierarchic_types(self, revision_date):
     """ Lists 'types' manifest items revised on or after a date whose
         label contains the default hierarchy delimiter but which have
         no superior concept yet.

         revision_date is a 'YYYY-MM-DD' string.
     """
     revised_since = datetime.strptime(revision_date, '%Y-%m-%d')
     candidates = Manifest.objects.filter(
         item_type='types',
         label__contains=self.HIERARCHY_DELIM,
         revised__gte=revised_since)
     without_parents = []
     for hi_type in candidates:
         recursion = LinkRecursion()
         recursion.get_entity_parents(hi_type.uuid)
         if len(recursion.parent_entities) < 1:
             # no superior parents found
             without_parents.append(hi_type)
     return without_parents
Пример #23
0
 def make_root_entity_list(self, entity_list):
     """ Builds the list of entities that are not children of other
         items in the hierarchy.

         An entity counts as a root when its parent lookup returns
         False. Each root is described by a dict with 'id', 'label',
         'slug', 'href' and 'children', plus 'more' set to True when it
         has children.
     """
     roots = []
     for candidate in entity_list:
         lineage = LinkRecursion().get_jsonldish_entity_parents(
             candidate.uri, False)
         if lineage is not False:
             continue
         node = LastUpdatedOrderedDict()
         node['id'] = candidate.uri
         node['label'] = candidate.label
         node['slug'] = candidate.slug
         node['href'] = self.make_local_url(candidate.uri)
         node['children'] = self.get_uri_children(candidate.uri)
         if len(node['children']) > 0:
             node['more'] = True
         roots.append(node)
     return roots
Пример #24
0
 def validate_make_eol_hierarchy(self, child_uri, parent_uri):
     """ Validates and, where allowed, creates a hierarchy relation for
         EOL entities.

         A child that already has a parent is left alone. A child that
         cannot be dereferenced has its label fetched and saved first,
         and the relation is then created for it as well.
     """
     cleaner = LinkEntityGeneration()
     child_uri = cleaner.make_clean_uri(child_uri)  # strip off any cruft
     parent_uri = cleaner.make_clean_uri(parent_uri)
     if Entity().dereference(child_uri):
         lineage = LinkRecursion().get_jsonldish_entity_parents(child_uri,
                                                                False)
         # No parents (False or an empty list) means the assertion can
         # be made.
         ok_create = lineage is False or len(lineage) == 0
     else:
         # the child does not exist yet, so the relation can be made
         ok_create = True
         print('Getting missing data for: ' + child_uri)
         self.get_save_entity_label(child_uri)
     if not ok_create:
         print('Already in hierarchy: ' + child_uri)
         return
     print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
     relation = LinkAnnotation()
     relation.subject = child_uri
     relation.subject_type = 'uri'
     relation.project_uuid = '0'
     relation.source_id = 'manual-eol-manage'
     relation.predicate_uri = self.CHILD_PARENT_REL
     relation.object_uri = parent_uri
     relation.sort = 1
     relation.save()
Пример #25
0
def get_entity_item_children_list(item, recursive=False):
    """Returns the children found by LinkRecursion for an item entity.

    :param entity item: See the apps/entity/models entity object for a
        definition.
    :param recursive: passed through to
        LinkRecursion.get_entity_children.
    """
    lookup_id = item.slug
    item_uuid = getattr(item, 'uuid', None)
    if item_uuid:
        # Prefer the UUID over the slug whenever the item has one.
        lookup_id = item_uuid
    return LinkRecursion().get_entity_children(lookup_id,
                                               recursive=recursive)
Пример #26
0
 def validate_make_eol_hierarchy(self, child_uri, parent_uri):
     """ Validates and, where allowed, creates a hierarchy relation for
         EOL entities.

         A child that already has a parent is left alone. A child that
         cannot be dereferenced has its label fetched and saved first,
         and the relation is then created for it as well.
     """
     cleaner = LinkEntityGeneration()
     child_uri = cleaner.make_clean_uri(child_uri)  # strip off any cruft
     parent_uri = cleaner.make_clean_uri(parent_uri)
     if Entity().dereference(child_uri):
         lineage = LinkRecursion().get_jsonldish_entity_parents(child_uri,
                                                                False)
         # No parents (False or an empty list) means the assertion can
         # be made.
         ok_create = lineage is False or len(lineage) == 0
     else:
         # the child does not exist yet, so the relation can be made
         ok_create = True
         print('Getting missing data for: ' + child_uri)
         self.get_save_entity_label(child_uri)
     if not ok_create:
         print('Already in hierarchy: ' + child_uri)
         return
     print('OK, make rel for: ' + child_uri + ' in ' + parent_uri)
     relation = LinkAnnotation()
     relation.subject = child_uri
     relation.subject_type = 'uri'
     relation.project_uuid = '0'
     relation.source_id = 'manual-eol-manage'
     relation.predicate_uri = self.CHILD_PARENT_REL
     relation.object_uri = parent_uri
     relation.sort = 1
     relation.save()
Пример #27
0
 def save_sort(self,
               predicate_uri='http://purl.org/ontology/olo/core#index'):
     """ Saves a hierarchic sort string on the LinkEntity of every
         subject that carries an index triple in self.graph.

         Returns False when self.graph or self.vocabulary_uri is False,
         otherwise the dict mapping subject URI to its index plus one.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return False
     index_pred = URIRef(predicate_uri)
     data_indexes = {}
     for subject, _, index_value in self.graph.triples(
             (None, index_pred, None)):
         # 1 is added so the first child does not get the exact same
         # index as the first parent.
         data_indexes[str(subject)] = int(float(str(index_value))) + 1
     for subject_uri in data_indexes:
         print('Sorting: ' + subject_uri)
         lineage = LinkRecursion().get_jsonldish_entity_parents(
             subject_uri)
         # One sort segment per parent; parents without an index of
         # their own count as 0.
         sort_strings = [
             self.sort_digits(data_indexes.get(parent['id'], 0))
             for parent in lineage
         ]
         # Pad with zero segments up to the configured depth.
         while len(sort_strings) < self.SORT_HIERARCHY_DEPTH:
             sort_strings.append(self.sort_digits(0))
         sort_value = '-'.join(sort_strings)
         try:
             link_entity = LinkEntity.objects.get(uri=subject_uri)
         except LinkEntity.DoesNotExist:
             # no entity to update for this subject
             continue
         link_entity.sort = sort_value
         link_entity.save()
     return data_indexes
Пример #28
0
 def _process_predicate_values(self, predicate_slug, predicate_type):
     """ Adds the values of one predicate, read from the item's
         observations ('oc-gen:has-obs'), to self.fields.

         :param predicate_slug: slug of the predicate; 'oc-pred:' plus
             this slug is the key looked up in each observation, and
             the slug 'link' is handled as a linking relation
         :param predicate_type: '@id', 'xsd:integer', 'xsd:double',
             'xsd:boolean', 'xsd:date' or 'xsd:string'
         :raises Exception: when predicate_type is none of the above
     """
     # First generate the solr field name
     solr_field_name = self._convert_slug_to_solr(
         predicate_slug +
         self._get_predicate_type_string(
             predicate_type, prefix='___pred_')
         )
     # Then get the predicate values
     if solr_field_name not in self.fields:
         self.fields[solr_field_name] = []
     if self.oc_item.item_type == 'media' \
             or self.oc_item.item_type == 'documents':
         # we want to make joins easier for these types of items
         make_join_ids = True
     else:
         make_join_ids = False
     predicate_key = 'oc-pred:' + predicate_slug
     for obs_list in self.oc_item.json_ld['oc-gen:has-obs']:
         if predicate_key in obs_list:
             predicate_values = obs_list[predicate_key]
             for value in predicate_values:
                 if predicate_type == '@id':
                     if make_join_ids and 'subjects' in value['id']:
                         # case where we want to make a join field to link
                         # associated subjects items with media or document
                         # items allows join relationships between
                         # 'join___pred_id' and 'uuid' solr fields.
                         if 'join___pred_id' not in self.fields:
                             self.fields['join___pred_id'] = []
                         # get subjects UUID from the URI
                         sub_uuid = URImanagement.get_uuid_from_oc_uri(
                             value['id']
                             )
                         # append to the solr field for joins
                         self.fields['join___pred_id'].append(sub_uuid)
                     if predicate_slug != 'link':
                         # Walk the parents of the value; each one is
                         # stored in the field named after the parent
                         # before it, starting with the predicate field.
                         active_solr_field = solr_field_name
                         parents = LinkRecursion(
                             ).get_jsonldish_entity_parents(
                             value['id']
                             )
                         all_obj_solr_field = 'obj_all___' + active_solr_field
                         if all_obj_solr_field not in self.fields:
                             self.fields[all_obj_solr_field] = []
                         for parent in parents:
                             if active_solr_field not in self.fields:
                                 self.fields[active_solr_field] = []
                             active_solr_value = \
                                 self._concat_solr_string_value(
                                     parent['slug'],
                                     self._get_predicate_type_string(
                                         parent['type']),
                                     parent['id'],
                                     parent['label']
                                 )
                             self.fields['text'] += ' ' + \
                                 parent['label'] + ' '
                             self.fields[active_solr_field].append(
                                 active_solr_value
                             )
                             # so all items in the hierarchy are present in
                             # one field and can be queried, even if you
                             # don't know the parent
                             self.fields[all_obj_solr_field].append(
                                 active_solr_value
                             )
                             # the next parent is filed under this one
                             active_solr_field = self._convert_slug_to_solr(
                                 parent['slug']) + '___' + solr_field_name
                     else:
                         # case of a linking relation, don't bother looking
                         # up hierarchies or recording as a solr field, but
                         # check for image, other media, and document counts
                         if 'media' in value['id'] \
                                 and 'image' in value['type']:
                             self.fields['image_media_count'] += 1
                         elif 'media' in value['id'] \
                                 and 'image' not in value['type']:
                             # other types of media
                             self.fields['other_binary_media_count'] += 1
                         elif 'documents' in value['id']:
                             self.fields['document_count'] += 1
                         self.fields['text'] += value['label'] + ' '
                 elif predicate_type in [
                     'xsd:integer', 'xsd:double', 'xsd:boolean'
                         ]:
                     self.fields[solr_field_name].append(value)
                 elif predicate_type == 'xsd:date':
                     # a midnight UTC time is appended to the date value
                     self.fields[solr_field_name].append(value +
                                                         'T00:00:00Z')
                 elif predicate_type == 'xsd:string':
                     self.fields['text'] += value['xsd:string'] + ' \n'
                     self.fields[solr_field_name].append(
                         value['xsd:string'])
                 else:
                     raise Exception("Error: Could not get predicate value")
             self.fields['text'] += ' \n'
Пример #29
0
 def get_parents(self, identifier):
     """ Gets SKOS or OWL parents for an entity

         The identifier is passed unchanged to
         LinkRecursion().get_jsonldish_entity_parents() and
         whatever that lookup yields is returned to the caller.
     """
     # the lookup result used to be bound to a local and then dropped,
     # so callers always received None
     return LinkRecursion().get_jsonldish_entity_parents(identifier)
Пример #30
0
 def process_prop(self, props):
     """ processes 'prop' (property) parameters
         property parameters are tricky because they
         can come in hierarchies
         that's why there's some complexity to this

         props is handed to self.expand_hierarchy_options(), and
         every path list that comes back is walked slug by slug.
         The terms made for one path are joined with ' AND ',
         the per-path terms are joined with ' OR ', and the
         final string is appended to the 'fq' list of the result.

         Returns a query_dict with the keys 'fq', 'facet.field',
         'stats.field', 'prequery-stats', 'facet.range',
         'hl-queries' and 'ranges'. Only 'fq', 'facet.field' and
         'hl-queries' are filled directly in this method; the dict
         is also passed through self.add_math_facet_ranges() and
         self.add_date_facet_ranges(), and whatever those return
         is used as the query_dict from then on.
     """
     # is the property for the item itself, or for a related item?
     query_dict = {'fq': [],
                   'facet.field': [],
                   'stats.field': [],
                   'prequery-stats': [],
                   'facet.range': [],
                   'hl-queries': [],
                   'ranges': {}}
     fq_terms = []
     prop_path_lists = self.expand_hierarchy_options(props)
     for prop_path_list in prop_path_lists:
         # i counts the slugs of this path that were handled as 'id'
         # fields; it is compared with path_list_len to detect the
         # last / penultimate step of the path
         i = 0
         path_list_len = len(prop_path_list)
         fq_path_terms = []
         # state carried from one slug of the path to the next
         act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
         act_field_data_type = 'id'
         last_field_label = False  # needed for full text highlighting
         predicate_solr_slug = False
         for prop_slug in prop_path_list:
             field_prefix = self.get_related_slug_field_prefix(prop_slug)
             solr_f_prefix = field_prefix.replace('-', '_')
             db_prop_slug = self.clean_related_slug(prop_slug)
             l_prop_entity = False
             pred_prop_entity = False
             require_id_field = False
             if act_field_data_type == 'id':
                 # check entity exists, and save to memory
                 found = self.mem_cache_obj.check_entity_found(db_prop_slug, False)
                 if found:
                     entity = self.mem_cache_obj.get_entity(db_prop_slug, False)
                     last_field_label = entity.label
                     prop_slug = field_prefix + entity.slug
                     if entity.item_type == 'uri' and 'oc-gen' not in db_prop_slug:
                         if entity.entity_type == 'property':
                             pred_prop_entity = True
                             predicate_solr_slug = prop_slug.replace('-', '_')
                             l_prop_entity = True
                             children = self.mem_cache_obj.get_entity_children(entity.uri)
                             if len(children) > 0:
                                 # ok, this field has children. require it
                                 # to be treated as an ID field
                                 require_id_field = True
                     else:
                         if entity.item_type == 'predicates':
                             pred_prop_entity = True
                             predicate_solr_slug = prop_slug.replace('-', '_')
                             children = self.mem_cache_obj.get_entity_children(entity.uri)
                             if len(children) > 0:
                                 # ok, this field has children. require it
                                 # to be treated as an ID field
                                 require_id_field = True
                     if i == 0:
                         # first step of the path: choose the solr field
                         # that the facet-query starts from
                         if 'oc-gen' in db_prop_slug:
                             # for open context categories / types
                             act_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                             lr = LinkRecursion()
                             parents = lr.get_jsonldish_entity_parents(entity.uri)
                             if len(parents) > 1:
                                 try:
                                     p_slug = parents[-2]['slug']
                                     act_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                     act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                                 except Exception:
                                     # best effort: keep whatever field was
                                     # set so far. Narrowed from a bare
                                     # except so SystemExit and
                                     # KeyboardInterrupt are not swallowed.
                                     pass
                         elif entity.item_type == 'uri':
                             act_field_fq = SolrDocument.ROOT_LINK_DATA_SOLR
                         elif entity.item_type == 'predicates':
                             temp_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                             parents = self.mem_cache_obj.get_jsonldish_entity_parents(entity.uri)
                             if len(parents) > 1:
                                 try:
                                     p_slug = parents[-2]['slug']
                                     temp_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                 except Exception:
                                     # narrowed from a bare except; falls
                                     # back to the root predicate field
                                     print('Predicate Parent exception: '+ str(parents))
                                     temp_field_fq = False
                             if temp_field_fq is not False:
                                 act_field_fq = temp_field_fq
                             else:
                                 act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                         else:
                             act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                     # ---------------------------------------------------
                     # THIS PART BUILDS THE FACET-QUERY
                     # the below is a bit of a hack. We should have a query field
                     # as with ___pred_ to query just the slug. But this works for now
                     fq_field = act_field_fq + '_fq'
                     if path_list_len >= 2 and act_field_data_type == 'id':
                         # could be an object deeper in the hierarchy, so allow the obj_all version
                         fq_path_term = '(' + fq_field + ':' + prop_slug
                         fq_path_term += ' OR obj_all___' + fq_field + ':' + prop_slug + ')'
                     else:
                         fq_path_term = fq_field + ':' + prop_slug
                     fq_path_terms.append(fq_path_term)
                     #---------------------------------------------------
                     #
                     #---------------------------------------------------
                     # THIS PART PREPARES FOR LOOPING OR FINAL FACET-FIELDS
                     #
                     field_parts = self.make_prop_solr_field_parts(entity)
                     # the suffix decides how the NEXT slug of the path
                     # is interpreted (id, string, numeric or date)
                     act_field_data_type = field_parts['suffix']
                     if require_id_field:
                         act_field_data_type = 'id'
                         field_parts['suffix'] = 'id'
                     # check if the last or penultimate field has
                     # a different data-type (for linked-data)
                     if i >= (path_list_len - 2) \
                        and l_prop_entity:
                         dtypes = self.mem_cache_obj.get_dtypes(entity.uri)
                         if isinstance(dtypes, list):
                             # set the data type and the act-field
                             found = self.mem_cache_obj.check_entity_found(db_prop_slug, False)
                             if found:
                                 entity = self.mem_cache_obj.get_entity(db_prop_slug, False)
                                 # NOTE(review): attribute is spelled
                                 # 'date_type'; possibly meant 'data_type'
                                 # -- confirm against readers of the cache
                                 entity.date_type = dtypes[0]  # store for later use
                                 self.mem_cache_obj.entities[db_prop_slug] = entity  # store for later use
                             act_field_data_type = self.get_solr_field_type(dtypes[0])
                     if predicate_solr_slug is False or pred_prop_entity:
                         act_field_fq = field_parts['prefix'] + '___pred_' + field_parts['suffix']
                         act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                         # get a facet on this field
                         if act_field_data_type != 'string':
                             # adds a prefix for related properties
                             ffield = solr_f_prefix + field_parts['prefix'] + '___pred_' + field_parts['suffix']
                             if ffield not in query_dict['facet.field'] \
                                and i >= (path_list_len - 1):
                                 query_dict['facet.field'].append(ffield)
                     else:
                         if act_field_data_type == 'id':
                             act_field_fq = 'obj_all___' + predicate_solr_slug \
                                            + '___pred_' + field_parts['suffix']
                             # get a facet on this field
                             if predicate_solr_slug != field_parts['prefix']:
                                 # the predicate_solr_slug is not the
                                 # prefix of the current field part, meaning
                                 # the field_parts[prefix] is the type, and
                                 # we want facets for the predicate -> type
                                 ffield = field_parts['prefix'] \
                                          + '___' \
                                          + predicate_solr_slug \
                                          + '___pred_' + field_parts['suffix']
                             else:
                                 # get facets for the predicate
                                 ffield = field_parts['prefix'] \
                                          + '___pred_' \
                                          + field_parts['suffix']
                             # adds a prefix, in case of a related property
                             ffield = solr_f_prefix + ffield
                             if ffield not in query_dict['facet.field'] \
                                and i >= (path_list_len - 1):
                                 query_dict['facet.field'].append(ffield)
                         else:
                             act_field_fq = predicate_solr_slug + '___pred_' + field_parts['suffix']
                     # -------------------------------------------
                     if act_field_data_type == 'numeric':
                         act_field_fq = field_parts['prefix'] + '___pred_numeric'
                         act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                         query_dict = self.add_math_facet_ranges(query_dict,
                                                                 act_field_fq,
                                                                 entity)
                     elif act_field_data_type == 'date':
                         act_field_fq = field_parts['prefix'] + '___pred_date'
                         act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                         query_dict = self.add_date_facet_ranges(query_dict,
                                                                 act_field_fq,
                                                                 entity)
                 # incremented for every 'id' step, whether or not the
                 # entity was found
                 i += 1
             elif act_field_data_type == 'string':
                 # case for a text search
                 string_terms = self.prep_string_search_term(prop_slug)
                 for escaped_term in string_terms:
                     search_term = act_field_fq + ':' + escaped_term
                     if last_field_label is False:
                         query_dict['hl-queries'].append(escaped_term)
                     else:
                         query_dict['hl-queries'].append(last_field_label + ' ' + escaped_term)
                     fq_path_terms.append(search_term)
             elif act_field_data_type == 'numeric':
                 # numeric search. assume it's well formed solr numeric request
                 search_term = act_field_fq + ':' + prop_slug
                 fq_path_terms.append(search_term)
                 # now limit the numeric ranges from query to the range facets
                 query_dict = self.add_math_facet_ranges(query_dict,
                                                         act_field_fq,
                                                         False,
                                                         prop_slug)
             elif act_field_data_type == 'date':
                 # date search. assume it's well formed solr request
                 search_term = act_field_fq + ':' + prop_slug
                 fq_path_terms.append(search_term)
                 # now limit the date ranges from query to the range facets
                 query_dict = self.add_date_facet_ranges(query_dict,
                                                         act_field_fq,
                                                         False,
                                                         prop_slug)
         # NOTE(review): a path where no term was added yields '()'
         # here -- confirm the consumer of 'fq' copes with that
         final_path_term = ' AND '.join(fq_path_terms)
         final_path_term = '(' + final_path_term + ')'
         fq_terms.append(final_path_term)
     fq_final = ' OR '.join(fq_terms)
     fq_final = '(' + fq_final + ')'
     query_dict['fq'].append(fq_final)
     return query_dict
Пример #31
0
 def _process_predicates(self):
     """ Indexes the 'oc-pred:' entries found in the second element
         of the item's JSON-LD '@context'.

         For each such predicate the parents returned by
         LinkRecursion().get_jsonldish_entity_parents() are walked in
         order. Parent labels are added to the 'text' field, the
         first parent goes into the root predicate field, and later
         parents go into a field named after the preceding parent's
         slug. Once a parent with the slug 'link' is seen, no solr
         values are recorded for it or for later parents of that
         predicate. The last parent has its predicate values
         processed.
     """
     def solr_value_for(entity):
         # one solr string value describing a parent entity
         return self._concat_solr_string_value(
             entity['slug'],
             self._get_predicate_type_string(entity['type']),
             entity['id'],
             entity['label'])

     context_items = self.oc_item.json_ld['@context'][1].items()
     # "root___pred_id" is multi-valued, so it needs a list
     self.fields[self.ROOT_PREDICATE_SOLR] = []
     for pred_key, pred_info in context_items:
         if not pred_key.startswith('oc-pred:'):
             continue
         # the predicate's uuid is needed to look up its parents
         pred_uuid = pred_info['owl:sameAs'].split('/')[-1]
         pred_type = pred_info['type']
         parents = LinkRecursion().get_jsonldish_entity_parents(pred_uuid)
         is_link = False  # link predicates get special treatment
         for position, parent in enumerate(parents):
             if parent['slug'] == 'link':
                 is_link = True
             else:
                 # add the label of the variable to the text field
                 self.fields['text'] += ' ' + parent['label'] + ' '
             if position == 0:
                 # the first parent belongs in the root predicate field
                 if not is_link:
                     self.fields[self.ROOT_PREDICATE_SOLR].append(
                         solr_value_for(parent))
             else:
                 # later parents are filed under the preceding parent's slug
                 field_name = self._convert_slug_to_solr(
                     parents[position - 1]['slug'] + '___pred_id')
                 if not is_link:
                     if field_name not in self.fields:
                         self.fields[field_name] = []
                     self.fields[field_name].append(solr_value_for(parent))
             # the last (or only) parent gets its predicate values processed
             if position == len(parents) - 1:
                 self._process_predicate_values(parent['slug'], pred_type)
Пример #32
0
 def process_prop(self, props):
     """ processes 'prop' (property) parameters
         property parameters are tricky because they
         can come in hierarchies
         that's why there's some complexity to this

         props is handed to self.expand_hierarchy_options(), and
         every path list that comes back is walked slug by slug.
         The terms made for one path are joined with ' AND ',
         the per-path terms are joined with ' OR ', and the
         final string is appended to the 'fq' list of the result.

         Returns a query_dict with the keys 'fq', 'facet.field',
         'stats.field', 'prequery-stats', 'facet.range',
         'hl-queries' and 'ranges'. Only 'fq', 'facet.field' and
         'hl-queries' are filled directly in this method; the dict
         is also passed through self.add_math_facet_ranges() and
         self.add_date_facet_ranges(), and whatever those return
         is used as the query_dict from then on.
     """
     # is the property for the item itself, or for a related item?
     query_dict = {'fq': [],
                   'facet.field': [],
                   'stats.field': [],
                   'prequery-stats': [],
                   'facet.range': [],
                   'hl-queries': [],
                   'ranges': {}}
     fq_terms = []
     prop_path_lists = self.expand_hierarchy_options(props)
     for prop_path_list in prop_path_lists:
         # i counts the slugs of this path that were handled as 'id'
         # fields; it is compared with path_list_len to detect the
         # last / penultimate step of the path
         i = 0
         path_list_len = len(prop_path_list)
         fq_path_terms = []
         # state carried from one slug of the path to the next
         act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
         act_field_data_type = 'id'
         last_field_label = False  # needed for full text highlighting
         predicate_solr_slug = False
         for prop_slug in prop_path_list:
             field_prefix = self.get_related_slug_field_prefix(prop_slug)
             solr_f_prefix = field_prefix.replace('-', '_')
             db_prop_slug = self.clean_related_slug(prop_slug)
             l_prop_entity = False
             pred_prop_entity = False
             require_id_field = False
             if act_field_data_type == 'id':
                 # check entity exists, and save to memory
                 entity = self.m_cache.get_entity(db_prop_slug)
                 if entity:
                     last_field_label = entity.label
                     prop_slug = field_prefix + entity.slug
                     if entity.item_type == 'uri' and not db_prop_slug.startswith('oc-gen'):
                         if entity.entity_type == 'property':
                             pred_prop_entity = True
                             predicate_solr_slug = prop_slug.replace('-', '_')
                             l_prop_entity = True
                             children = LinkRecursion().get_entity_children(entity.uri)
                             if len(children) > 0:
                                 # ok, this field has children. require it
                                 # to be treated as an ID field
                                 require_id_field = True
                     else:
                         if entity.item_type == 'predicates':
                             pred_prop_entity = True
                             predicate_solr_slug = prop_slug.replace('-', '_')
                             children = LinkRecursion().get_entity_children(entity.uri)
                             if len(children) > 0:
                                 # ok, this field has children. require it
                                 # to be treated as an ID field
                                 require_id_field = True
                     if i == 0:
                         # first step of the path: choose the solr field
                         # that the facet-query starts from
                         if db_prop_slug.startswith('oc-gen'):
                             # for open context categories / types
                             act_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                             lr = LinkRecursion()
                             parents = lr.get_jsonldish_entity_parents(entity.uri)
                             if len(parents) > 1:
                                 try:
                                     p_slug = parents[-2]['slug']
                                     act_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                     act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                                 except Exception:
                                     # best effort: report and keep whatever
                                     # field was set so far. Narrowed from a
                                     # bare except so SystemExit and
                                     # KeyboardInterrupt are not swallowed.
                                     print('Predicate Parent exception: '+ str(parents))
                         elif entity.item_type == 'uri':
                             act_field_fq = SolrDocument.ROOT_LINK_DATA_SOLR
                         elif entity.item_type == 'predicates':
                             temp_field_fq = self.get_parent_item_type_facet_field(entity.uri)
                             lr = LinkRecursion()
                             parents = lr.get_jsonldish_entity_parents(entity.uri)
                             if len(parents) > 1:
                                 try:
                                     p_slug = parents[-2]['slug']
                                     temp_field_fq = p_slug.replace('-', '_') + '___pred_id'
                                 except Exception:
                                     # narrowed from a bare except; falls
                                     # back to the root predicate field
                                     print('Predicate Parent exception: '+ str(parents))
                                     temp_field_fq = False
                             if temp_field_fq is not False:
                                 act_field_fq = temp_field_fq
                             else:
                                 act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                         else:
                             act_field_fq = SolrDocument.ROOT_PREDICATE_SOLR
                     # ---------------------------------------------------
                     # THIS PART BUILDS THE FACET-QUERY
                     # the below is a bit of a hack. We should have a query field
                     # as with ___pred_ to query just the slug. But this works for now
                     fq_field = act_field_fq + '_fq'
                     if path_list_len >= 2 and act_field_data_type == 'id':
                         # could be an object deeper in the hierarchy, so allow the obj_all version
                         fq_path_term = '(' + fq_field + ':' + prop_slug
                         fq_path_term += ' OR obj_all___' + fq_field + ':' + prop_slug + ')'
                     else:
                         fq_path_term = fq_field + ':' + prop_slug
                     fq_path_terms.append(fq_path_term)
                     #---------------------------------------------------
                     #
                     #---------------------------------------------------
                     # THIS PART PREPARES FOR LOOPING OR FINAL FACET-FIELDS
                     #
                     field_parts = self.make_prop_solr_field_parts(entity)
                     # the suffix decides how the NEXT slug of the path
                     # is interpreted (id, string, numeric or date)
                     act_field_data_type = field_parts['suffix']
                     if require_id_field:
                         act_field_data_type = 'id'
                         field_parts['suffix'] = 'id'
                     # check if the last or penultimate field has
                     # a different data-type (for linked-data)
                     if i >= (path_list_len - 2) \
                        and l_prop_entity:
                         dtypes = self.s_cache.get_dtypes(entity.uri)
                         if isinstance(dtypes, list):
                             # set the data type and the act-field
                             act_field_data_type = self.get_solr_field_type(dtypes[0])
                     if not predicate_solr_slug or pred_prop_entity:
                         act_field_fq = field_parts['prefix'] + '___pred_' + field_parts['suffix']
                         act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                         # get a facet on this field
                         if act_field_data_type != 'string':
                             # adds a prefix for related properties
                             ffield = solr_f_prefix + field_parts['prefix'] + '___pred_' + field_parts['suffix']
                             if ffield not in query_dict['facet.field'] and \
                                i >= (path_list_len - 1):
                                 query_dict['facet.field'].append(ffield)
                     else:
                         if act_field_data_type == 'id':
                             act_field_fq = 'obj_all___' + predicate_solr_slug \
                                            + '___pred_' + field_parts['suffix']
                             # get a facet on this field
                             if predicate_solr_slug != field_parts['prefix']:
                                 # the predicate_solr_slug is not the
                                 # prefix of the current field part, meaning
                                 # the field_parts[prefix] is the type, and
                                 # we want facets for the predicate -> type
                                 ffield = field_parts['prefix'] \
                                          + '___' \
                                          + predicate_solr_slug \
                                          + '___pred_' + field_parts['suffix']
                             else:
                                 # get facets for the predicate
                                 ffield = field_parts['prefix'] \
                                          + '___pred_' \
                                          + field_parts['suffix']
                             # adds a prefix, in case of a related property
                             ffield = solr_f_prefix + ffield
                             if ffield not in query_dict['facet.field'] \
                                and i >= (path_list_len - 1):
                                 query_dict['facet.field'].append(ffield)
                         else:
                             act_field_fq = predicate_solr_slug + '___pred_' + field_parts['suffix']
                     # -------------------------------------------
                     if act_field_data_type == 'numeric':
                         act_field_fq = field_parts['prefix'] + '___pred_numeric'
                         act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                         query_dict = self.add_math_facet_ranges(query_dict,
                                                                 act_field_fq,
                                                                 entity)
                     elif act_field_data_type == 'date':
                         act_field_fq = field_parts['prefix'] + '___pred_date'
                         act_field_fq = self.correct_solr_prefix_for_fq(solr_f_prefix, act_field_fq)
                         query_dict = self.add_date_facet_ranges(query_dict,
                                                                 act_field_fq,
                                                                 entity)
                 # incremented for every 'id' step, whether or not the
                 # entity was found
                 i += 1
             elif act_field_data_type == 'string':
                 # case for a text search
                 string_terms = self.prep_string_search_term(prop_slug)
                 for escaped_term in string_terms:
                     search_term = act_field_fq + ':' + escaped_term
                     if last_field_label is False:
                         query_dict['hl-queries'].append(escaped_term)
                     else:
                         query_dict['hl-queries'].append(last_field_label + ' ' + escaped_term)
                     fq_path_terms.append(search_term)
             elif act_field_data_type == 'numeric':
                 # numeric search. assume it's well formed solr numeric request
                 search_term = act_field_fq + ':' + prop_slug
                 fq_path_terms.append(search_term)
                 # now limit the numeric ranges from query to the range facets
                 query_dict = self.add_math_facet_ranges(query_dict,
                                                         act_field_fq,
                                                         False,
                                                         prop_slug)
             elif act_field_data_type == 'date':
                 # date search. assume it's well formed solr request
                 search_term = act_field_fq + ':' + prop_slug
                 fq_path_terms.append(search_term)
                 # now limit the date ranges from query to the range facets
                 query_dict = self.add_date_facet_ranges(query_dict,
                                                         act_field_fq,
                                                         False,
                                                         prop_slug)
         # NOTE(review): a path where no term was added yields '()'
         # here -- confirm the consumer of 'fq' copes with that
         final_path_term = ' AND '.join(fq_path_terms)
         final_path_term = '(' + final_path_term + ')'
         fq_terms.append(final_path_term)
     fq_final = ' OR '.join(fq_terms)
     fq_final = '(' + fq_final + ')'
     query_dict['fq'].append(fq_final)
     return query_dict
Пример #33
0
 def get_description_tree(self,
                          entity_obj,
                          depth=1,
                          first_time=True,
                          item_type=False,
                          class_uri=False):
     """ gets a hierarchy for descriptive
         predicates and types

         :param entity_obj: dereferenced entity; its item_type and
             uuid attributes (and data_type for predicates) are read
         :param depth: number of levels to expand; values above 1
             recurse into children with depth - 1
         :param first_time: True for the outermost call, which gets
             the tree wrapped in a list; recursive calls pass False
             and get the bare tree
         :param item_type: for projects, limits children to the
             classes used with this item_type (False for no limit)
         :param class_uri: for projects, used with item_type to
             list the predicates for that item_type and class
         :return: [tree] when first_time is true, otherwise tree;
             always an empty list when the entity is not a project,
             predicate or type
     """
     kind = entity_obj.item_type
     if kind not in ('projects', 'predicates', 'types'):
         # no descriptive hierarchy is built for other kinds of items
         return []
     tree = self.make_containment_item(entity_obj)
     if kind == 'projects':
         if item_type is not False and class_uri is False:
             # returns the classes associated with an item_type for a project
             tree['label'] = tree['label'] + ', ' + item_type
             tree['children'] = self.get_proj_type_classes_items(entity_obj.uuid,
                                                                 item_type)
         elif item_type is not False:
             # returns the predicates associated with an item_type and class_uri
             tree['children'] = self.get_proj_type_class_preds(entity_obj.uuid,
                                                               item_type,
                                                               class_uri,
                                                               True)
         else:
             # project root, returns the item_types for the project
             tree['children'] = self.get_proj_types(entity_obj.uuid)
     elif kind == 'predicates':
         tree['children'] = []
         # the hierarchy lookup object is only needed from here on,
         # so project requests no longer construct one
         lr = LinkRecursion()
         child_list = lr.get_entity_children(entity_obj.uuid, False)
         if child_list:
             # the predicate has child predicates, list those
             for child_uuid in child_list:
                 child_ent = Entity()
                 if not child_ent.dereference(child_uuid):
                     continue
                 if depth > 1:
                     child = self.get_containment_children(child_ent,
                                                           depth - 1,
                                                           False)
                 else:
                     child = self.make_containment_item(child_ent)
                 tree['children'].append(child)
         elif entity_obj.data_type == 'id':
             # no child predicates, so list the top ranked types
             # used with this predicate instead
             for top_type in lr.get_pred_top_rank_types(entity_obj.uuid):
                 uuid = URImanagement.get_uuid_from_oc_uri(top_type['id'])
                 if depth > 1:
                     child_ent = Entity()
                     if not child_ent.dereference(uuid):
                         # skip types that cannot be dereferenced, rather
                         # than putting a False placeholder among the
                         # child nodes that get sorted by label below
                         continue
                     item = self.get_description_tree(child_ent,
                                                      depth - 1,
                                                      False)
                 else:
                     item = LastUpdatedOrderedDict()
                     item['id'] = uuid
                     item['label'] = top_type['label']
                     item['class_uri'] = 'type'
                     item['class_label'] = 'type'
                 tree['children'].append(item)
             tree['children'] = self.sort_children_by_label(tree['children'])
     else:
         # kind == 'types'
         tree['children'] = []
         lr = LinkRecursion()
         for child_uuid in lr.get_entity_children(entity_obj.uuid, False):
             if child_uuid == entity_obj.uuid:
                 # a type is never listed as its own child
                 continue
             child_ent = Entity()
             if not child_ent.dereference(child_uuid):
                 continue
             if depth > 1:
                 child = self.get_description_tree(child_ent,
                                                   depth - 1,
                                                   False)
             else:
                 child = self.make_containment_item(child_ent)
             child['class_uri'] = 'type'
             child['class_label'] = 'type'
             tree['children'].append(child)
         if len(tree['children']) == 0:
             # leaf types carry no 'children' key at all
             tree.pop('children', None)
         else:
             tree['children'] = self.sort_children_by_label(tree['children'])
     if first_time:
         # the outermost call hands back a list holding the root node
         return [tree]
     return tree
Пример #34
0
 def get_description_tree(self,
                          entity_obj,
                          depth=1,
                          first_time=True,
                          item_type=False,
                          class_uri=False):
     """ gets a hierarchy for descriptive
         predicates and types

         Projects list their item_types, the classes used with one
         item_type, or the predicates used with an item_type and
         class_uri. Predicates list their child predicates or, when
         they have none and their data_type is 'id', their top
         ranked types. Types list their child types.

         :param entity_obj: entity to describe; item_type, uuid and
             (for predicates) data_type are read from it
         :param depth: how many levels to expand, recursion stops
             once this reaches 1
         :param first_time: when True the tree is returned inside a
             list, recursive calls pass False to get the tree itself
         :param item_type: optional item_type filter for projects
         :param class_uri: optional class filter for projects, only
             used together with item_type
         :return: list holding the tree (first_time), the tree
             itself, or an empty list for any other kind of entity
     """
     ent_type = entity_obj.item_type
     next_depth = depth - 1
     if ent_type == 'projects':
         tree = self.make_containment_item(entity_obj)
         if item_type is False:
             # project root, returns the item_types for the project
             tree['children'] = self.get_proj_types(entity_obj.uuid)
         elif class_uri is False:
             # returns the classes associated with an item_type for a project
             tree['label'] = tree['label'] + ', ' + item_type
             tree['children'] = self.get_proj_type_classes_items(
                 entity_obj.uuid, item_type)
         else:
             # returns the predicates associated with an item_type and class_uri
             tree['children'] = self.get_proj_type_class_preds(
                 entity_obj.uuid, item_type, class_uri, True)
     elif ent_type == 'predicates':
         # created here rather than up front, since the projects
         # branch never queries the hierarchy
         lr = LinkRecursion()
         tree = self.make_containment_item(entity_obj)
         children = []
         tree['children'] = children
         child_list = lr.get_entity_children(entity_obj.uuid, False)
         if child_list:
             for child_uuid in child_list:
                 child_ent = Entity()
                 if child_ent.dereference(child_uuid):
                     if depth > 1:
                         children.append(self.get_containment_children(
                             child_ent, next_depth, False))
                     else:
                         children.append(
                             self.make_containment_item(child_ent))
         elif entity_obj.data_type == 'id':
             top_types = lr.get_pred_top_rank_types(entity_obj.uuid)
             for top_type in top_types:
                 uuid = URImanagement.get_uuid_from_oc_uri(top_type['id'])
                 if depth > 1:
                     child_ent = Entity()
                     if child_ent.dereference(uuid):
                         children.append(self.get_description_tree(
                             child_ent, next_depth, False))
                     # an entity that can't be dereferenced is left out;
                     # appending a False placeholder would mix a
                     # non-node into the list sorted by label below
                 else:
                     item = LastUpdatedOrderedDict()
                     item['id'] = uuid
                     item['label'] = top_type['label']
                     item['class_uri'] = 'type'
                     item['class_label'] = 'type'
                     children.append(item)
             tree['children'] = self.sort_children_by_label(children)
     elif ent_type == 'types':
         lr = LinkRecursion()
         tree = self.make_containment_item(entity_obj)
         tree['children'] = []
         act_children = lr.get_entity_children(entity_obj.uuid, False)
         for child_uuid in act_children:
             if child_uuid != entity_obj.uuid:
                 # the type itself is not treated as one of its children
                 child_ent = Entity()
                 if child_ent.dereference(child_uuid):
                     if depth > 1:
                         child = self.get_description_tree(
                             child_ent, next_depth, False)
                     else:
                         child = self.make_containment_item(child_ent)
                     child['class_uri'] = 'type'
                     child['class_label'] = 'type'
                     tree['children'].append(child)
         if tree['children']:
             tree['children'] = self.sort_children_by_label(
                 tree['children'])
         else:
             # a type without children has no 'children' key
             tree.pop('children', None)
     else:
         # nothing to describe for other kinds of entities
         return []
     # only the outermost call wraps the root node in a list
     return [tree] if first_time else tree
Пример #35
0
 def _process_associated_linkedata(self):
     """ Finds linked data to add to index

         Walks the '@graph' of the item's JSON-LD looking for
         entities whose id contains 'oc-pred:' and that carry one of
         the LD_EQUIVALENT_PREDICATES keys. For each equivalent
         entity, the parents reported by LinkRecursion are walked to
         build a chain of solr fields in self.fields, starting at
         ROOT_LINK_DATA_SOLR. The values observed for the predicate
         are then indexed: literals are appended to the last field of
         that chain, while URI-identified objects get their own walk
         over their parents, an 'obj_all___' field, and a call to
         process_object_uri(). Labels, ids and literal values are
         also appended to self.fields['text'].
     """
     if '@graph' in self.oc_item.json_ld:
         for entity in self.oc_item.json_ld['@graph']:
             entity_id = self.get_entity_id(entity)
             if 'oc-pred:' in entity_id:
                 # a predicate with linked data
                 pred_slug_id = entity_id
                 pred_datatype = self.get_predicate_datatype(pred_slug_id)
                 pres_solr_datatype = self._get_predicate_type_string(pred_datatype)
                 obs_values = self.get_linked_predicate_values(pred_slug_id)  # values for predicate in observations
                 for equiv_pred in self.LD_EQUIVALENT_PREDICATES:
                     if equiv_pred in entity:
                         # a semantic equivalence predicate exists for this oc-pred
                         for equiv_entity in entity[equiv_pred]:
                             equiv_id = self.get_entity_id(equiv_entity)
                             parents = LinkRecursion().get_jsonldish_entity_parents(equiv_id)
                             # each parent's value is stored in the field named after the
                             # previous parent, starting from the root linked data field
                             act_solr_field = self.ROOT_LINK_DATA_SOLR
                             last_index = len(parents) - 1
                             for index, parent in enumerate(parents):
                                 if index == last_index:
                                     # use the predicates solr-field type, which may be numeric, date, string, or ID
                                     act_solr_datatype = pres_solr_datatype
                                 else:
                                     # use an id field type, since this is in a hierarchy that contains children
                                     act_solr_datatype = 'id'
                                 solr_value = self._concat_solr_string_value(parent['slug'],
                                                                             act_solr_datatype,
                                                                             parent['id'],
                                                                             parent['label'])
                                 if act_solr_field not in self.fields:
                                     self.fields[act_solr_field] = []
                                 self.fields[act_solr_field].append(solr_value)
                                 last_linked_pred_label = parent['label']
                                 # NOTE(review): last_linked_pred_uri is not read in the visible code
                                 last_linked_pred_uri = parent['id']
                                 act_solr_field = \
                                     self._convert_slug_to_solr(parent['slug'])\
                                     + '___pred_' \
                                     + act_solr_datatype
                             # since we ended the loop above by creating a solr field, let's make sure it's added to the solrdoc
                             # NOTE(review): last_linked_pred_label is only bound inside the loop
                             # above, so an empty parents list leaves it unbound (or stale from
                             # an earlier pass) here -- confirm parents is never empty
                             self.fields['text'] += last_linked_pred_label + ': \n'
                             act_pred_root_act_solr_field = act_solr_field
                             if act_pred_root_act_solr_field not in self.fields:
                                 self.fields[act_pred_root_act_solr_field] = []
                             # --------------------------------
                             # Now we handle the objects of this predicate!
                             # 1. obs_values come from the item's observations,
                             # 2. we treat literals differently than URI objects, since URI objects may be in a hierarchy
                             # --------------------------------
                             if pred_datatype != '@id' and obs_values is not False:
                                 # objects of this predicate are literals
                                 for obs_val in obs_values:
                                     if isinstance(obs_val, dict):
                                         if pred_datatype in obs_val:
                                             self.fields[act_pred_root_act_solr_field].append(obs_val[pred_datatype])
                                             # NOTE(review): no str() here, unlike the non-dict branch
                                             # below; a value that is not a string would raise on '+'
                                             self.fields['text'] += obs_val[pred_datatype] + '\n'
                                     else:
                                         self.fields[act_pred_root_act_solr_field].append(obs_val)
                                         self.fields['text'] += str(obs_val) + '\n'
                             else:
                                 # objects of this predicate IDed by URIs
                                 if obs_values is not False:
                                     for obs_val in obs_values:
                                         # gets the id for the observation object
                                         obs_object_id = self.get_entity_id(obs_val)
                                         # gets linked data equivalents of the obs-object-id
                                         use_objects = self.get_equivalent_linked_data(obs_object_id)
                                         if use_objects is False:
                                             # no linked data equivalents found, so make a list w. 1 item
                                             use_objects = [{'id': obs_object_id}]
                                         for use_obj in use_objects:
                                             # make sure the active solr field is reset to be from
                                             # the last equivalent predicates. important if we're looping
                                             # through multiple use_objects
                                             last_object_uri = False
                                             # NOTE(review): last_object_label is not read in the visible code
                                             last_object_label = ''
                                             act_solr_field = act_pred_root_act_solr_field
                                             #-------------------------------
                                             # Now make a solr field for ALL the objects (parents, children)
                                             # using this predicate
                                             all_obj_solr_field = 'obj_all___' + act_pred_root_act_solr_field
                                             if all_obj_solr_field not in self.fields:
                                                 self.fields[all_obj_solr_field] = []
                                             # URI objects can be in hierarchies, look for these!
                                             object_id = self.get_entity_id(use_obj)
                                             parents = LinkRecursion().get_jsonldish_entity_parents(object_id)
                                             for index, parent in enumerate(parents):
                                                 solr_value = self._concat_solr_string_value(parent['slug'],
                                                                                             'id',
                                                                                             parent['id'],
                                                                                             parent['label'])
                                                 last_object_uri = parent['id']
                                                 if act_solr_field not in self.fields:
                                                     self.fields[act_solr_field] = []
                                                 if parent['ld_object_ok']:
                                                     # only add this if it's OK for linked data use
                                                     # in presenting a facet
                                                     self.fields[act_solr_field].append(solr_value)
                                                 #-------------------------------
                                                 # This way, you don't need to know a parent to search
                                                 # for a child. Since facets aren't made with this,
                                                 # it's OK for objects that are not flagged
                                                 # ld_object_ok to be used
                                                 #-------------------------------
                                                 if solr_value not in self.fields[all_obj_solr_field]:
                                                     self.fields[all_obj_solr_field].append(solr_value)
                                                 # substring test against the accumulated text, so each
                                                 # id (with its label) is only written once
                                                 if parent['id'] not in self.fields['text']:
                                                     self.fields['text'] += parent['id'] + ' '
                                                     self.fields['text'] += parent['label'] + '\n'
                                                 # the next level down is stored in a field that is
                                                 # prefixed with this parent's slug
                                                 act_solr_field = \
                                                     self._convert_slug_to_solr(parent['slug']) \
                                                     + '___' + act_pred_root_act_solr_field
                                             if last_object_uri is not False:
                                                 self.process_object_uri(last_object_uri)