Пример #1
0
 def make_spatial_context_json_ld(self, raw_contexts):
     """Build the JSON-LD spatial context object from raw parent contexts.

     ``raw_contexts`` is iterated with ``.items()``; each key is a tree-node
     identifier and each value is a sequence of parent uuids, which is
     reversed before use. One context dict is built per tree node, but only
     the dict built for the LAST tree node is returned (an empty
     LastUpdatedOrderedDict when ``raw_contexts`` has no items).

     Side effects: ``self.class_uri_list`` is extended with the
     PartsJsonLD class uri list after every parent that is added, and the
     labels of the first tree node's path items are appended to
     ``self.parent_context_list``.
     """
     is_first_node = True
     context = LastUpdatedOrderedDict()
     for node_id, reversed_parents in raw_contexts.items():
         # every tree node starts from a fresh context object whose id is
         # the node id with 'contents' swapped for 'context'
         context = LastUpdatedOrderedDict()
         context['id'] = node_id.replace('contents', 'context')
         context['type'] = 'oc-gen:contexts'
         # flip the parent list so the top-most parent context comes first,
         # followed by its children
         parent_uuids = reversed_parents[::-1]
         parts = PartsJsonLD()
         parts.class_uri_list += self.class_uri_list
         if len(parent_uuids) > 3:
             # many parents: fetch all their manifest objects up front
             # rather than relying on per-item lookups
             parts.get_manifest_objects_from_uuids(parent_uuids)
         for parent_uuid in parent_uuids:
             context = parts.addto_predicate_list(
                 context,
                 ItemKeys.PREDICATES_OCGEN_HASPATHITEMS,
                 parent_uuid,
                 'subjects')
             self.class_uri_list += parts.class_uri_list
         if not is_first_node:
             continue
         # only the first tree node contributes parent labels
         # (set aside for making a dc-terms:title)
         is_first_node = False
         if ItemKeys.PREDICATES_OCGEN_HASPATHITEMS not in context:
             continue
         for path_item in context[ItemKeys.PREDICATES_OCGEN_HASPATHITEMS]:
             self.parent_context_list.append(path_item['label'])
     return context
Пример #2
0
 def web_dump(self, table_id):
     """Return an HttpResponse carrying the export table as a CSV download.

     Stores ``table_id`` on the instance, refreshes the table fields and
     max row number, then writes ``self.field_name_row`` as the header row
     followed by one row per ``row_num`` found in
     ``ExportTableDump(self.table_id).cells``. Cells are accumulated into a
     row dict keyed by ``field_num``; the accumulated row is flushed via
     ``self.compose_write_row`` each time a cell with a larger ``row_num``
     than the current one appears, and once more after the loop ends.
     """
     self.table_id = table_id
     self.get_table_fields()
     self.get_max_row_number()
     filename = 'oc-table-' + table_id + '.csv'
     response = HttpResponse(content_type='text/csv')
     disposition = 'attachment; filename="' + filename + '"'
     response['Content-Disposition'] = disposition
     writer = csv.writer(response, dialect='excel', quoting=csv.QUOTE_ALL)
     # the field labels go in the first row
     writer.writerow(self.field_name_row)
     rows_written = 0
     current_row_num = 1
     pending_row = LastUpdatedOrderedDict()
     for cell in ExportTableDump(self.table_id).cells:
         cell_row_num = cell['row_num']
         if cell_row_num > current_row_num:
             # moved on to a later row: flush the row gathered so far
             if self.compose_write_row(writer, pending_row):
                 rows_written += 1
             pending_row = LastUpdatedOrderedDict()
             current_row_num = cell_row_num
         pending_row[cell['field_num']] = cell['record']
     # flush whatever is left as the final row
     self.compose_write_row(writer, pending_row)
     return response
Пример #3
0
 def classify_xml_attributes_to_objects(self, cont_vocabs):
     """Convert controlled-vocabulary XML nodes into dicts keyed by vocab id.

     Each item of ``cont_vocabs`` must offer an ``xpath()`` method returning
     a list of nodes with a ``.text`` attribute. For every vocab node the
     result holds a dict with:

     * ``id`` - text of the first ``VocabID`` node
     * ``label`` - text of the first ``Arch16n`` node when present,
       otherwise the ``VocabName`` text
     * ``faims_attrib_id`` - text of the first ``AttributeID`` node
     * ``faims_internal_str`` - the ``VocabName`` text
     * ``sort`` - ``VocabCountOrder`` as a float, or 0 when it is missing
       or not numeric
     * ``note`` - text of the last non-empty ``VocabDescription`` node
       (entity-unescaped when it contains both '<' and '>'), else None

     Returns a LastUpdatedOrderedDict mapping vocab id -> object dict.
     Raises IndexError when a required node (VocabID, VocabName,
     AttributeID, VocabCountOrder) is absent.
     """
     # function-scope import: str objects have no unescape() method, the
     # standard library html module provides it
     import html

     objects_dict = LastUpdatedOrderedDict()
     for vocab in cont_vocabs:
         vocab_id = vocab.xpath('VocabID')[0].text
         obj_dict = LastUpdatedOrderedDict()
         obj_dict['id'] = vocab_id
         arch16n_nodes = vocab.xpath('Arch16n')
         if len(arch16n_nodes) > 0:
             obj_dict['label'] = arch16n_nodes[0].text
         else:
             obj_dict['label'] = vocab.xpath('VocabName')[0].text
         obj_dict['faims_attrib_id'] = vocab.xpath('AttributeID')[0].text
         obj_dict['faims_internal_str'] = vocab.xpath('VocabName')[0].text
         sort_str = vocab.xpath('VocabCountOrder')[0].text
         try:
             obj_dict['sort'] = float(sort_str)
         except (TypeError, ValueError):
             # TypeError: the node had no text (None);
             # ValueError: the text is not a number
             obj_dict['sort'] = 0
         obj_dict['note'] = None
         for act_note_node in vocab.xpath('VocabDescription'):
             act_note = act_note_node.text
             if act_note is None:
                 # empty description element, nothing to record
                 continue
             if '<' in act_note and '>' in act_note:
                 # NOTE(review): the original comment speaks of "escaped
                 # HTML" although the test looks for literal angle
                 # brackets - confirm whether '&lt;' / '&gt;' was meant.
                 obj_dict['note'] = html.unescape(act_note)
             else:
                 obj_dict['note'] = act_note
         objects_dict[vocab_id] = obj_dict
     return objects_dict
Пример #4
0
 def add_project_types_with_annotations_to_graph(self, graph):
     """Append one dict per annotated project type to ``graph``.

     Rows come from ``self.get_working_project_types()``; when that call
     does not return a list, ``graph`` is returned untouched. Rows sharing
     a type uri are merged into a single dict, so each type is appended
     only once, carrying every predicate / object annotation added through
     ``self.add_unique_object_dict_to_pred``.

     Returns the same ``graph`` object that was passed in.
     """
     rows = self.get_working_project_types()
     if not isinstance(rows, list):
         return graph
     # consolidate rows so a given type appears once, keyed by its uri
     types_by_uri = LastUpdatedOrderedDict()
     for row in rows:
         uri = URImanagement.make_oc_uri(row['type_uuid'], 'types')
         if uri in types_by_uri:
             type_dict = types_by_uri[uri]
         else:
             type_dict = LastUpdatedOrderedDict()
             type_dict['@id'] = uri
             type_dict['label'] = row['type_label']
             type_dict['owl:sameAs'] = URImanagement.make_oc_uri(
                 row['type_slug'], 'types')
             type_dict['uuid'] = row['type_uuid']
             type_dict['slug'] = row['type_slug']
         pred_uri = URImanagement.prefix_common_uri(row['predicate_uri'])
         type_dict = self.add_unique_object_dict_to_pred(
             type_dict, pred_uri, row['object_uri'])
         # always store back, exactly as the original did
         types_by_uri[uri] = type_dict
     for type_dict in types_by_uri.values():
         graph.append(type_dict)
     return graph
Пример #5
0
 def get_item_media_files(self, man_obj):
     """Collect the archivable media files of a manifest item.

     Returns a LastUpdatedOrderedDict keyed by file uri (with any
     '#fragment' removed). Each value holds ``filename``,
     ``dc-terms:isPartOf`` and ``type`` (a list of the file types found
     for that uri). Entries are visited in the order of
     ``self.ARCHIVE_FILE_TYPES``; within a type the files are taken in the
     queryset order (``-filesize``). An empty dict is returned when
     ``man_obj`` is not a Manifest instance.
     """
     files_dict = LastUpdatedOrderedDict()
     if not isinstance(man_obj, Manifest):
         return files_dict
     med_files = Mediafile.objects.filter(
         uuid=man_obj.uuid,
         file_type__in=self.ARCHIVE_FILE_TYPES
     ).order_by('-filesize')
     for wanted_type in self.ARCHIVE_FILE_TYPES:
         for med_file in med_files:
             if med_file.file_type != wanted_type:
                 continue
             file_uri = med_file.file_uri
             if '#' in file_uri:
                 # keep only the part in front of the fragment marker
                 file_uri = file_uri.split('#')[0]
             if file_uri not in files_dict:
                 entry = LastUpdatedOrderedDict()
                 entry['filename'] = self.make_archival_file_name(
                     med_file.file_type, man_obj.slug, file_uri)
                 entry['dc-terms:isPartOf'] = URImanagement.make_oc_uri(
                     man_obj.uuid, man_obj.item_type)
                 entry['type'] = []
                 files_dict[file_uri] = entry
             files_dict[file_uri]['type'].append(med_file.file_type)
     return files_dict
Пример #6
0
 def add_json_ld_descriptive_assertions(self, json_ld):
     """Add the descriptive assertions in ``self.assertions`` to ``json_ld``.

     Assertions whose predicate is listed in ``self.NO_OBS_ASSERTION_PREDS``
     are added directly through ``self.add_json_ld_direct_assertion``. All
     other assertions are grouped into observation dicts by ``obs_num``.
     The observations are then emitted in the order given by
     ``self.obs_list`` under ``ItemKeys.PREDICATES_OCGEN_HASOBS``; that key
     is only set when at least one observation exists.

     Returns the (possibly replaced) ``json_ld`` object.
     """
     obs_by_num = LastUpdatedOrderedDict()
     for assertion in self.assertions:
         obs_num = assertion.obs_num
         if assertion.predicate_uuid in self.NO_OBS_ASSERTION_PREDS:
             # this predicate does not belong inside an observation
             json_ld = self.add_json_ld_direct_assertion(json_ld, assertion)
             continue
         if obs_num in obs_by_num:
             obs = obs_by_num[obs_num]
         else:
             # first assertion of this observation: create its dict
             obs = self.make_json_ld_obs_dict_w_metadata(assertion)
             obs_by_num[obs_num] = obs
         obs_by_num[obs_num] = self.add_json_ld_assertion_predicate_objects(
             obs, assertion)
     # the final list follows self.obs_list, skipping numbers that
     # produced no observation
     observations = [
         obs_by_num[obs_num]
         for obs_num in self.obs_list
         if obs_num in obs_by_num
     ]
     if len(observations) > 0:
         json_ld[ItemKeys.PREDICATES_OCGEN_HASOBS] = observations
     return json_ld
Пример #7
0
 def db_save_reconcile_entity_predicates_types(self, act_dir):
     """Create or reconcile the predicate and types describing entity types.

     Loads the entity-type dict stored under ``self.oc_config_entity_types``
     in ``act_dir``. When no file content is available a message is printed
     and nothing else happens. Otherwise the dict is stored on
     ``self.entity_types``. If at least one entry has a string ``item_type``
     and a truthy ``add_type_as_attribute``, a predicate is requested from
     PredicateManagement and, when that succeeds, a type is requested from
     TypeManagement for each such entry; the resulting ``type_uuid`` and
     ``predicate_uuid`` are written into the entry. The entity types are
     then saved back with ``self.fm.save_serialized_json`` (the save is
     skipped entirely when no entry needs a descriptive type).

     Returns None.
     """
     key = self.oc_config_entity_types
     json_obj = self.fm.get_dict_from_file(key, act_dir)
     if json_obj is None:
         print('Need to 1st generate an attributes file from the ArchEnts!')
         return
     # the JSON holds a dictionary of the entity types
     self.entity_types = json_obj

     def wants_type_attribute(ent_dict):
         """True-ish when the entity type should become a descriptive type."""
         return isinstance(ent_dict['item_type'], str) \
             and ent_dict['add_type_as_attribute']

     if not any(wants_type_attribute(ent) for ent in json_obj.values()):
         # nothing needs a descriptive attribute; nothing to save either
         return
     # some entity types need a descriptive predicate, so get or create
     # the predicate that describes entity types for this project
     pred_sup = LastUpdatedOrderedDict()
     pred_sup[self.reconcile_key] = self.ent_type_pred_sup_id
     pm = PredicateManagement()
     pm.project_uuid = self.project_uuid
     pm.source_id = self.source_id
     pm.sup_dict = pred_sup
     pm.sup_reconcile_key = self.reconcile_key
     pm.sup_reconcile_value = self.ent_type_pred_sup_id
     pred_obj = pm.get_make_predicate(
         self.FAIMS_ENTITY_TYPE_PREDICATE_LABEL, 'variable', 'id')
     if pred_obj is not False:
         # predicate reconciled or created; now handle a type for each
         # entity type that is used as a description
         predicate_uuid = str(pred_obj.uuid)
         for faims_ent_type_id, ent_dict in json_obj.items():
             if not wants_type_attribute(ent_dict):
                 continue
             type_sup = LastUpdatedOrderedDict()
             type_sup[self.reconcile_key] = faims_ent_type_id
             tm = TypeManagement()
             tm.project_uuid = self.project_uuid
             tm.source_id = self.source_id
             tm.sup_dict = type_sup
             tm.sup_reconcile_key = self.reconcile_key
             tm.sup_reconcile_value = faims_ent_type_id
             type_obj = tm.get_make_type_within_pred_uuid(
                 predicate_uuid, ent_dict['label'])
             if type_obj is False:
                 continue
             # the type is reconciled; remember its identifiers
             ent_dict['type_uuid'] = str(type_obj.uuid)
             ent_dict['predicate_uuid'] = predicate_uuid
             self.entity_types[faims_ent_type_id] = ent_dict
     # persist the results
     self.fm.save_serialized_json(key, act_dir, self.entity_types)
Пример #8
0
 def get_arachne_comparanda(self):
     """Add Arachne comparanda to ``self.item_json`` and return it.

     When ``self.check_arachne_relation()`` yields a string search url, the
     ArachneAPI is queried with it. If the API results are not False, two
     predicate definitions ('skos:editorialNote' and 'skos:example') are
     written into the third element of ``self.item_json['@context']`` and
     an observation is added through ``self.add_arachne_observation``.
     In every other case ``self.item_json`` is returned unchanged.

     Raises KeyError when ``self.item_json`` has no 'label' key.
     """
     # the label is read but not used further; the lookup is kept so a
     # missing label still raises as it did before
     _unused_label = self.item_json['label']
     search_url = self.check_arachne_relation()
     if not isinstance(search_url, str):
         return self.item_json
     a_api = ArachneAPI()
     a_api.get_results_from_search_url(search_url)
     if a_api.results is False:
         return self.item_json

     def make_pred(same_as, slug, pred_label, pred_type, data_type):
         """Assemble one predicate definition for the JSON-LD context."""
         pred = LastUpdatedOrderedDict()
         pred['owl:sameAs'] = same_as
         pred['slug'] = slug
         pred['label'] = pred_label
         pred['oc-gen:predType'] = pred_type
         pred['type'] = data_type
         return pred

     editorial_pred = make_pred(
         'http://www.w3.org/2004/02/skos/core#editorialNote',
         'skos-editorialnote',
         'Arachne comparative material',
         'variable',
         'xsd:string')
     example_pred = make_pred(
         'http://www.w3.org/2004/02/skos/core#example',
         'skos-example',
         'Comparanda in Arachne',
         'link',
         '@id')
     self.item_json['@context'][2]['skos:editorialNote'] = editorial_pred
     self.item_json['@context'][2]['skos:example'] = example_pred
     self.add_arachne_observation(a_api)
     return self.item_json
Пример #9
0
 def add_arachne_observation(self, a_api):
     """Append an observation describing Arachne comparanda to the item.

     Ensures ``self.item_json['oc-gen:has-obs']`` exists, then appends a
     new observation dict to it. The observation id is '#obs-' plus the
     1-based position the observation takes in that list. It carries an
     HTML editorial note (built from ``a_api.result_count`` and
     ``a_api.arachne_html_url``) and ``a_api.results`` as 'skos:example'.
     """
     if 'oc-gen:has-obs' not in self.item_json:
         self.item_json['oc-gen:has-obs'] = []
     obs_position = len(self.item_json['oc-gen:has-obs']) + 1
     arachne_obs = LastUpdatedOrderedDict()
     arachne_obs['id'] = '#obs-' + str(obs_position)
     arachne_obs['oc-gen:sourceID'] = a_api.DEFAULT_API_BASE_URL
     arachne_obs['oc-gen:obsStatus'] = 'active'
     arachne_obs['type'] = 'oc-gen:observations'
     editorial = LastUpdatedOrderedDict()
     editorial['id'] = '#string-arachne-editorial'
     # the HTML fragments are joined in order into one note string
     editorial['xsd:string'] = ''.join([
         '<p>Arachne has: <strong>',
         str(a_api.result_count),
         '</strong> related item(s) with images</p>',
         '<p>Browse these comparanda: ',
         '[<a href="',
         a_api.arachne_html_url,
         '" target="_blank">Link to Arachne search results</a>]</p>',
         '<p><small>Open Context editors identified materials in Arachne likley to be relevant for comparison to this type.',
         ' <a href="http://arachne.uni-koeln.de/" target="_blank">Arachne</a> is the central object database of the German Archaeological Institute (DAI)',
         ' and the Archaeological Institute of the University of Cologne.</small></p>',
     ])
     arachne_obs['skos:editorialNote'] = [editorial]
     arachne_obs['skos:example'] = a_api.results
     self.item_json['oc-gen:has-obs'].append(arachne_obs)
Пример #10
0
 def get_proj_types(self, project_uuid):
     """List description groupings for the item types used in a project.

     One entry is produced per distinct Manifest ``item_type`` of the
     project, except 'types' and 'predicates'; its ``children`` come from
     ``self.get_proj_type_classes``. When
     ``self.get_proj_complex_description_preds`` returns a non-empty
     result, a final 'complex-descriptions' entry is added whose
     ``children`` is the placeholder list ``['']``.

     Returns a list of LastUpdatedOrderedDict entries with the keys
     ``id``, ``label``, ``class_uri``, ``class_label`` and ``children``.
     """
     def make_item(item_id, label, children):
         """Assemble one output entry with blank class fields."""
         entry = LastUpdatedOrderedDict()
         entry['id'] = item_id
         entry['label'] = label
         entry['class_uri'] = ''
         entry['class_label'] = ''
         entry['children'] = children
         return entry

     skip_types = ['types', 'predicates']
     output = []
     type_rows = Manifest.objects.filter(
         project_uuid=project_uuid
     ).values('item_type').distinct('item_type')
     for type_row in type_rows:
         item_type = type_row['item_type']
         if item_type in skip_types:
             continue
         output.append(make_item(
             project_uuid + '/' + item_type,
             'Descriptions for ' + item_type,
             self.get_proj_type_classes(project_uuid, item_type)))
     complex_preds = self.get_proj_complex_description_preds(project_uuid)
     if len(complex_preds) > 0:
         output.append(make_item(
             project_uuid + '/complex-descriptions',
             'Descriptions used in Complex Descriptions',
             ['']))
     return output
Пример #11
0
    def make_geojson(self, record_index, total_found):
        """Return this record as a GeoJSON feature dict.

        The feature has the keys ``id``, ``label``, ``rdfs:isDefinedBy``,
        ``type``, ``category``, ``geometry`` and ``properties``. A ``when``
        object is inserted before ``properties`` only when both
        ``self.early_date`` and ``self.late_date`` are not None. All ids
        embed ``record_index`` and ``total_found`` as '<index>-of-<total>'.
        """
        position = '{}-of-{}'.format(record_index, total_found)

        feature = LastUpdatedOrderedDict()
        feature['id'] = '#record-' + position
        feature['label'] = self.label
        feature['rdfs:isDefinedBy'] = self.uri
        feature['type'] = 'Feature'
        feature['category'] = 'oc-api:geo-record'

        geom = LastUpdatedOrderedDict()
        geom['id'] = '#record-geom-' + position
        geom['type'] = self.geo_feature_type
        geom['coordinates'] = self.geometry_coords
        feature['geometry'] = geom

        has_dates = not (self.early_date is None or self.late_date is None)
        if has_dates:
            # both dates are known, so describe the time span
            event = LastUpdatedOrderedDict()
            event['id'] = '#record-event-' + position
            event['type'] = 'oc-gen:formation-use-life'
            # numeric years are converted to GeoJSON-LD ISO 8601
            event['start'] = ISOyears().make_iso_from_float(self.early_date)
            event['stop'] = ISOyears().make_iso_from_float(self.late_date)
            feature['when'] = event

        # the properties dict is added last
        feature['properties'] = self.make_client_properties_dict(
            id_value='#rec-' + position,
            feature_type='item record')

        return feature
Пример #12
0
 def __init__(self):
     """Set up an empty facet field.

     Scalar attributes start as False (``facet_field_index`` as 0), the
     flat option lists start empty, and each of the four grouped option
     dicts gets an empty list for every label in
     ``FacetSearchTemplate.SUB_HEADINGS``.
     """
     self.facet_field_index = 0
     self.dom_id_prefix = self.id = self.defined_by = False
     self.label = self.type = False
     # whether the item_type limit is in effect
     self.item_type_limited = False
     # grouped options: group label -> list of options, one dict per
     # kind of faceted search option
     self.fg_id_options = LastUpdatedOrderedDict()
     self.fg_num_options = LastUpdatedOrderedDict()
     self.fg_date_options = LastUpdatedOrderedDict()
     self.fg_string_options = LastUpdatedOrderedDict()
     self.group_labels = []
     # flat option lists
     self.id_options = []
     self.numeric_options = []
     self.date_options = []
     self.string_options = []
     self.option_types = []
     self.show_group_labels = False
     grouped_option_dicts = (
         self.fg_id_options,
         self.fg_num_options,
         self.fg_date_options,
         self.fg_string_options,
     )
     # seed every grouped dict with an empty list per sub-heading
     for group_label in FacetSearchTemplate.SUB_HEADINGS:
         for grouped in grouped_option_dicts:
             if group_label not in grouped:
                 grouped[group_label] = []
Пример #13
0
 def make_json_for_html(self):
     """Serialize the concept tree roots to ``self.json_tree``.

     Up to three sections are produced, in this order: root classes, root
     properties and the children of ``self.entity``. Each section is a
     dict with ``root`` (heading), ``children`` and ``more`` (always True).
     ``self.json_tree`` is only assigned when at least one section exists.
     """
     def make_section(heading, children):
         """Assemble one tree section."""
         section = LastUpdatedOrderedDict()
         section['root'] = heading
         section['children'] = children
         section['more'] = True
         return section

     sections = []
     if len(self.root_classes) > 0:
         # root level categories
         sections.append(make_section(
             'Top-Level Classes / Categories', self.root_classes))
     if len(self.root_properties) > 0:
         # root level properties
         sections.append(make_section(
             'Top-Level Properties / Relations', self.root_properties))
     if len(self.children) > 0:
         # children of the current concept
         if self.entity.entity_type == 'class':
             heading = 'Sub-categories for ' + self.entity.label
         else:
             heading = 'Sub-properties for ' + self.entity.label
         sections.append(make_section(heading, self.children))
     if len(sections) > 0:
         # there is something to display in the json tree
         self.json_tree = json.dumps(sections, ensure_ascii=False, indent=4)
Пример #14
0
 def document_missing_old_oc_uuids(self):
     """Write a JSON report of record uuids that have no ExpCell rows.

     For every table id in ``self.table_id_list`` (populated by
     ``self.get_migrate_old_oc_table_ids()``), each uuid returned by
     ``self.get_old_oc_record_uuids`` is looked up in ExpCell. Uuids
     without any matching cell are counted in ``total-missing``, printed,
     and - when ``self.act_table_obj`` has a record for them - copied into
     the table's ``records`` section of the report.

     The report is written as UTF-8 JSON to 'missing-uuids.json' in the
     directory returned by ``self.set_check_directory(self.old_oc_table_dir)``.
     The file is written inside a ``with`` block so the handle is closed
     even when the write fails.
     """
     missing = LastUpdatedOrderedDict()
     missing['total-missing'] = 0
     self.get_migrate_old_oc_table_ids()
     for old_table_id in self.table_id_list:
         act_tab = LastUpdatedOrderedDict()
         act_tab['label'] = self.label
         act_tab['records'] = LastUpdatedOrderedDict()
         if isinstance(old_table_id, str):
             uuids = self.get_old_oc_record_uuids(old_table_id,
                                                  True)
             for uuid in uuids:
                 u_ok = ExpCell.objects\
                               .filter(table_id=old_table_id,
                                       uuid=uuid)[:1]
                 if len(u_ok) >= 1:
                     # the uuid has at least one cell, so it is not missing
                     continue
                 missing['total-missing'] += 1
                 print(str(missing['total-missing']) + ' uuid: ' + uuid)
                 if self.act_table_obj is not False \
                    and 'records' in self.act_table_obj \
                    and uuid in self.act_table_obj['records']:
                     # keep the old record so the gap is documented
                     act_tab['records'][uuid] = self.act_table_obj['records'][uuid]
         missing[old_table_id] = act_tab
     missing_json = json.dumps(missing,
                               ensure_ascii=False, indent=4)
     dir_file = self.set_check_directory(self.old_oc_table_dir) + 'missing-uuids.json'
     with open(dir_file, 'w', encoding='utf-8') as f:
         f.write(missing_json)
Пример #15
0
 def make_sort_links_list(self, request_dict):
     """Populate ``self.sort_links`` with links for the sort options.

     Any 'sort' entry is removed from ``request_dict`` (the caller's dict
     is modified). For each entry of ``self.SORT_OPTIONS`` with a truthy
     'opt':

     * when it has a 'value', an ascending and a descending link are
       built, and each is added unless ``self.current_sorting`` already
       contains the same type and sort order;
     * when its 'value' is None, a single 'descending' link built from the
       request without a sort parameter is added, but only if
       ``self.using_default_sorting`` is False.
     """
     if 'sort' in request_dict:
         request_dict.pop('sort')

     def new_filter_links():
         """Create a FilterLinks object primed with the current request."""
         fl = FilterLinks()
         fl.base_search_link = self.base_search_link
         fl.base_request_json = json.dumps(request_dict,
                                           ensure_ascii=False,
                                           indent=4)
         fl.spatial_context = self.spatial_context
         return fl

     def make_sort_obj(links, sort_opt, order_label):
         """Assemble the dict describing one sort link."""
         sort_obj = LastUpdatedOrderedDict()
         sort_obj['id'] = links['html']
         sort_obj['json'] = links['json']
         sort_obj['type'] = sort_opt['type']
         sort_obj['label'] = sort_opt['label']
         sort_obj['oc-api:sort-order'] = order_label
         return sort_obj

     orderings = (('asc', 'ascending'), ('desc', 'descending'))
     for act_sort in self.SORT_OPTIONS:
         if not act_sort['opt']:
             # sort links are only made when the 'opt' key is true
             continue
         if act_sort['value'] is None:
             if self.using_default_sorting is False:
                 # link to the default sorting only when it is not the
                 # sorting currently in use
                 fl = new_filter_links()
                 links = fl.make_request_urls(request_dict)
                 self.sort_links.append(
                     make_sort_obj(links, act_sort, 'descending'))
             continue
         for order_key, order_label in orderings:
             sort_value = act_sort['value'] + self.order_sep + order_key
             fl = new_filter_links()
             sort_rparams = fl.add_to_request('sort', sort_value)
             links = fl.make_request_urls(sort_rparams)
             sort_obj = make_sort_obj(links, act_sort, order_label)
             already_active = False
             for active in self.current_sorting:
                 if act_sort['type'] == active['type'] \
                    and order_label == active['oc-api:sort-order']:
                     # this sort option is ALREADY in use
                     already_active = True
             if already_active is False:
                 # only offer sort options that are not already in use
                 self.sort_links.append(sort_obj)
Пример #16
0
    def make_related_media_facets(self, solr_json):
        """Build the related-media facet object from a solr response.

        For every media configuration in
        ``configs.FACETS_RELATED_MEDIA['oc-api:has-rel-media-options']``
        the facet counts are summed, ignoring the facet value "0". A
        facet option is produced when that sum is non-zero and the option
        url differs from ``self.current_filters_url``.

        Returns a LastUpdatedOrderedDict with ``id``, ``label`` and
        ``oc-api:has-rel-media-options``, or None when no option was made.
        """
        media_configs = configs.FACETS_RELATED_MEDIA[
            'oc-api:has-rel-media-options']
        options = []
        for media_config in media_configs:
            val_count_tups = utilities.get_path_facet_value_count_tuples(
                media_config['facet_path'],
                solr_json
            )
            # the facet value "0" stands for items with NO related media
            # of this type, so it is left out of the total
            total_count = sum(
                count for value, count in val_count_tups if value != "0"
            )
            if total_count == 0:
                # nothing has related media of this type: no facet option
                continue

            links = SearchLinks(
                request_dict=copy.deepcopy(self.request_dict),
                base_search_url=self.base_search_url
            )
            # drop the params that are not search related
            links.remove_non_query_params()
            links.replace_param_value(
                media_config['param_key'],
                new_value=1,
            )
            urls = links.make_urls_from_request_dict()
            if urls['html'] == self.current_filters_url:
                # this option would lead to the current filter url,
                # so it is not offered
                continue

            option = LastUpdatedOrderedDict()
            option['label'] = media_config['label']
            option['count'] = total_count
            option['id'] = urls['html']
            option['json'] = urls['json']
            options.append(option)

        if not options:
            # no related media options were made
            return None

        facets = LastUpdatedOrderedDict()
        facets['id'] = configs.FACETS_RELATED_MEDIA['id']
        facets['label'] = configs.FACETS_RELATED_MEDIA['label']
        facets['oc-api:has-rel-media-options'] = options
        return facets
Пример #17
0
 def __init__(self):
     """Set default state: no tree, no project/source, empty lookups."""
     self.tree = None
     # project and source identifiers start unset (False)
     self.project_uuid = self.source_id = False
     # in-memory lookups
     self.relation_types = LastUpdatedOrderedDict()
     self.entities = LastUpdatedOrderedDict()
     # configuration keys
     self.oc_config_relation_types = 'oc-relation-types'
     self.oc_config_entities = 'oc-entities'
     self.reconcile_key = 'faims_id'
     # file manager helper object
     self.fm = FileManage()
Пример #18
0
 def process_solr_tiles(self, solr_tiles):
     """Turn solr discovery geo tiles into GeoJSON region features.

     The tiles are first aggregated by ``self.aggregate_spatial_tiles``.
     For each aggregated tile a feature record is built with a filter
     link, the count, an optional ``when`` object (only when both
     ``self.min_date`` and ``self.max_date`` are not False), a polygon
     geometry and a properties dict. Records are appended to
     ``self.geojson_regions`` unless the tile key starts with '211111'.
     Region labels are numbered over all aggregated tiles, including the
     ones that are left out.
     """
     # first combine the counts of tiles that belong together
     aggregate_tiles = self.aggregate_spatial_tiles(solr_tiles)
     # then produce one GeoJSON feature per tile region
     for region_num, (tile_key, tile_count) in enumerate(
             aggregate_tiles.items(), start=1):
         fl = FilterLinks()
         fl.base_request_json = self.filter_request_dict_json
         fl.spatial_context = self.spatial_context
         tile_rparams = fl.add_to_request('disc-geotile', tile_key)
         feature = LastUpdatedOrderedDict()
         feature['id'] = fl.make_request_url(tile_rparams)
         feature['json'] = fl.make_request_url(tile_rparams, '.json')
         feature['count'] = tile_count
         feature['type'] = 'Feature'
         feature['category'] = 'oc-api:geo-facet'
         if not (self.min_date is False or self.max_date is False):
             event = LastUpdatedOrderedDict()
             event['id'] = '#event-' + tile_key
             event['type'] = 'oc-gen:formation-use-life'
             # numeric years are converted to GeoJSON-LD ISO 8601
             event['start'] = ISOyears().make_iso_from_float(self.min_date)
             event['stop'] = ISOyears().make_iso_from_float(self.max_date)
             feature['when'] = event
         mercator = GlobalMercator()
         polygon = LastUpdatedOrderedDict()
         poly_coords = mercator.quadtree_to_geojson_poly_coords(tile_key)
         polygon['id'] = '#geo-disc-tile-geom-' + tile_key
         polygon['type'] = 'Polygon'
         polygon['coordinates'] = poly_coords
         feature['geometry'] = polygon
         props = LastUpdatedOrderedDict()
         props['id'] = '#geo-disc-tile-' + tile_key
         props['href'] = feature['id']
         props['label'] = 'Discovery region (' + str(region_num) + ')'
         props['feature-type'] = 'discovery region (facet)'
         props['count'] = tile_count
         props['early bce/ce'] = self.min_date
         props['late bce/ce'] = self.max_date
         feature['properties'] = props
         if tile_key.startswith('211111'):
             # tiles under this prefix are the bad coordinates at 0, 0
             # (off the coast of Africa); such items are not displayed
             continue
         self.geojson_regions.append(feature)
Пример #19
0
 def json_geo_overlay(self):
     """Return a JSON string describing ``self.geo_overlays``.

     The JSON object has a single 'overlays' list; every entry holds the
     ``url`` (the file uri of the overlay's full file object) and the
     overlay's ``metadata``.
     """
     overlays = []
     for geo_media in self.geo_overlays:
         overlay = LastUpdatedOrderedDict()
         overlay['url'] = geo_media.full_file_obj.file_uri
         overlay['metadata'] = geo_media.metadata
         overlays.append(overlay)
     output = LastUpdatedOrderedDict()
     output['overlays'] = overlays
     return json.dumps(output, indent=4, ensure_ascii=False)
Пример #20
0
 def db_save_reconcile_predicates_types(self, act_dir):
     """ Saves or reconciles the predicates and types described in the
         attributes file found in act_dir.

         The attributes dictionary is loaded through self.fm using the
         self.oc_config_attributes key. For each attribute a predicate is
         requested from PredicateManagement; when one comes back, its uuid
         is recorded on the attribute and each entry under 'objects' is
         passed to TypeManagement in the same way. The updated dictionary
         is then written back through self.fm. When no attributes file can
         be loaded, a message is printed and nothing is saved.
     """
     config_key = self.oc_config_attributes
     loaded_attributes = self.fm.get_dict_from_file(config_key, act_dir)
     if loaded_attributes is None:
         print('Need to 1st generate an attributes file from the ArchEnts!')
         return
     # we have JSON with a dictionary for the attributes
     self.attributes = loaded_attributes
     for faims_id_pred, attrib_dict in loaded_attributes.items():
         # default to always making a predicate and a type for attributes
         pred_sup = LastUpdatedOrderedDict()
         pred_sup[self.reconcile_key] = faims_id_pred
         pred_manager = PredicateManagement()
         pred_manager.project_uuid = self.project_uuid
         pred_manager.source_id = self.source_id
         pred_manager.sup_dict = pred_sup
         pred_manager.sup_reconcile_key = self.reconcile_key
         pred_manager.sup_reconcile_value = faims_id_pred
         pred_obj = pred_manager.get_make_predicate(
             attrib_dict['label'],
             attrib_dict['predicate_type'],
             attrib_dict['data_type'])
         if pred_obj is False:
             # predicate could not be reconciled, nothing more to record
             continue
         pred_uuid = str(pred_obj.uuid)
         self.attributes[faims_id_pred]['predicate_uuid'] = pred_uuid
         if 'objects' not in attrib_dict:
             continue
         for faims_id_type, type_dict in attrib_dict['objects'].items():
             type_sup = LastUpdatedOrderedDict()
             type_sup[self.reconcile_key] = faims_id_type
             type_manager = TypeManagement()
             type_manager.project_uuid = self.project_uuid
             type_manager.source_id = self.source_id
             type_manager.sup_dict = type_sup
             type_manager.sup_reconcile_key = self.reconcile_key
             type_manager.sup_reconcile_value = faims_id_type
             type_obj = type_manager.get_make_type_within_pred_uuid(
                 pred_obj.uuid, type_dict['label'])
             if type_obj is False:
                 continue
             # we have reconciled the type, note both identifiers on it
             type_dict['type_uuid'] = str(type_obj.uuid)
             type_dict['predicate_uuid'] = str(pred_obj.uuid)
             self.attributes[faims_id_pred]['objects'][
                 faims_id_type] = type_dict
     # now save the results
     self.fm.save_serialized_json(config_key, act_dir, self.attributes)
Пример #21
0
 def make_dict_from_anno_obj(self, anno_obj):
     """ Builds an ordered dict describing an import field annotation.

         The result has the keys 'id', 'subject', 'predicate' and 'object'.
         'subject' is the field dict for anno_obj.field_num, or False when
         that field object is not found. 'predicate' and 'object' describe
         either an import field (when the matching *_field_num is above 0)
         or an entity looked up with Entity.dereference.
     """
     anno_dict = LastUpdatedOrderedDict()
     anno_dict['id'] = anno_obj.id

     # --- subject ---
     subject_field = self.get_field_object(anno_obj.field_num)
     if subject_field is False:
         anno_dict['subject'] = False
     else:
         subject = self.make_dict_from_field_obj(subject_field)
         subject['id'] = anno_obj.field_num
         anno_dict['subject'] = subject

     # --- predicate ---
     if anno_obj.predicate_field_num > 0:
         # the predicate is itself one of the import fields
         predicate_field = self.get_field_object(
             anno_obj.predicate_field_num)
         predicate = self.make_dict_from_field_obj(predicate_field)
         predicate['id'] = anno_obj.predicate_field_num
         predicate['type'] = 'import-field'
     else:
         predicate = LastUpdatedOrderedDict()
         predicate['id'] = anno_obj.predicate
         pred_entity = Entity()
         if pred_entity.dereference(anno_obj.predicate):
             predicate['label'] = pred_entity.label
             predicate['type'] = pred_entity.item_type
         elif anno_obj.predicate == ImportFieldAnnotation.PRED_CONTAINED_IN:
             predicate['label'] = 'Contained in'
         elif anno_obj.predicate == ImportFieldAnnotation.PRED_DESCRIBES:
             predicate['label'] = 'Describes'
         elif anno_obj.predicate == ImportFieldAnnotation.PRED_VALUE_OF:
             predicate['label'] = 'Value of'
         elif anno_obj.predicate == ImportFieldAnnotation.PRED_MEDIA_PART_OF:
             predicate['label'] = 'Media part of'
         else:
             # neither a known entity nor a built-in import predicate
             predicate['label'] = False
             predicate['type'] = False
     anno_dict['predicate'] = predicate

     # --- object ---
     if anno_obj.object_field_num > 0:
         object_field = self.get_field_object(anno_obj.object_field_num)
         obj = self.make_dict_from_field_obj(object_field)
         obj['id'] = anno_obj.object_field_num
         obj['type'] = 'import-field'
     else:
         obj = LastUpdatedOrderedDict()
         obj['id'] = anno_obj.object_uuid
         obj_entity = Entity()
         if obj_entity.dereference(anno_obj.object_uuid):
             obj['label'] = obj_entity.label
             obj['type'] = obj_entity.item_type
         else:
             obj['label'] = False
             obj['type'] = False
     anno_dict['object'] = obj
     return anno_dict
Пример #22
0
    def get_orphan_items(self, item_type, consider=('contain', 'link')):
        """ Gets manifest items of a given type that lack assertions.

            An item is returned when it belongs to self.project_uuid, has
            the requested item_type, and has no row in oc_assertions other
            than the predicates excluded through `consider`.

            :param item_type: manifest item_type to look for
            :param consider: container of flags; 'contain' makes the query
                ignore containment assertions and 'link' makes it ignore
                link assertions when deciding whether an item has any
                assertions at all
            :return: self.blank_items, keyed by item_type and then by
                class_uri (or by item_type when class_uri is empty); each
                entry has a 'label' and an 'items' list of uuid / label
                dicts. Results accumulate across calls.
        """
        # All values are handed to the database driver as parameters
        # instead of being spliced into the SQL text, so quoting is done
        # by the driver and the values cannot alter the statement.
        join_limits = []
        params = []
        if 'contain' in consider:
            join_limits.append(' AND oc_assertions.predicate_uuid != %s')
            params.append(Assertion.PREDICATES_CONTAINS)
        if 'link' in consider:
            join_limits.append(' AND oc_assertions.predicate_uuid != %s')
            params.append(Assertion.PREDICATES_LINK)
        sql = (
            'SELECT oc_manifest.uuid AS uuid, '
            'oc_manifest.label AS label, '
            'oc_manifest.item_type AS item_type, '
            'oc_manifest.class_uri AS class_uri '
            'FROM oc_manifest '
            'LEFT JOIN oc_assertions ON '
            '(oc_manifest.uuid = oc_assertions.uuid{join_limits}) '
            'WHERE oc_manifest.project_uuid = %s '
            'AND oc_manifest.item_type = %s '
            'AND oc_assertions.uuid IS NULL '
            'ORDER BY oc_manifest.sort; '
        ).format(join_limits=''.join(join_limits))
        # placeholder order: join limits first, then the WHERE clause
        params.extend([self.project_uuid, item_type])

        non_descript = Manifest.objects.raw(sql, params)
        for dull_man in non_descript:
            row_item_type = dull_man.item_type
            class_uri = dull_man.class_uri
            if not class_uri:
                # no class (empty or missing): group under the item type
                class_uri = row_item_type
            if row_item_type not in self.blank_items:
                self.blank_items[row_item_type] = LastUpdatedOrderedDict()
            if class_uri not in self.blank_items[row_item_type]:
                class_dict = LastUpdatedOrderedDict()
                ent = Entity()
                found = ent.dereference(class_uri)
                if found:
                    class_dict['label'] = ent.label
                else:
                    class_dict['label'] = row_item_type
                class_dict['items'] = []
                self.blank_items[row_item_type][class_uri] = class_dict
            item = LastUpdatedOrderedDict()
            item['uuid'] = dull_man.uuid
            item['label'] = dull_man.label
            self.blank_items[row_item_type][class_uri]['items'].append(item)
        return self.blank_items
Пример #23
0
 def get_catal_related(self):
     """ Adds related Çatalhöyük Living Archive data to the item, if any.

         The item's categories and projects are passed to the archive API
         for a relevance check. When the item is relevant and the API has
         data for its label, SKOS predicate definitions are added to the
         third element of the item's '@context' and
         add_catal_observation is called. Returns self.item_json.
     """
     item_label = self.item_json['label']
     categories = (self.item_json['category']
                   if 'category' in self.item_json else [])
     projects = (self.item_json['dc-terms:isPartOf']
                 if 'dc-terms:isPartOf' in self.item_json else [])
     catal_api = CatalLivingArchiveAPI()
     catal_api.check_relevance(categories, projects)
     if not catal_api.relevant:
         return self.item_json
     catal_api.get_unit(item_label)
     if not catal_api.has_data:
         return self.item_json

     # (context key, owl:sameAs URI, slug, label) for each predicate
     pred_specs = (
         ('skos:editorialNote',
          'http://www.w3.org/2004/02/skos/core#editorialNote',
          'skos-editorialnote',
          'About Çatalhöyük Living Archive Data'),
         ('skos:definition',
          'http://www.w3.org/2004/02/skos/core#definition',
          'skos-definition',
          'Çatalhöyük Living Archive: Unit Properties'),
         ('skos:note',
          'http://www.w3.org/2004/02/skos/core#note',
          'skos-note',
          'Çatalhöyük Living Archive: Unit Finds'),
     )
     preds = {}
     for context_key, same_as, slug, pred_label in pred_specs:
         pred = LastUpdatedOrderedDict()
         pred['owl:sameAs'] = same_as
         pred['slug'] = slug
         pred['label'] = pred_label
         pred['oc-gen:predType'] = 'variable'
         pred['type'] = 'xsd:string'
         preds[context_key] = pred

     item_context = self.item_json['@context'][2]
     # the editorial note is always added; the others only with content
     item_context['skos:editorialNote'] = preds['skos:editorialNote']
     if catal_api.props_count > 0:
         item_context['skos:definition'] = preds['skos:definition']
     if catal_api.finds_count > 0:
         item_context['skos:note'] = preds['skos:note']
     self.add_catal_observation(catal_api)
     return self.item_json
Пример #24
0
 def get_all_projects(self):
     """ Lists all projects recorded in the manifest.

         Manifest records with item_type 'projects' are read ordered by
         revised, published and record_updated (all descending). Returns a
         list of ordered dicts with the keys 'uuid', 'label', 'published',
         'revised', 'edit_status', 'short_des' and 'count_imp' (the number
         of ImportSource records for the project). When no Project record
         exists for a manifest item, 'edit_status' is False and
         'short_des' is an empty string.
     """
     output = []
     man_projs = Manifest.objects\
                         .filter(item_type='projects')\
                         .order_by('-revised',
                                   '-published',
                                   '-record_updated')
     for man_proj in man_projs:
         act_item = LastUpdatedOrderedDict()
         act_item['uuid'] = man_proj.uuid
         act_item['label'] = man_proj.label
         act_item['published'] = man_proj.published
         act_item['revised'] = man_proj.revised
         try:
             pobj = Project.objects.get(uuid=man_proj.uuid)
             act_item['edit_status'] = pobj.edit_status
             act_item['short_des'] = pobj.short_des
         except Project.DoesNotExist:
             act_item['edit_status'] = False
             act_item['short_des'] = ''
         # only the number of sources is needed, so let the database
         # count them instead of loading every row just to call len()
         act_item['count_imp'] = ImportSource.objects\
                                             .filter(project_uuid=man_proj.uuid)\
                                             .count()
         output.append(act_item)
     return output
Пример #25
0
 def classify_xml_tree_relation_types(self):
     """ Records the relationship types found in the XML tree.

         For every /relationships/relationshipType element an ordered dict
         is stored in self.relation_types under its 'relntypeid', holding
         the label, the number of child 'relationship' elements and
         placeholder values. Afterwards each type gets an 'order' rank,
         starting at 1 for the type with the fewest relationships.
         Does nothing when self.tree is False.
     """
     if self.tree is False:
         return
     counts_by_id = {}
     for type_node in self.tree.xpath('/relationships/relationshipType'):
         faims_id = type_node.get('relntypeid')
         n_relations = len(type_node.xpath('relationship'))
         counts_by_id[faims_id] = n_relations
         type_entry = LastUpdatedOrderedDict()
         type_entry['id'] = faims_id
         type_entry['label'] = type_node.get('relntypename')
         type_entry['oc-equiv'] = None
         type_entry['order'] = None
         type_entry['predicate_uuid'] = None
         type_entry['data_type'] = 'id'
         type_entry['predicate_type'] = 'link'
         type_entry['count'] = n_relations
         self.relation_types[faims_id] = type_entry
     # rank the types by ascending relationship count
     ranked_ids = sorted(counts_by_id, key=counts_by_id.get)
     for rank, faims_id in enumerate(ranked_ids, start=1):
         self.relation_types[faims_id]['order'] = rank
Пример #26
0
    def add_project_predicates_and_annotations_to_graph(self, graph):
        """ Appends one dict per project predicate to the graph list.

            Predicates come from get_working_project_predicates and their
            link annotations from get_link_annotations_for_preds. Each
            predicate dict carries '@id', 'owl:sameAs', 'label', 'uuid',
            'slug' and, when class_uri is a non-empty string,
            'oc-gen:predType'; annotations for the predicate are merged in
            with add_unique_object_dict_to_pred. Returns the graph, which
            is left unchanged when the predicates are not a list.
        """
        pred_rows = self.get_working_project_predicates()
        link_annos = self.get_link_annotations_for_preds(pred_rows)
        if not isinstance(pred_rows, list):
            # No predicates in the project. Weird, but possible
            return graph
        # group the link annotations by the predicate uuid they describe
        annos_by_subject = {}
        for link_anno in link_annos:
            annos_by_subject.setdefault(link_anno.subject, []).append(
                link_anno)
        for row in pred_rows:
            pred_uuid = row['predicate_uuid']
            pred_dict = LastUpdatedOrderedDict()
            pred_dict['@id'] = 'oc-pred:' + str(row['slug'])
            pred_dict['owl:sameAs'] = URImanagement.make_oc_uri(
                pred_uuid, 'predicates')
            pred_dict['label'] = row['label']
            pred_dict['uuid'] = pred_uuid
            pred_dict['slug'] = row['slug']
            class_uri = row['class_uri']
            if isinstance(class_uri, str) and class_uri:
                pred_dict['oc-gen:predType'] = class_uri

            for link_anno in annos_by_subject.get(pred_uuid, []):
                anno_pred_uri = URImanagement.prefix_common_uri(
                    link_anno.predicate_uri)
                pred_dict = self.add_unique_object_dict_to_pred(
                    pred_dict, anno_pred_uri, link_anno.object_uri)
            graph.append(pred_dict)
        return graph
Пример #27
0
 def __init__(self, id_href=True):
     """ Sets up the GeoJSON context URI and the default JSON-LD context.

         self.context maps namespace prefixes and term aliases to their
         definitions, in a fixed order. Every predicate listed in
         settings.TEXT_CONTENT_PREDICATES that is not already defined gets
         a language container. id_href is accepted but not used here.
     """
     # for geo_json_context
     self.geo_json_context = self.GEO_JSON_CONTEXT_URI
     context_terms = (
         # namespace prefixes
         ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
         ('rdfs', 'http://www.w3.org/2000/01/rdf-schema#'),
         ('xsd', 'http://www.w3.org/2001/XMLSchema#'),
         ('skos', 'http://www.w3.org/2004/02/skos/core#'),
         ('owl', 'http://www.w3.org/2002/07/owl#'),
         ('dc-terms', 'http://purl.org/dc/terms/'),
         ('dcmi', 'http://dublincore.org/documents/dcmi-terms/'),
         ('bibo', 'http://purl.org/ontology/bibo/'),
         ('foaf', 'http://xmlns.com/foaf/0.1/'),
         ('cidoc-crm', 'http://erlangen-crm.org/current/'),
         ('dcat', 'http://www.w3.org/ns/dcat#'),
         ('geojson', 'https://purl.org/geojson/vocab#'),
         ('cc', 'http://creativecommons.org/ns#'),
         ('nmo', 'http://nomisma.org/ontology#'),
         ('oc-gen', 'http://opencontext.org/vocabularies/oc-general/'),
         ('oc-pred', 'http://opencontext.org/predicates/'),
         # default language and term aliases
         ('@language', Languages().DEFAULT_LANGUAGE),
         ('id', '@id'),
         ('label', 'rdfs:label'),
         ('uuid', 'dc-terms:identifier'),
         ('slug', 'oc-gen:slug'),
         ('type', '@type'),
         ('category', {'@id': 'oc-gen:category', '@type': '@id'}),
         ('owl:sameAs', {'@type': '@id'}),
         ('skos:altLabel', {'@container': '@language'}),
         ('xsd:string', {'@container': '@language'}),
         ('description', {'@id': 'dc-terms:description',
                          '@container': '@language'}),
     )
     context = LastUpdatedOrderedDict()
     for term, definition in context_terms:
         context[term] = definition
     for text_pred in settings.TEXT_CONTENT_PREDICATES:
         if text_pred not in context:
             context[text_pred] = {'@container': '@language'}
     self.context = context
Пример #28
0
 def check_make_valid_label(self, label, prefix='', num_id_len=False):
     """ Checks a label and suggests an alternative based on the prefix.

         Returns an ordered dict with:
         'checked'   - the label (False when an empty string was given),
         'suggested' - the result of suggest_valid_label, replaced by the
                       current manifest label when the label is unused
                       and self.uuid resolves to a manifest item,
         'exists' / 'valid' - None for both when there is no label;
                       otherwise whether check_label_exists_in_scope
                       found the label, and the opposite for 'valid',
         'exists_uuid' - id of the existing item, when the label exists,
         'uuid'      - uuid of the manifest item found through self.uuid.
     """
     output = LastUpdatedOrderedDict()
     if isinstance(label, str) and len(label) < 1:
         # treat an empty string as "no label given"
         label = False
     output['checked'] = label
     output['suggested'] = self.suggest_valid_label(prefix, num_id_len)
     if label is False:
         output['exists'] = None
         output['valid'] = None
         return output
     existing_id = self.check_label_exists_in_scope(label)
     if existing_id is not False:
         # label is already taken
         output['exists'] = True
         output['exists_uuid'] = existing_id
         output['valid'] = False
         return output
     output['exists'] = False
     output['valid'] = True
     if self.uuid is False:
         return output
     current_item = self.get_manifest_item(self.uuid)
     if current_item is not False:
         output['suggested'] = current_item.label
         output['uuid'] = current_item.uuid
     return output
Пример #29
0
 def get_project(self, project_uuid):
     """ Gets a summary of one project together with its import sources.

         Returns False when no manifest record has the given uuid.
         Otherwise returns an ordered dict with the manifest fields
         ('uuid', 'label', 'published', 'revised'), the project's
         'edit_status' and 'short_des' (False and '' when there is no
         Project record), the annotated import 'sources', the related
         'refines', and the Refine 'ref_baseurl' and 'refine_ok' values.
     """
     try:
         manifest_rec = Manifest.objects.get(uuid=project_uuid)
     except Manifest.DoesNotExist:
         return False
     summary = LastUpdatedOrderedDict()
     summary['uuid'] = manifest_rec.uuid
     summary['label'] = manifest_rec.label
     summary['published'] = manifest_rec.published
     summary['revised'] = manifest_rec.revised
     try:
         project_rec = Project.objects.get(uuid=manifest_rec.uuid)
     except Project.DoesNotExist:
         summary['edit_status'] = False
         summary['short_des'] = ''
     else:
         summary['edit_status'] = project_rec.edit_status
         summary['short_des'] = project_rec.short_des
     # get sources from refine first, since it lets us know if updated
     refine_sources = self.relate_refine_local_sources()
     source_query = ImportSource.objects\
                                .filter(project_uuid=project_uuid)\
                                .order_by('-updated')
     checked_sources = self.note_unimport_ok(source_query)
     summary['sources'] = self.note_reloadable_sources(checked_sources)
     summary['refines'] = refine_sources
     summary['ref_baseurl'] = RefineAPI().get_project_base_url()
     summary['refine_ok'] = self.refine_ok
     return summary
Пример #30
0
 def add_predicate_json_ld(self):
     """ adds predicate specific information to the JSON-LD object """
     try:
         predicate = Predicate.objects.get(uuid=self.manifest.uuid)
     except Predicate.DoesNotExist:
         predicate = None
     if isinstance(predicate, Predicate):
         self.json_ld['oc-gen:data-type'] = predicate.data_type
         p_range = LastUpdatedOrderedDict()
         p_range['id'] = predicate.data_type
         if predicate.data_type == 'id':
             p_range['id'] = 'http://opencontext.org/vocabularies/oc-general/items'
             p_range['label'] = 'URI identified items'
         elif predicate.data_type == 'xsd:string':
             p_range['label'] = 'Alphanumeric text strings'
         elif predicate.data_type == 'xsd:double':
             p_range['label'] = 'Decimal values'
         elif predicate.data_type == 'xsd:integer':
             p_range['label'] = 'Integer values'
         elif predicate.data_type == 'xsd:date':
             p_range['label'] = 'Calendar / date values'
         self.json_ld['rdfs:range'] = [p_range]
         if self.assertion_hashes:
             # add a default sort order, for edit view JSON
             self.json_ld['oc-gen:default-sort-order'] = float(predicate.sort)