Example #1
 def add_source_cells(self, uuid, row_num, item_data):
     """ Adds source data records for an assertion """
     predicate_values = LastUpdatedOrderedDict()
     project_uuid = item_data[0].project_uuid
     for assertion in item_data:
         predicate_uuid = assertion.predicate_uuid
         object_uuid = assertion.object_uuid
         if assertion.object_type == 'xsd:string':
             try:
                 oc_str = OCstring.objects.get(uuid=object_uuid)
                 obj_val = oc_str.content
             except OCstring.DoesNotExist:
                 obj_val = ''
         elif assertion.object_type in ['xsd:integer', 'xsd:double']:
             # numeric value
             obj_val = str(assertion.data_num)
         elif assertion.object_type == 'xsd:date':
             obj_val = str(assertion.data_date)
         else:
             obj_val = str(self.deref_entity_label(object_uuid))
         if predicate_uuid not in predicate_values:
             # make a list, since some predicates are multi-valued
             predicate_values[predicate_uuid] = []
         predicate_values[predicate_uuid].append(obj_val)
     for predicate_uuid, val_list in predicate_values.items():
         field_num = self.get_add_predicate_field_number(predicate_uuid)
         cell = ExpCell()
         cell.table_id = self.table_id
         cell.uuid = uuid
         cell.project_uuid = project_uuid
         cell.row_num = row_num
         cell.field_num = field_num
         cell.record = self.multi_source_value_delim.join(val_list)  # semi-colon delim for multivalued predicates
         cell.save()
         cell = None
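
A minimal usage sketch for the method above, assuming the Create exporter class shown in full in Example #7; the table_id and uuid values here are hypothetical:

    # Hypothetical driver; table_id and uuid are made-up values.
    exporter = Create()
    exporter.table_id = 'demo-export-table'
    uuid = '9b1b2c3d-aaaa-bbbb-cccc-0123456789ab'
    item_data = Assertion.objects.filter(uuid=uuid)
    if len(item_data) > 0:
        exporter.add_source_cells(uuid, 1, item_data)
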
Example #2
 def add_project_types_with_annotations_to_graph(self, graph):
     """ adds project types that have annotations """
     type_sql_dict_list = self.get_working_project_types()
     if isinstance(type_sql_dict_list, list):
         # consolidate things so a given type appears only once in the
         # graph. To do so, we first put everything into an all_types
         # dict
         all_types = LastUpdatedOrderedDict()
         for sql_dict in type_sql_dict_list:
             type_uri = URImanagement.make_oc_uri(sql_dict['type_uuid'],
                                                  'types')
             if type_uri not in all_types:
                 act_type = LastUpdatedOrderedDict()
                 act_type['@id'] = type_uri 
                 act_type['label'] = sql_dict['type_label']
                 act_type['owl:sameAs'] = URImanagement.make_oc_uri(sql_dict['type_slug'],
                                                                    'types')
                 act_type['uuid'] = sql_dict['type_uuid']
                 act_type['slug'] = sql_dict['type_slug']
             else:
                 act_type = all_types[type_uri]
             la_pred_uri = URImanagement.prefix_common_uri(sql_dict['predicate_uri'])
             if la_pred_uri not in act_type:
                 act_type[la_pred_uri] = []
             la_object_item = self.make_object_dict_item(sql_dict['object_uri'])
             act_type[la_pred_uri].append(la_object_item)
             all_types[type_uri] = act_type
         for type_uri, act_type in all_types.items():
             graph.append(act_type)
     return graph
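
The method above consumes rows from get_working_project_types() keyed as in the loop; a sketch of one hypothetical row and the rough shape of the graph node it yields:

    # Hypothetical row shape (keys match those read in the loop above):
    sql_dict = {
        'type_uuid': '1234-abcd',
        'type_label': 'Pottery',
        'type_slug': 'proj-pottery',
        'predicate_uri': 'http://www.w3.org/2004/02/skos/core#closeMatch',
        'object_uri': 'http://vocab.getty.edu/aat/300010666',
    }
    # The node appended to the graph would look roughly like:
    # {'@id': '.../types/1234-abcd', 'label': 'Pottery',
    #  'owl:sameAs': '.../types/proj-pottery', 'uuid': '1234-abcd',
    #  'slug': 'proj-pottery', 'skos:closeMatch': [{...object dict...}]}
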
Example #3
 def __init__(self):
     self.table_id = False
     self.label = False
     self.dates_bce_ce = True  # calendar dates in BCE/CE, if false BP
     self.include_equiv_ld = True  # include linked data related by EQUIV_PREDICATES
     self.include_ld_obj_uris = True  # include URIs to linked data objects
     self.include_ld_source_values = True  # include original values annotated as
                                           # equivalent to linked data
     self.boolean_multiple_ld_fields = 'yes'  # for multiple values of linked data
                                              # (same predicate, multiple objects)
                                              # make multiple fields if NOT False.
                                              # When this value is NOT False, its
                                              # string value indicates presence of
                                              # a linked data object uri.
     self.include_original_fields = False  # include original field data
     self.fields = []
     self.context_fields = LastUpdatedOrderedDict()
     self.ld_fields = LastUpdatedOrderedDict()
     self.predicate_fields = LastUpdatedOrderedDict()
     self.multi_source_value_delim = '; '  # delimiter for multiple values in source data field
     self.obs_limits = []  # limits predicate exports to listed observation numbers, no limit if empty
     self.entities = {}
     self.predicate_uris_boolean_types = False  # predicate_uris expressed as boolean types
     self.predicate_uuids = LastUpdatedOrderedDict()  # predicate uuids used with a table
     self.ld_predicates = LastUpdatedOrderedDict()  # unique linked_data predicates
     self.ld_object_equivs = LastUpdatedOrderedDict()  # unique linked data equivalents for object uuids
     self.dc_contributor_ids = {}  # dict with ID keys and counts of dc-terms:contributor
     self.dc_creator_ids = {}  # dict with ID keys and counts of dc-terms:creator
     self.uuidlist = []
     self.parents = {}  # dict of uuids for parent entities to keep them in memory
Example #4
 def __init__(self):
     self.tree = None
     self.project_uuid = False
     self.source_id = False
     self.import_persons = {}
     self.root_subject_label = False
     self.root_subject_uuid = False
     self.root_subject_context = False
     self.root_subject_class = 'oc-gen:cat-site'
     self.root_subject_sup_id = 'auto-root'
     self.load_into_importer = False
     self.dt_attribute_objs = LastUpdatedOrderedDict()
     self.attributes = LastUpdatedOrderedDict()
     self.entity_types = LastUpdatedOrderedDict()
     self.relation_types = LastUpdatedOrderedDict()
     self.entities = LastUpdatedOrderedDict()
     self.oc_config_relation_types = 'oc-relation-types'
     self.oc_config_entity_types = 'oc-entity-types'
     self.oc_config_attributes = 'oc-attributes'
     self.oc_config_entities = 'oc-entities'
     self.reconcile_key = 'faims_id'
     self.ent_type_pred_sup_id = 'auto-entity-type'
     self.fm = FileManage()
Example #5
 def get_predicate_uuids(self):
     """ Gets predicate uuids for a table """
     self.entities = {}  # resets the entities; no need to keep context entities in memory
     self.check_reload_fields_from_db()  # gets fields from DB, if process was interrupted
     limit_obs = False
     if isinstance(self.obs_limits, list):
         if len(self.obs_limits) > 0:
             limit_obs = True
     uuids = UUIDListExportTable(self.table_id).uuids
     # seems faster than a select distinct with a join.
     for uuid in uuids:
         if limit_obs:
             pred_uuids = Assertion.objects\
                                   .values_list('predicate_uuid', flat=True)\
                                   .filter(uuid=uuid,
                                           obs_num__in=self.obs_limits)
         else:
             pred_uuids = Assertion.objects\
                                   .values_list('predicate_uuid', flat=True)\
                                   .filter(uuid=uuid)
         item_preds = LastUpdatedOrderedDict()
         for pred_uuid in pred_uuids:
             if pred_uuid not in item_preds:
                 item_preds[pred_uuid] = 1
             else:
                 item_preds[pred_uuid] += 1
         for pred_uuid, count in item_preds.items():
             if pred_uuid not in self.predicate_uuids:
                 pred_label = self.deref_entity_label(pred_uuid)
                 pred_type = self.entities[pred_uuid].data_type
                 self.predicate_uuids[pred_uuid] = {'count': count,
                                                    'label': pred_label,
                                                    'type': pred_type}
             else:
                 if self.predicate_uuids[pred_uuid]['count'] < count:
                     self.predicate_uuids[pred_uuid]['count'] = count
     return self.predicate_uuids
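
The method returns a dict keyed by predicate uuid, recording each predicate's label, data type, and the maximum number of times it occurs on any single item. A hypothetical call (the table_id is made up):

    exporter = Create()
    exporter.table_id = 'demo-export-table'
    preds = exporter.get_predicate_uuids()
    # preds is shaped like (hypothetical values):
    # {'pred-uuid-1': {'count': 2, 'label': 'Material', 'type': 'id'},
    #  'pred-uuid-2': {'count': 1, 'label': 'Weight', 'type': 'xsd:double'}}
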
Example #6
 def load_or_classify_attributes(self, act_dir):
     """ loads or classifies attributes in a tree """
     key = self.oc_config_attributes
     json_obj = self.fm.get_dict_from_file(key, act_dir)
     if json_obj is None:
         # need to read the XML and make the classifications from scratch
         self.classify_xml_tree_attributes()
         # now make dictionary objects to save as JSON
         self.attributes = LastUpdatedOrderedDict()
         for prop_id, dt_class_obj in self.dt_attribute_objs.items():
             attrib_dict = dt_class_obj.make_dict_obj()
             attrib_dict['predicate_type'] = 'variable'  # default type
             attrib_dict['oc-equiv'] = None  # default to no equivalence
             attrib_dict = self.check_attribute_as_identifier(attrib_dict,
                                                              ImportFieldAnnotation.PRED_CONTAINED_IN)
             if prop_id not in self.attributes:
                 self.attributes[prop_id] = attrib_dict
         self.fm.save_serialized_json(key,
                                      act_dir,
                                      self.attributes)
     else:
         # we have JSON with dictionary objects to read into the classes
         self.attributes = json_obj
         for prop_id, attrib_dict in self.attributes.items():
             dt_class_obj = DescriptionDataType()
             ok = dt_class_obj.read_dict_obj(attrib_dict)
             if ok:
                 self.dt_attribute_objs[prop_id] = dt_class_obj
         # now update if new attributes were found
         save_update = False
         for prop_id, dt_class_obj in self.dt_attribute_objs.items():
             attrib_dict = dt_class_obj.make_dict_obj()
             attrib_dict['predicate_type'] = 'variable'  # default type
             attrib_dict['oc-equiv'] = None  # default to no equivalence
             attrib_dict = self.check_attribute_as_identifier(attrib_dict,
                                                              ImportFieldAnnotation.PRED_CONTAINED_IN)
             if prop_id not in self.attributes:
                 save_update = True
                 self.attributes[prop_id] = attrib_dict
         if save_update:
             self.fm.save_serialized_json(key,
                                          act_dir,
                                          self.attributes)
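
Because the method caches its classifications as JSON, only the first run pays for the XML pass; a sketch of the round trip (the importer class name and directory are hypothetical):

    importer = FaimsImport()  # hypothetical name for the class that owns this method
    importer.load_or_classify_attributes('my-faims-dir')  # 1st run: classify XML, save JSON
    importer.load_or_classify_attributes('my-faims-dir')  # later runs: load the saved JSON
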
Example #7
class Create():

    EQUIV_PREDICATES = ['skos:closeMatch',
                        'http://www.w3.org/2004/02/skos/core#closeMatch']

    def __init__(self):
        self.table_id = False
        self.label = False
        self.dates_bce_ce = True  # calendar dates in BCE/CE, if false BP
        self.include_equiv_ld = True  # include linked data related by EQUIV_PREDICATES
        self.include_ld_obj_uris = True  # include URIs to linked data objects
        self.include_ld_source_values = True  # include original values annotated as
                                              # equivalent to linked data
        self.boolean_multiple_ld_fields = 'yes'  # for multiple values of linked data
                                                 # (same predicate, multiple objects)
                                                 # make multiple fields if NOT False.
                                                 # When this value is NOT False, its
                                                 # string value indicates presence of
                                                 # a linked data object uri.
        self.include_original_fields = False  # include original field data
        self.fields = []
        self.context_fields = LastUpdatedOrderedDict()
        self.ld_fields = LastUpdatedOrderedDict()
        self.predicate_fields = LastUpdatedOrderedDict()
        self.multi_source_value_delim = '; '  # delimiter for multiple values in source data field
        self.obs_limits = []  # limits predicate exports to listed observation numbers, no limit if empty
        self.entities = {}
        self.predicate_uris_boolean_types = False  # predicate_uris expressed as boolean types
        self.predicate_uuids = LastUpdatedOrderedDict()  # predicate uuids used with a table
        self.ld_predicates = LastUpdatedOrderedDict()  # unique linked_data predicates
        self.ld_object_equivs = LastUpdatedOrderedDict()  # unique linked data equivalents for object uuids
        self.dc_contributor_ids = {}  # dict with ID keys and counts of dc-terms:contributor
        self.dc_creator_ids = {}  # dict with ID keys and counts of dc-terms:creator
        self.uuidlist = []
        self.parents = {}  # dict of uuids for parent entities to keep them in memory

    def prep_default_fields(self):
        """ Prepares initial set of default fields for export tables """
        self.fields.append({'label': 'URI',
                            'rel_ids': ['@id'],
                            'field_num': 1})
        self.fields.append({'label': 'Label',
                            'rel_ids': ['label'],
                            'field_num': 2})
        self.fields.append({'label': 'Project',
                            'rel_ids': ['proj-label'],
                            'field_num': 3})
        self.fields.append({'label': 'Project URI',
                            'rel_ids': ['proj-uri'],
                            'field_num': 4})
        self.fields.append({'label': 'Item Category',
                            'rel_ids': ['item-category'],
                            'field_num': 5})
        self.fields.append({'label': 'Last Updated',
                            'rel_ids': ['last-updated'],
                            'field_num': 6})
        self.fields.append({'label': 'Authorship',
                            'rel_ids': ['authorship'],
                            'field_num': 7})
        self.fields.append({'label': 'Latitude (WGS-84)',
                            'rel_ids': ['latitude'],
                            'field_num': 8})
        self.fields.append({'label': 'Longitude (WGS-84)',
                            'rel_ids': ['longitude'],
                            'field_num': 9})
        self.fields.append({'label': 'Geospatial note',
                            'rel_ids': ['geospatial-note'],
                            'field_num': 10})
        if self.dates_bce_ce:
            self.fields.append({'label': 'Early Date (BCE/CE)',
                                'rel_ids': ['early-bce-ce'],
                                'field_num': 11})
            self.fields.append({'label': 'Late Date (BCE/CE)',
                                'rel_ids': ['late-bce-ce'],
                                'field_num': 12})
        else:
            self.fields.append({'label': 'Early Date (BP)',
                                'rel_ids': ['early-bp'],
                                'field_num': 11})
            self.fields.append({'label': 'Late Date (BP)',
                                'rel_ids': ['late-bp'],
                                'field_num': 12})
        self.fields.append({'label': 'Context URI',
                            'rel_ids': ['context-uri'],
                            'field_num': 13})
        for field in self.fields:
            self.save_field(field)
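
    # Fields 1-10 and 13 are fixed; fields 11-12 switch between BCE/CE and
    # BP labels depending on self.dates_bce_ce. Predicate, linked-data, and
    # context fields are appended after these defaults as they are created.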

    def save_field(self, field):
        """ Saves a record of a field """
        exfield = ExpField()
        exfield.table_id = self.table_id
        exfield.field_num = field['field_num']
        exfield.label = field['label']
        exfield.rel_ids = json.dumps(field['rel_ids'], ensure_ascii=False)
        exfield.save()

    def check_reload_fields_from_db(self):
        """ Reloads fields, incase a process was interrupted """
        if len(self.fields) < 1:
            exfields = ExpField.objects\
                               .filter(table_id=self.table_id)\
                               .order_by('field_num')
            for exfield in exfields:
                field = {}
                field['field_num'] = exfield.field_num
                field['label'] = exfield.label
                field['rel_ids'] = json.loads(exfield.rel_ids)
                self.fields.append(field)

    def prep_process_uuids_by_projects_class(self, project_uuids, class_uri):
        """ Gets a list of uuids and basic metadata about items for the
            export table. Does so in the simplest way, filtering only
            by a list of project_uuids and class_uri """
        self.prep_default_fields()
        self.uuidlist = UUIDListSimple(project_uuids, class_uri).uuids
        self.process_uuid_list(self.uuidlist)
        self.get_predicate_uuids()  # now prepare to do item descriptions
        self.get_predicate_link_annotations()  # even if not showing linked data
        self.process_ld_predicates_values()  # only if exporting linked data
        self.save_ld_fields()  # only if exporting linked data
        self.update_table_metadata()  # save a record of the table metadata

    def prep_process_uuid_list(self, uuids, do_linked_data=False):
        """ prepares default fields and exports a list of items """
        self.uuidlist = uuids
        self.prep_default_fields()
        self.process_uuid_list(self.uuidlist)
        self.get_predicate_uuids()  # now prepare to do item descriptions
        self.get_predicate_link_annotations()  # even if not showing linked data
        if do_linked_data:
            self.process_ld_predicates_values()  # only if exporting linked data
            self.save_ld_fields()  # only if exporting linked data
        self.save_source_fields()  # save source data, possibly limited by observations
        self.update_table_metadata()  # save a record of the table metadata

    def process_uuid_list(self, uuids, starting_row=1):
        row_num = starting_row
        for uuid in uuids:
            try:
                man = Manifest.objects.get(uuid=uuid)
            except Manifest.DoesNotExist:
                man = False
            if man is not False:
                print(str(row_num) + ': ' + str(uuid))
                self.save_basic_default_field_cells(row_num, man)
                self.save_authorship(row_num, man)
                context_metadata = self.get_parents_context_metadata(man.uuid)
                self.save_default_geo(row_num, man, context_metadata['geo'])
                self.save_default_chrono(row_num, man, context_metadata['event'])
                self.save_context(row_num, man, context_metadata['p_list'])
                row_num += 1
            else:
                print(uuid + ' Failed!')

    def get_parents_context_metadata(self, uuid):
        """ get all parents from memory or by DB lookups """
        if len(self.parents) >= 5000:
            self.parents = {}
        par_res = Assertion.objects\
                           .filter(object_uuid=uuid,
                                   predicate_uuid=Assertion.PREDICATES_CONTAINS)[:1]
        if len(par_res) > 0:
            # item has a parent
            parent_uuid = par_res[0].uuid
            if parent_uuid not in self.parents:
                # we don't have a context path parent list for this parent in memory yet
                # so let's go and make it
                p_list = []
                act_contain = Containment()
                raw_parents = act_contain.get_parents_by_child_uuid(parent_uuid)
                if raw_parents is not False:
                    if len(raw_parents) > 0:
                        for tree_node, r_parents in raw_parents.items():
                            p_list = r_parents
                            break
                p_list.insert(0, parent_uuid)  # add the 1st parent to the start of the list
                context_metadata = {'p_list': p_list}
                self.parents[parent_uuid] = context_metadata
            else:
                context_metadata = self.parents[parent_uuid] 
        else:
            parent_uuid = False
            context_metadata = {}  # no parent found; start with empty context metadata
        # now get geo and chrono metadata
        context_metadata = self.get_geo_chrono_metadata(uuid,
                                                        parent_uuid,
                                                        context_metadata)
        return context_metadata

    def get_geo_chrono_metadata(self, uuid, parent_uuid, context_metadata):
        """ gets and saves geo and chrono metadata """ 
        act_contain = Containment()
        geo_meta = False
        event_meta = False
        uuid_geo = Geospace.objects.filter(uuid=uuid)[:1]
        if len(uuid_geo) > 0:
            geo_meta = uuid_geo  # keep the sliced queryset; save_default_geo iterates over it
        else:
            # geo information for this item not found, look to parents
            if parent_uuid is not False \
               and 'p_list' in context_metadata:
                # we have at least 1 parent
                if 'p_geo' not in context_metadata:
                    # no saved geo information in this context path, so look it up 
                    p_list = context_metadata['p_list']
                    geo_meta = act_contain.get_geochron_from_subject_list(p_list, 'geo')
                    context_metadata['p_geo'] = geo_meta
                    self.parents[parent_uuid] = context_metadata
                else:
                    # we have saved geo information for this context path so use it
                    geo_meta = context_metadata['p_geo']
        uuid_event = Event.objects.filter(uuid=uuid)[:1]
        if len(uuid_event) > 0:
            event_meta = uuid_event
        else:
            # chrono information for this item not found, look to parents
            if parent_uuid is not False \
               and 'p_list' in context_metadata:
                # we have at least 1 parent
                if 'p_event' not in context_metadata:
                    # no saved chrono information in this context path, so look it up 
                    p_list = context_metadata['p_list']
                    event_meta = act_contain.get_geochron_from_subject_list(p_list, 'event')
                    context_metadata['p_event'] = event_meta
                    self.parents[parent_uuid] = context_metadata
                else:
                    # we have saved chrono information for this context path so use it
                    event_meta = context_metadata['p_event']
        context_metadata['geo'] = geo_meta
        context_metadata['event'] = event_meta
        return context_metadata

    def get_predicate_uuids(self):
        """ Gets predicate uuids for a table """
        self.entities = {}  # resets the entities; no need to keep context entities in memory
        self.check_reload_fields_from_db()  # gets fields from DB, if process was interrupted
        limit_obs = False
        if isinstance(self.obs_limits, list):
            if len(self.obs_limits) > 0:
                limit_obs = True
        uuids = UUIDListExportTable(self.table_id).uuids
        # seems faster than a select distinct with a join.
        for uuid in uuids:
            if limit_obs:
                pred_uuids = Assertion.objects\
                                      .values_list('predicate_uuid', flat=True)\
                                      .filter(uuid=uuid,
                                              obs_num__in=self.obs_limits)
            else:
                pred_uuids = Assertion.objects\
                                      .values_list('predicate_uuid', flat=True)\
                                      .filter(uuid=uuid)
            item_preds = LastUpdatedOrderedDict()
            for pred_uuid in pred_uuids:
                if pred_uuid not in item_preds:
                    item_preds[pred_uuid] = 1
                else:
                    item_preds[pred_uuid] += 1
            for pred_uuid, count in item_preds.items():
                if pred_uuid not in self.predicate_uuids:
                    pred_label = self.deref_entity_label(pred_uuid)
                    pred_type = self.entities[pred_uuid].data_type
                    self.predicate_uuids[pred_uuid] = {'count': count,
                                                       'label': pred_label,
                                                       'type': pred_type}
                else:
                    if self.predicate_uuids[pred_uuid]['count'] < count:
                        self.predicate_uuids[pred_uuid]['count'] = count
        return self.predicate_uuids

    def get_predicate_link_annotations(self):
        """ Gets the link data annotations for predicates used on a table """
        auth = Authorship()
        for pred_uuid, pred in self.predicate_uuids.items():
            la_s = LinkAnnotation.objects\
                                 .filter(subject=pred_uuid)
            if len(la_s) > 0:
                self.predicate_uuids[pred_uuid]['annotations'] = []
                self.predicate_uuids[pred_uuid]['ld-equiv'] = []
            for la in la_s:
                link_anno = {'pred': la.predicate_uri,
                             'obj': la.object_uri}
                self.predicate_uuids[pred_uuid]['annotations'].append(link_anno)
                if la.predicate_uri in self.EQUIV_PREDICATES:
                    authorship = auth.check_authorship_object(la.object_uri)
                    if authorship is False:  # only keep predicates not related to authorship
                        pred_ld_equiv_uri = la.object_uri  # the object_uri is equivalent to
                                                           # the predicate_uuid
                        self.predicate_uuids[pred_uuid]['ld-equiv'].append(pred_ld_equiv_uri)
                        if la.object_uri not in self.ld_predicates:
                            pred_equiv_label = self.deref_entity_label(pred_ld_equiv_uri)
                            self.ld_predicates[pred_ld_equiv_uri] = {'uuids': [pred_uuid],
                                                                     'obj_uuids': {},
                                                                     'obj_uris': [],
                                                                     'label': pred_equiv_label}
                        else:
                            self.ld_predicates[pred_ld_equiv_uri]['uuids'].append(pred_uuid)
        return self.ld_predicates

    def process_ld_predicates_values(self):
        """ Processes linked uri equivalents for predicates to
            get linked data for objects associated with these predicates
        """
        if self.include_equiv_ld and len(self.ld_predicates) > 0:
            for pred_ld_equiv_uri, ld_pred in self.ld_predicates.items():
                self.get_ld_predicate_values(pred_ld_equiv_uri)

    def get_ld_predicate_values(self, pred_ld_equiv_uri):
        """ gets a list of object_uuids used with predicates related to a
            ld_field_uri
        """
        object_uuids = Assertion.objects\
                                .values_list('object_uuid', flat=True)\
                                .filter(predicate_uuid__in=self.ld_predicates[pred_ld_equiv_uri]['uuids'])\
                                .distinct()
        for obj_uuid in object_uuids:
            if obj_uuid not in self.ld_object_equivs:
                self.ld_object_equivs[obj_uuid] = []
            if obj_uuid not in self.ld_predicates[pred_ld_equiv_uri]['obj_uuids']:
                obj_equiv_uris = []
                # get link data annotations for the object_uuid
                la_s = LinkAnnotation.objects\
                                     .filter(subject=obj_uuid)
                for la in la_s:
                    if la.predicate_uri in self.EQUIV_PREDICATES:
                        obj_equiv_uri = la.object_uri
                        if obj_equiv_uri not in self.ld_predicates[pred_ld_equiv_uri]['obj_uris']:
                            self.ld_predicates[pred_ld_equiv_uri]['obj_uris'].append(obj_equiv_uri)
                        if obj_equiv_uri not in self.ld_object_equivs[obj_uuid]:
                            self.ld_object_equivs[obj_uuid].append(obj_equiv_uri)
        return self.ld_predicates[pred_ld_equiv_uri]

    def do_boolean_multiple_ld_fields(self, pred_ld_equiv_uri):
        """ Checks to see if a ld_field_uri (equivalent to a predicate_uuid in assertions)
            has multiple values in a given item. If so, then returns true.
            Otherwise, this returns false.
        """
        output = False
        if self.boolean_multiple_ld_fields is not False:
            if pred_ld_equiv_uri in self.ld_predicates:
                for predicate_uuid in self.ld_predicates[pred_ld_equiv_uri]['uuids']:
                    if predicate_uuid in self.predicate_uuids:
                        if self.predicate_uuids[predicate_uuid]['count'] > 1:
                            output = True
        return output
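
    # Note: boolean_multiple_ld_fields is either False (all values of a
    # linked-data predicate share one field) or a string such as 'yes',
    # which add_ld_cells writes as the cell value when each predicate +
    # object pair gets its own presence field.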

    def save_source_fields(self):
        """ Creates fields for source data, then saves
            records of source data for each item in the export
            table
        """
        if self.include_original_fields and len(self.predicate_uuids) > 0:
            limit_obs = False
            if isinstance(self.obs_limits, list):
                if len(self.obs_limits) > 0:
                    limit_obs = True
            pred_uuid_list = []
            for predicate_uuid, pred_dict in self.predicate_uuids.items():
                field_num = self.get_add_predicate_field_number(predicate_uuid)
                pred_uuid_list.append(predicate_uuid)
            # get the rows for the export table
            rows = UUIDsRowsExportTable(self.table_id).rows
            for row in rows:
                if limit_obs:
                    item_data = Assertion.objects.filter(uuid=row['uuid'],
                                                         predicate_uuid__in=pred_uuid_list,
                                                         obs_num__in=self.obs_limits)
                else:
                    item_data = Assertion.objects.filter(uuid=row['uuid'],
                                                         predicate_uuid__in=pred_uuid_list)
                if len(item_data) > 0:
                    self.add_source_cells(row['uuid'],
                                          row['row_num'],
                                          item_data)

    def add_source_cells(self, uuid, row_num, item_data):
        """ Adds source data records for an assertion """
        predicate_values = LastUpdatedOrderedDict()
        project_uuid = item_data[0].project_uuid
        for assertion in item_data:
            predicate_uuid = assertion.predicate_uuid
            object_uuid = assertion.object_uuid
            if assertion.object_type == 'xsd:string':
                try:
                    oc_str = OCstring.objects.get(uuid=object_uuid)
                    obj_val = oc_str.content
                except OCstring.DoesNotExist:
                    obj_val = ''
            elif assertion.object_type in ['xsd:integer', 'xsd:double']:
                # numeric value
                obj_val = str(assertion.data_num)
            elif assertion.object_type == 'xsd:date':
                obj_val = str(assertion.data_date)
            else:
                obj_val = str(self.deref_entity_label(object_uuid))
            if predicate_uuid not in predicate_values:
                # make a list, since some predicates are multi-valued
                predicate_values[predicate_uuid] = []
            predicate_values[predicate_uuid].append(obj_val)
        for predicate_uuid, val_list in predicate_values.items():
            field_num = self.get_add_predicate_field_number(predicate_uuid)
            cell = ExpCell()
            cell.table_id = self.table_id
            cell.uuid = uuid
            cell.project_uuid = project_uuid
            cell.row_num = row_num
            cell.field_num = field_num
            cell.record = self.multi_source_value_delim.join(val_list)  # semi-colon delim for multivalued predicates
            cell.save()
            cell = None

    def get_add_predicate_field_number(self, predicate_uuid):
        """ Gets the field_num for a source predicate_uuid field,
            given the predicate_uuid.
            Creates a new field for the predicate as needed
        """
        if predicate_uuid in self.predicate_fields:
            field_num = self.predicate_fields[predicate_uuid]
        else:
            field_num = len(self.fields) + 1
            label = self.deref_entity_label(predicate_uuid) + ' [Source]'
            rel_ids = [predicate_uuid]
            field = {'label': label,
                     'rel_ids': rel_ids,
                     'field_num': field_num}
            self.fields.append(field)
            self.save_field(field)
            self.predicate_fields[predicate_uuid] = field_num
        return field_num

    def save_ld_fields(self):
        """ Creates fields for linked data, then saves
            records of linked data for each item in the export
            table
        """
        if self.include_equiv_ld and len(self.ld_predicates) > 0:
            for pred_ld_equiv_uri, ld_pred in self.ld_predicates.items():
                if self.do_boolean_multiple_ld_fields(pred_ld_equiv_uri):
                    le_sort = LinkEntitySorter()
                    #  sort the URIs for the objects, so the fields come in a
                    #  nice, reasonable order.
                    sort_obj_uris = le_sort.sort_ld_entity_list(ld_pred['obj_uris'])
                    for ld_obj_uri in sort_obj_uris:
                        # make a field for each linked data pred and object
                        field_num = self.get_add_ld_field_number('[Has]',
                                                                 pred_ld_equiv_uri,
                                                                 ld_obj_uri)
                else:
                    if self.include_ld_obj_uris:
                        field_num = self.get_add_ld_field_number('[URI]',
                                                                 pred_ld_equiv_uri)
                    field_num = self.get_add_ld_field_number('[Label]',
                                                             pred_ld_equiv_uri)
                    if self.include_ld_source_values:
                        field_num = self.get_add_ld_field_number('[Source]',
                                                                 pred_ld_equiv_uri)
            # get the rows for the export table
            rows = UUIDsRowsExportTable(self.table_id).rows
            for row in rows:
                for pred_ld_equiv_uri, ld_pred in self.ld_predicates.items():
                    item_data = Assertion.objects.filter(uuid=row['uuid'],
                                                         predicate_uuid__in=ld_pred['uuids'])
                    if len(item_data) > 0:
                        self.add_ld_cells(row['uuid'],
                                          row['row_num'],
                                          item_data,
                                          pred_ld_equiv_uri)

    def add_ld_cells(self, uuid, row_num, item_data, pred_ld_equiv_uri):
        """ Adds linked data records for an assertion """
        if self.do_boolean_multiple_ld_fields(pred_ld_equiv_uri):
            multi_ld_fields = True
        else:
            multi_ld_fields = False
        obj_values = LastUpdatedOrderedDict()
        obj_values['[URI]'] = []
        obj_values['[Label]'] = []
        obj_values['[Source]'] = []
        project_uuid = item_data[0].project_uuid
        for assertion in item_data:
            object_uuid = assertion.object_uuid
            if assertion.object_type == 'xsd:string':
                try:
                    oc_str = OCstring.objects.get(uuid=object_uuid)
                    obj_label = oc_str.content
                except OCstring.DoesNotExist:
                    obj_label = ''
            else:
                obj_label = self.deref_entity_label(object_uuid)
                obj_label = str(obj_label)
            if obj_label not in obj_values['[Source]']:
                obj_values['[Source]'].append(obj_label)
            obj_ld_found = False
            if object_uuid in self.ld_object_equivs:
                for obj_ld_equiv_uri in self.ld_object_equivs[object_uuid]:
                    obj_ld_found = True
                    if multi_ld_fields:
                        cell_value = self.boolean_multiple_ld_fields
                        field_num = self.get_add_ld_field_number('[Has]',
                                                                 pred_ld_equiv_uri,
                                                                 obj_ld_equiv_uri)
                        cell = ExpCell()
                        cell.table_id = self.table_id
                        cell.uuid = uuid
                        cell.project_uuid = project_uuid
                        cell.row_num = row_num
                        cell.field_num = field_num
                        cell.record = cell_value
                        cell.save()
                        cell = None
                    else:
                        # predicate not broken into separate fields for different values
                        obj_equiv_label = self.deref_entity_label(obj_ld_equiv_uri)
                        if obj_equiv_label is False:
                            obj_equiv_label = obj_ld_equiv_uri
                        if obj_equiv_label not in obj_values['[Label]']:
                            obj_values['[Label]'].append(obj_equiv_label)
                        if obj_ld_equiv_uri not in obj_values['[URI]']:
                            obj_values['[URI]'].append(obj_ld_equiv_uri)
            if obj_ld_found is False:
                print('No linked data for object: ' + object_uuid)
        if multi_ld_fields is False:
            # predicate not broken into separate fields for different values
            for field_type, value_list in obj_values.items():
                if len(value_list) > 0:
                    try:
                        cell_value = '; '.join(value_list)
                    except TypeError:
                        # some messiness in the data, won't join into a string
                        cell_value = False
                        for val in value_list:
                            val = str(val)
                            if cell_value is False:
                                cell_value = val
                            else:
                                cell_value += '; ' + val
                    field_num = self.get_add_ld_field_number(field_type,
                                                             pred_ld_equiv_uri)
                    cell = ExpCell()
                    cell.table_id = self.table_id
                    cell.uuid = uuid
                    cell.project_uuid = project_uuid
                    cell.row_num = row_num
                    cell.field_num = field_num
                    cell.record = cell_value
                    cell.save()
                    cell = None

    def get_add_ld_field_number(self,
                                field_type,
                                pred_ld_equiv_uri,
                                obj_ld_equiv_uri=False):
        """ Gets the field_num for a linked data field, given the uri
            for the linked data field, and optionally the object
            Creates a new field for the linked data as needed
        """
        if obj_ld_equiv_uri is not False:
            field_key = pred_ld_equiv_uri + '::' + obj_ld_equiv_uri
        else:
            field_key = pred_ld_equiv_uri
        if field_type is not False:
            if len(field_type) > 0:
                field_key += '::' + field_type
        else:
            field_key += '::[Type unknown]'
        if field_key in self.ld_fields:
            field_num = self.ld_fields[field_key]
        else:
            field_num = len(self.fields) + 1
            label = self.deref_entity_label(pred_ld_equiv_uri)
            if label is False:
                label = pred_ld_equiv_uri
            rel_ids = [field_type, pred_ld_equiv_uri]
            if obj_ld_equiv_uri is not False:
                rel_ids.append(obj_ld_equiv_uri)
                obj_label = self.deref_entity_label(obj_ld_equiv_uri)
                if obj_label is False:
                    obj_label = obj_ld_equiv_uri
                label = label + ' :: ' + str(obj_label)
            if field_type is not False:
                if len(field_type) > 0:
                    label += ' ' + field_type
            field = {'label': label,
                     'rel_ids': rel_ids,
                     'field_num': field_num}
            self.fields.append(field)
            self.save_field(field)
            self.ld_fields[field_key] = field_num
        return field_num

    def save_context(self, row_num, man, parent_list):
        """ Save context information, will also add new context fields
            as needed
        """
        use_parents = False
        context_uri = ''
        if isinstance(parent_list, list):
            if len(parent_list) > 0:
                context_uri = URImanagement.make_oc_uri(parent_list[0], 'subjects')
                use_parents = parent_list[::-1]
        # save a record of the context URI
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 13
        cell.record = context_uri
        cell.save()
        cell = None
        if use_parents is not False:
            pindex = 0
            for parent_uuid in use_parents:
                pindex += 1
                context_label = self.deref_entity_label(parent_uuid)
                field_num = self.get_add_context_field_number(pindex)
                cell = ExpCell()
                cell.table_id = self.table_id
                cell.uuid = man.uuid
                cell.project_uuid = man.project_uuid
                cell.row_num = row_num
                cell.field_num = field_num
                cell.record = context_label
                cell.save()
                cell = None

    def get_add_context_field_number(self, pindex):
        """ Gets the field_num for a context field, given the pindex
            which indicates depth in the context hierarchy.
            Creates a new field for the context level as needed
        """
        if pindex in self.context_fields:
            field_num = self.context_fields[pindex]
        else:
            field_num = len(self.fields) + 1
            field = {'label': 'Context (' + str(pindex) + ')',
                     'rel_ids': ['context', pindex],
                     'field_num': field_num}
            self.fields.append(field)
            self.save_field(field)
            self.context_fields[pindex] = field_num
        return field_num

    def save_default_chrono(self, row_num, man, event_meta):
        """ Saves earliest / latest times for an item """
        earliest = ''
        latest = ''
        if event_meta is not False:
            times = []
            for event in event_meta:
                times.append(event.start)
                times.append(event.stop)
            earliest = min(times)
            latest = max(times)
            if self.dates_bce_ce is False:
                earliest = 1950 - earliest
                latest = 1950 - latest
            earliest = round(earliest, 0)
            latest = round(latest, 0)
        # save earliest
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 11
        cell.record = str(earliest)
        cell.save()
        cell = None
        # save latest
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 12
        cell.record = str(latest)
        cell.save()
        cell = None

    def save_default_geo(self, row_num, man, geo_meta):
        """ Saves geo lat / lon data for an item """
        latitude = ''
        longitude = ''
        note = 'Best available location data'
        if geo_meta is not False:
            for geo in geo_meta:
                if geo.meta_type == 'oc-gen:discovey-location':
                    latitude = geo.latitude
                    longitude = geo.longitude
                    if geo.specificity < 0:
                        note = 'Location approximated '
                        note += 'as a security precaution (Zoom: ' + str(abs(geo.specificity)) + ')'
                    break
        # save Latitude
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 8
        cell.record = str(latitude)
        cell.save()
        cell = None
        # save Longitude
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 9
        cell.record = str(longitude)
        cell.save()
        cell = None
        # save Note
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 10
        cell.record = note
        cell.save()
        cell = None

    def save_authorship(self, row_num, man):
        """ Saves authorship information """
        authors = ''
        auth = Authorship()
        found = auth.get_authors(man.uuid,
                                 man.project_uuid)
        if found:
            # save counts of different dc-terms:creator for use as table metadata
            for auth_id in auth.creators:
                if auth_id not in self.dc_creator_ids:
                    self.dc_creator_ids[auth_id] = 0
                self.dc_creator_ids[auth_id] += 1
            # save counts of different dc-terms:contributor for use as table metadata    
            for auth_id in auth.contributors:
                if auth_id not in self.dc_contributor_ids:
                    self.dc_contributor_ids[auth_id] = 0
                self.dc_contributor_ids[auth_id] += 1    
            all_author_ids = auth.creators + auth.contributors
            all_authors = []
            for auth_id in all_author_ids:
                author = self.deref_entity_label(auth_id)
                all_authors.append(author)
            authors = '; '.join(all_authors)
        # save Authors
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 7
        cell.record = authors
        cell.save()
        cell = None

    def save_basic_default_field_cells(self, row_num, man):
        """ Saves the default fields that do not involve containment lookups """
        # save URI
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 1
        cell.record = URImanagement.make_oc_uri(man.uuid, man.item_type)
        cell.save()
        cell = None
        # save label
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 2
        cell.record = man.label
        cell.save()
        cell = None
        # save project label
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 3
        cell.record = self.deref_entity_label(man.project_uuid)
        cell.save()
        cell = None
        # save project URI
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 4
        cell.record = URImanagement.make_oc_uri(man.project_uuid, 'projects')
        cell.save()
        cell = None
        # save item category / class
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 5
        cell.record = self.deref_entity_label(man.class_uri)
        cell.save()
        cell = None
        # last updated
        if isinstance(man.revised, datetime):
            last_update = man.revised
        else:
            last_update = man.record_updated
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 6
        cell.record = last_update.strftime('%Y-%m-%d')
        cell.save()
        cell = None

    def update_table_metadata(self):
        """ saves the final table author metadata """
        try:
            exp_tab = ExpTable.objects.get(table_id=self.table_id)
        except ExpTable.DoesNotExist:
            exp_tab = ExpTable()
            exp_tab.table_id = self.table_id
            exp_tab.label = '[Not yet named]'
        tcells_ok = ExpCell.objects.filter(table_id=self.table_id)[:1]
        if len(tcells_ok):
            sum_cell = ExpCell.objects\
                              .filter(table_id=self.table_id)\
                              .aggregate(Max('row_num'))
            exp_tab.row_count = sum_cell['row_num__max']
        else:
            exp_tab.row_count = 0
        tfields_ok = ExpField.objects.filter(table_id=self.table_id)[:1]
        if len(tfields_ok):
            sum_field = ExpField.objects\
                                .filter(table_id=self.table_id)\
                                .aggregate(Max('field_num'))
            exp_tab.field_count = sum_field['field_num__max']
        else:
            exp_tab.field_count = 0
        authors = LastUpdatedOrderedDict()
        if len(self.dc_contributor_ids) > 0:
            sauthors = sorted(self.dc_contributor_ids.items(),
                              key=lambda x: (-x[1], x[0]))
            authors['dc-terms:contributor'] = self.add_author_list(sauthors,
                                                                   'contributor')
        if len(self.dc_creator_ids) > 0:
            sauthors = sorted(self.dc_creator_ids.items(),
                              key=lambda x: (-x[1], x[0]))
            authors['dc-terms:creator'] = self.add_author_list(sauthors,
                                                               'creator')
        exp_tab.meta_json = authors
        exp_tab.save()

    def add_author_list(self, sauthors, dc_type):
        """ makes an author list from a sorted tuple of
            author identifiers
        """
        i = 0
        author_list = []
        for uri_key, count in sauthors:
            i += 1
            auth = LastUpdatedOrderedDict()
            auth['id'] = '#' + dc_type + '-' + str(i)
            if 'http://' in uri_key or 'https://' in uri_key:
                auth['rdfs:isDefinedBy'] = uri_key
            else:
                auth['rdfs:isDefinedBy'] = URImanagement.make_oc_uri(uri_key,
                                                                     'persons')
            auth['label'] = self.deref_entity_label(uri_key)
            auth['count'] = count
            author_list.append(auth)
        return author_list
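
    # Worked example of the sort used in update_table_metadata: the key
    # lambda x: (-x[1], x[0]) orders authors by descending count, breaking
    # ties alphabetically by identifier (hypothetical values):
    #   sorted({'p2': 3, 'p1': 3, 'p3': 5}.items(), key=lambda x: (-x[1], x[0]))
    #   -> [('p3', 5), ('p1', 3), ('p2', 3)]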

    def recursive_context_build(self,
                                parent_level=0):
        """ recusrively builds a list of parent contexts """
        if parent_level == 0:
            sql = 'INSERT INTO exp_records(table_id, uuid, project_uuid,\
                   row_num, field_num, record_id, record)\
                   SELECT exp.table_id, exp.uuid, exp.project_uuid,\
                   exp.row_num, -1, pman.label, ass.uuid \
                   FROM exp_records AS exp \
                   LEFT OUTER JOIN oc_assertions AS ass\
                   ON (ass.object_uuid = exp.uuid \
                       AND ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\') \
                   LEFT OUTER JOIN oc_manifest AS pman ON (ass.uuid = pman.uuid) \
                   WHERE ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\' \
                   AND exp.table_id = \'' + self.table_id + '\' \
                   AND exp.field_num = 1; '
        else:
            sql = 'INSERT INTO exp_records(table_id, uuid, project_uuid,\
                   row_num, field_num, record_id, record)\
                   SELECT exp.table_id, exp.uuid, exp.project_uuid,\
                   exp.row_num, -1, pman.label, ass.uuid \
                   FROM exp_records AS exp \
                   LEFT OUTER JOIN oc_assertions AS ass\
                   ON (ass.object_uuid = exp.uuid \
                       AND ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\') \
                   LEFT OUTER JOIN oc_manifest AS pman ON (ass.uuid = pman.uuid) \
                   WHERE ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\' \
                   AND exp.table_id = \'' + self.table_id + '\' \
                   AND exp.field_num = ' + str(parent_level) + ' ;'
        cursor = connection.cursor()  # assumes: from django.db import connection
        parent_res = cursor.execute(sql)
        print(str(parent_res))
        parent_level = parent_level - 1

    def deref_entity_label(self, entity_id):
        """ Dereferences an entity """
        output = False
        if entity_id in self.entities:
            ent = self.entities[entity_id]
            output = ent.label
        else:
            ent = Entity()
            found = ent.dereference(entity_id)
            if found:
                output = ent.label
                self.entities[entity_id] = ent
            else:
                print('Missing id: ' + entity_id)
        return output
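
Putting the class together, a full export run might look like the sketch below; the table_id, project uuid, and class URI are hypothetical values:

    create = Create()
    create.table_id = 'demo-export-table'
    create.prep_process_uuids_by_projects_class(['demo-project-uuid'],
                                                'oc-gen:cat-object')
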
Example #8
 def infer_assertions_for_item_json_ld(self, json_ld):
     """Makes a list of inferred assertions from item json ld """
     lang_obj = Languages()
     inferred_assertions = []
     if not isinstance(json_ld, dict):
         return inferred_assertions
     if ItemKeys.PREDICATES_OCGEN_HASOBS not in json_ld:
         return inferred_assertions
     unique_pred_assertions = LastUpdatedOrderedDict()
     for obs_dict in json_ld[ItemKeys.PREDICATES_OCGEN_HASOBS]:
         # Get the status of the observation, defaulting to 'active'. If
         # active, then it's OK to infer assertions, otherwise skip the
         # observation.
         obs_status = obs_dict.get(ItemKeys.PREDICATES_OCGEN_OBSTATUS, 'active')
         if obs_status != 'active':
             # Skip this observation. It's there but has a deprecated
             # status.
             continue
         for obs_pred_key, obj_values in obs_dict.items():
             if obs_pred_key in self.LINKDATA_OBS_PREDS_SKIP:
                 # Skip this obs_pred_key, it is a general
                 # description of the observation, and will
                 # not have any linked assertions to infer.
                 continue
             obs_pred_info = self.lookup_predicate(obs_pred_key)
             pred_data_type = self.get_predicate_datatype_for_graph_obj(obs_pred_info)
             equiv_pred_objs = self.get_equivalent_objects(obs_pred_info)
             if not equiv_pred_objs:
                 # No linked data equivalence for the obs_pred_key
                 # so continue, skipping the rest.
                 continue
             # Start with a None assertion.
             assertion = None
             # We're only going to use the first equivalent of a predicate,
             # otherwise this gets too complicated.
             equiv_pred_obj = equiv_pred_objs[0]
             equiv_pred_uri = self.get_id_from_g_obj(equiv_pred_obj)
             # Inferred assertions will have unique LOD predicates, with
             # one or more values. The unique_pred_assertions dict makes
             # sure each LOD predicate is used only once.
             if equiv_pred_uri not in unique_pred_assertions:
                 assertion = equiv_pred_obj
                 assertion['type'] = pred_data_type
                 assertion['ld_objects'] = LastUpdatedOrderedDict()
                 assertion['oc_objects'] = LastUpdatedOrderedDict()
                 assertion['literals'] = []
                 unique_pred_assertions[equiv_pred_uri] = assertion
             # Reuse any existing assertion so values from later observations
             # accumulate under the same LOD predicate.
             assertion = unique_pred_assertions[equiv_pred_uri]
             if assertion and equiv_pred_uri:
                  # we have a LOD equivalent property
                 if not isinstance(obj_values, list):
                     obj_values = [obj_values]
                 for obj_val in obj_values:
                     literal_val = None
                     if not isinstance(obj_val, dict):
                         # the object of the assertion is not a dict, so it must be
                         # a literal
                         literal_val = obj_val
                         if obj_val not in assertion['literals']:
                             assertion['literals'].append(obj_val)
                     elif 'xsd:string' in obj_val:
                         literal_val = lang_obj.get_all_value_str(obj_val['xsd:string'])
                     if literal_val and literal_val not in assertion['literals']:
                         assertion['literals'].append(literal_val)
                     if literal_val is None:
                         # Add any linked data equivalences by looking for this
                         # type in the graph list
                         obj_val = self.lookup_type_by_type_obj(obj_val)
                         obj_uri = self.get_id_from_g_obj(obj_val)
                         equiv_obj_objs = self.get_equivalent_objects(obj_val)           
                         if len(equiv_obj_objs):
                             # We have LD equivalents for the object value
                             for equiv_obj_obj in equiv_obj_objs:
                                 equiv_obj_uri = self.get_id_from_g_obj(equiv_obj_obj)
                                 assertion['ld_objects'][equiv_obj_uri] = equiv_obj_obj
                         elif obj_uri:
                             # We don't have LD equivalents for the object value
                             # add to the oc_objects
                             assertion['oc_objects'][obj_uri] = obj_val
                         unique_pred_assertions[equiv_pred_uri] = assertion
     for pred_key, assertion in unique_pred_assertions.items():                            
         inferred_assertions.append(assertion)
     return inferred_assertions
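
The heart of this method is grouping: every observation's object values accumulate under a single equivalent-predicate URI, so each LOD predicate appears once with one or more values. A reduced sketch of just that grouping step (the (pred_uri, obj_value) input shape is an assumption, not the actual Open Context JSON-LD):

from collections import OrderedDict

def group_by_equiv_predicate(pairs):
    """pairs: iterable of (equiv_pred_uri, obj_value) tuples."""
    grouped = OrderedDict()
    for pred_uri, obj_val in pairs:
        assertion = grouped.setdefault(
            pred_uri, {'literals': [], 'ld_objects': OrderedDict()})
        if not isinstance(obj_val, dict):
            # non-dict object values are literals; avoid duplicates
            if obj_val not in assertion['literals']:
                assertion['literals'].append(obj_val)
        else:
            uri = obj_val.get('id')
            if uri:
                assertion['ld_objects'][uri] = obj_val
    return grouped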
Example #9
    def save_partial_clean_file(self,
                                json_obj,
                                act_dir,
                                filename,
                                id_prop,
                                ok_ids=[],
                                add_props={},
                                combine_json_obj=None):
        """ saves a new json file with clean cordinates (to facilitate debugging) """
        all_ids = False
        if not ok_ids:
            all_ids = True
        new_json = LastUpdatedOrderedDict()
        new_json['type'] = 'FeatureCollection'
        new_json['features'] = []
        for feature in json_obj['features']:
            min_lon = None
            max_lon = None
            min_lat = None
            max_lat = None
            if all_ids or id_prop in feature['properties']:
                feature_id = feature['properties'].get(id_prop)
                feature['id'] = feature_id
                if all_ids or feature_id in ok_ids:
                    if feature_id in add_props:
                        id_add_props = add_props[feature_id]
                        for key, value in id_add_props.items():
                            feature['properties'][key] = value
                            if key == 'uri':
                                uuid = value.split('/')[-1]
                                sub = Subject.objects.get(uuid=uuid)
                                feature['properties'][
                                    'context'] = sub.context.replace(
                                        'Italy/', '')
                                asses = Assertion.objects.filter(
                                    uuid=uuid, object_type='documents')
                                d_uuids = []
                                for ass in asses:
                                    if ass.object_uuid not in d_uuids:
                                        d_uuids.append(ass.object_uuid)
                                d_mans = Manifest.objects.filter(
                                    uuid__in=d_uuids)
                                min_len = 10000000
                                for d_man in d_mans:
                                    if len(d_man.label) < min_len:
                                        min_len = len(d_man.label)
                                        feature['properties'][
                                            'trench-book'] = d_man.label
                    geometry_type = feature['geometry']['type']
                    coordinates = feature['geometry']['coordinates']
                    v_geojson = ValidateGeoJson()
                    c_ok = v_geojson.validate_all_geometry_coordinates(
                        geometry_type, coordinates)
                    if not c_ok:
                        coordinates = v_geojson.fix_geometry_rings_dir(
                            geometry_type, coordinates)
                        feature['geometry']['coordinates'] = coordinates
                    if geometry_type == 'Polygon':
                        poly = Polygon(coordinates)
                        act_feature = geojson.Feature(geometry=poly)
                        cors = geojson.utils.coords(act_feature)
                        for cor in cors:
                            if min_lon is None or min_lon > cor[0]:
                                min_lon = cor[0]
                            if max_lon is None or max_lon < cor[0]:
                                max_lon = cor[0]
                            if min_lat is None or min_lat > cor[1]:
                                min_lat = cor[1]
                            if max_lat is None or max_lat < cor[1]:
                                max_lat = cor[1]
                        if combine_json_obj:
                            feature['properties']['p-uris'] = ''
                            print('Limit to {}, {} :: {}, {}'.format(
                                min_lon, min_lat, max_lon, max_lat))
                            near_contexts = []
                            near_uris = []
                            contexts = []
                            uris = []
                            for cfeature in combine_json_obj['features']:
                                near = True
                                inside = False
                                cgeometry_type = cfeature['geometry']['type']
                                if cgeometry_type == 'Point':
                                    ccors = cfeature['geometry']['coordinates']
                                    if ccors[0] < min_lon or ccors[0] > max_lon:
                                        near = False
                                    if ccors[1] < min_lat or ccors[1] > max_lat:
                                        near = False
                                    spoly = shape(feature['geometry'])
                                    point = Point(ccors)  # create point
                                    inside = spoly.contains(point)
                                    # print('inside?: {}'.format(inside))
                                if 'uri' in cfeature['properties'] and (
                                        near or inside):
                                    uri = cfeature['properties']['uri']
                                    if inside:
                                        uris.append(uri)
                                    if near:
                                        near_uris.append(uri)
                                    uuid = uri.split('/')[-1]
                                    sub = Subject.objects.get(uuid=uuid)
                                    context = '/'.join(
                                        sub.context.split('/')[0:5])
                                    if near:
                                        near_contexts.append(context)
                                    if inside:
                                        contexts.append(context)
                                    # new_json['features'].append(cfeature)
                            n_common_context, n_all_contexts, n_c_uuid = self.make_context_count_str(
                                near_contexts)
                            common_context, all_contexts, c_uuid = self.make_context_count_str(
                                contexts)
                            feature['properties']['p-uris'] = '; '.join(uris)
                            feature['properties'][
                                'n-contexts'] = n_all_contexts
                            feature['properties'][
                                'n-context'] = n_common_context
                            feature['properties']['n-c-uuid'] = n_c_uuid
                            feature['properties']['contexts'] = all_contexts
                            feature['properties']['context'] = common_context
                            feature['properties']['c-uuid'] = c_uuid
                    new_json['features'].append(feature)

        dir_file = self.set_check_directory(
            act_dir) + '/id-clean-coord-' + filename
        self.save_json_file(new_json, None, None, dir_file=dir_file)
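
The vertex scan above computes a plain bounding box, and the 'near' test that follows is a point-in-bbox check. Both steps in isolation (pure Python; assumes (lon, lat) coordinate pairs as in GeoJSON):

def bounding_box(coords):
    """coords: iterable of (lon, lat) pairs, e.g. from a polygon ring."""
    coords = list(coords)
    lons = [c[0] for c in coords]
    lats = [c[1] for c in coords]
    return min(lons), min(lats), max(lons), max(lats)

def point_in_bbox(point, bbox):
    """True if a (lon, lat) point falls inside the bounding box."""
    min_lon, min_lat, max_lon, max_lat = bbox
    lon, lat = point
    return (min_lon <= lon <= max_lon) and (min_lat <= lat <= max_lat)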
Example #10
def entity_annotations(request, subject):
    """ Returns JSON data with
        annotations on a given subject entity
    """
    ent = Entity()
    found = ent.dereference(subject)
    if found is False:
        found = ent.dereference(subject, subject)
    if found:
        # we found the subject entity, now get linked data assertions
        # make an object for computing hrefs to local host version of OC-URIs
        rp = RootPath()
        # make a result dict
        result = LastUpdatedOrderedDict()
        result['list'] = []  # list of link data annotations
        result['preds_objs'] = []  # list of predicates, then of objects
        result['stable_ids'] = []  # list of stable_ids
        la_list = LinkAnnotation.objects\
                                .filter(subject=subject)\
                                .order_by('predicate_uri', 'sort')
        for la in la_list:
            item = LastUpdatedOrderedDict()
            obj_item = LastUpdatedOrderedDict()
            item['hash_id'] = la.hash_id
            obj_item['hash_id'] = la.hash_id
            item['subject'] = la.subject
            item['subject_type'] = la.subject_type
            item['project_uuid'] = la.project_uuid
            if la.sort is None:
                la.sort = 0
            item['sort'] = float(la.sort)
            obj_item['sort'] = float(la.sort)
            item['predicate_uri'] = la.predicate_uri
            p_ent = Entity()
            p_found = p_ent.dereference(la.predicate_uri)
            if p_found:
                item['predicate_label'] = p_ent.label
            else:
                item['predicate_label'] = False
            item['object_uri'] = la.object_uri
            obj_item['id'] = la.object_uri
            obj_item['href'] = obj_item['id'].replace(settings.CANONICAL_HOST,
                                                      rp.get_baseurl())
            o_ent = Entity()
            o_found = o_ent.dereference(la.object_uri)
            if o_found:
                item['object_label'] = o_ent.label
                obj_item['label'] = o_ent.label
            else:
                item['object_label'] = False
                obj_item['label'] = False
            pred_key_found = False
            for pred_list in result['preds_objs']:
                if pred_list['id'] == la.predicate_uri:
                    pred_list['objects'].append(obj_item)
                    pred_key_found = True
            if pred_key_found is False:
                pred_obj = LastUpdatedOrderedDict()
                pred_obj['id'] = item['predicate_uri']
                pred_obj['label'] = item['predicate_label']
                pred_obj['href'] = pred_obj['id'].replace(
                    settings.CANONICAL_HOST, rp.get_baseurl())
                if 'https://' not in pred_obj['href'] \
                   and 'http://' not in pred_obj['href']:
                    pred_obj['href'] = False
                pred_obj['objects'] = [obj_item]
                result['preds_objs'].append(pred_obj)
            result['list'].append(item)
        # now let's get any stable identifiers for this item
        s_ids = StableIdentifer.objects\
                               .filter(uuid=ent.uuid)
        id_type_prefixes = StableIdentifer.ID_TYPE_PREFIXES
        for s_id in s_ids:
            stable_id = LastUpdatedOrderedDict()
            stable_id['type'] = s_id.stable_type
            stable_id['stable_id'] = s_id.stable_id
            stable_id['id'] = False
            if s_id.stable_type in id_type_prefixes:
                stable_id['id'] = id_type_prefixes[s_id.stable_type]
                stable_id['id'] += s_id.stable_id
            result['stable_ids'].append(stable_id)
        json_output = json.dumps(result, indent=4, ensure_ascii=False)
        return HttpResponse(json_output,
                            content_type='application/json; charset=utf8')
    else:
        raise Http404
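
The pred_key_found loop above does a linear scan of result['preds_objs'] for each annotation; keyed grouping does the same work more directly while preserving insertion order (a sketch; the row attributes mirror LinkAnnotation above):

from collections import OrderedDict

def group_objects_by_predicate(annotations):
    """Groups annotation rows into one entry per predicate_uri."""
    preds = OrderedDict()
    for la in annotations:
        pred = preds.setdefault(
            la.predicate_uri, {'id': la.predicate_uri, 'objects': []})
        pred['objects'].append({'id': la.object_uri})
    return list(preds.values())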
Example #11
 def process_solr_polygons(self, solr_polygons):
     """ processes the solr_json 
         discovery geo tiles,
         aggregating to a certain
         depth
     """
     if self.response_zoom_scope >= self.polygon_min_zoom_scope:
          # we're zoomed in far enough to make it
          # worthwhile to return complex contained-in polygon features
         self.get_polygon_db_objects(solr_polygons)
         i = 0
         cnt_i = -1
         for poly_key in solr_polygons[::2]:
             cnt_i += 2
             solr_facet_count = solr_polygons[cnt_i]
             parsed_key = self.parse_solr_value_parts(poly_key)
             # print('Key: ' + str(parsed_key))
             uuid = parsed_key['uuid']
             if isinstance(uuid, str):
                 if uuid in self.subjects_objs \
                    and uuid in self.geo_objs:
                     # we have Subjects and Geospatial models for this
                     # uuid
                     subj_obj = self.subjects_objs[uuid]
                     geo_obj = self.geo_objs[uuid]
                     i += 1
                     fl = FilterLinks()
                     fl.base_request_json = self.filter_request_dict_json
                     fl.spatial_context = self.spatial_context
                     new_rparams = fl.add_to_request(
                         'path', subj_obj.context)
                     record = LastUpdatedOrderedDict()
                     record['id'] = fl.make_request_url(new_rparams)
                     record['json'] = fl.make_request_url(
                         new_rparams, '.json')
                     record['count'] = solr_facet_count
                     record['type'] = 'Feature'
                     record['category'] = 'oc-api:geo-contained-in-feature'
                     if self.min_date is not False \
                        and self.max_date is not False:
                         when = LastUpdatedOrderedDict()
                         when['id'] = '#event-feature-' + uuid
                         when['type'] = 'oc-gen:formation-use-life'
                         # convert numeric to GeoJSON-LD ISO 8601
                         when['start'] = ISOyears().make_iso_from_float(
                             self.min_date)
                         when['stop'] = ISOyears().make_iso_from_float(
                             self.max_date)
                         record['when'] = when
                     geometry = LastUpdatedOrderedDict()
                     geometry['id'] = '#geo-disc-feature-geom-' + uuid
                     geometry['type'] = geo_obj.ftype
                     coord_obj = json.loads(geo_obj.coordinates)
                     v_geojson = ValidateGeoJson()
                     coord_obj = v_geojson.fix_geometry_rings_dir(
                         geo_obj.ftype, coord_obj)
                     geometry['coordinates'] = coord_obj
                     record['geometry'] = geometry
                     properties = LastUpdatedOrderedDict()
                     properties['id'] = '#geo-disc-feature-' + uuid
                     properties['href'] = record['id']
                     properties['item-href'] = parsed_key['href']
                     properties['label'] = subj_obj.context
                     properties['feature-type'] = 'containing-region'
                     properties['count'] = solr_facet_count
                     properties['early bce/ce'] = self.min_date
                     properties['late bce/ce'] = self.max_date
                     record['properties'] = properties
                      # round-trip the record through the geojson parser to
                      # check that it is valid GeoJSON before adding it
                      dump = json.dumps(record, ensure_ascii=False, indent=4)
                      geojson.loads(dump)
                      self.geojson_features.append(record)
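
The stride-by-two loop works because Solr returns facet values as one flat list that alternates value and count. The same pairing, stated directly:

def facet_pairs(flat_facet_list):
    """['key1', count1, 'key2', count2, ...] -> [('key1', count1), ...]"""
    return list(zip(flat_facet_list[::2], flat_facet_list[1::2]))

# facet_pairs(['poly-a', 12, 'poly-b', 3]) == [('poly-a', 12), ('poly-b', 3)]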
Example #12
    def make_geo_contained_in_facet_options(self, solr_json):
        """Gets geospace item query set from a list of options tuples"""
        geosource_path_keys = (configs.FACETS_SOLR_ROOT_PATH_KEYS +
                               ['disc_geosource'])
        geosource_val_count_list = utilities.get_dict_path_value(
            geosource_path_keys, solr_json, default=[])
        if not len(geosource_val_count_list):
            return None

        # Make the list of tile, count tuples.
        options_tuples = utilities.get_facet_value_count_tuples(
            geosource_val_count_list)
        if not len(options_tuples):
            return None

        uuids = []
        parsed_solr_entities = {}
        uuid_geo_dict = {}
        for solr_entity_str, count in options_tuples:
            parsed_entity = utilities.parse_solr_encoded_entity_str(
                solr_entity_str, base_url=self.base_url)
            if not parsed_entity:
                logger.warning(
                    'Cannot parse entity from {}'.format(solr_entity_str))
                continue
            if '/' not in parsed_entity['uri']:
                logger.warning('Invalid uri from {}'.format(solr_entity_str))
                continue
            uri_parts = parsed_entity['uri'].split('/')
            uuid = uri_parts[-1]
            parsed_entity['uuid'] = uuid
            parsed_solr_entities[solr_entity_str] = parsed_entity
            uuids.append(uuid)

        # Make a dictionary of geospace objects keyed by uuid. This
        # will hit the database in one query to get all geospace
        # objects not present in the cache.
        uuid_geo_dict = self._make_cache_geospace_obj_dict(uuids)

        # Make a dict of context paths, keyed by uuid. This will also
        # hit the database in only 1 query, for all context paths not
        # already present in the cache.
        uuid_context_dict = self._get_cache_contexts_dict(uuids)

        # Now make the final list of geospatial facet options.
        geo_options = []
        for solr_entity_str, count in options_tuples:
            if solr_entity_str not in parsed_solr_entities:
                # This solr_entity_str did not validate to extract a UUID.
                continue
            parsed_entity = parsed_solr_entities[solr_entity_str]
            uuid = parsed_entity['uuid']
            geo_obj = uuid_geo_dict.get(uuid)
            if geo_obj is None:
                logger.warning('No geospace object for {}'.format(uuid))
                continue

            context_path = uuid_context_dict.get(uuid)
            if context_path is None:
                logger.warning('No context path for {}'.format(uuid))
                continue

            sl = SearchLinks(request_dict=copy.deepcopy(self.request_dict),
                             base_search_url=self.base_search_url)
            # Remove non search related params.
            sl.remove_non_query_params()

            # Update the request dict for this facet option.
            sl.replace_param_value(
                'path',
                match_old_value=None,
                new_value=context_path,
            )
            urls = sl.make_urls_from_request_dict()

            # NOTE: We're not checking if the URLs are the same
            # as the current search URL, because part of the point
            # of listing these features is for visualization display
            # in the front end.

            option = LastUpdatedOrderedDict()

            # The fragment ids in the URLs ensure we don't have an
            # ID collision with context facets.
            option['id'] = urls['html'] + '#geo-in'
            option['json'] = urls['json'] + '#geo-in'

            option['count'] = count
            option['type'] = 'Feature'
            option['category'] = 'oc-api:geo-contained-in-feature'

            # Add some general chronology information to the
            # geospatial feature.
            option = self._add_when_object_to_feature_option(
                uuid,
                option,
            )

            # Add the geometry from the geo_obj coordinates. First
            # check to make sure they are OK with the GeoJSON
            # right-hand rule.
            geometry = LastUpdatedOrderedDict()
            geometry['id'] = '#geo-in-geom-{}'.format(uuid)
            geometry['type'] = geo_obj.ftype
            coord_obj = json.loads(geo_obj.coordinates)
            v_geojson = ValidateGeoJson()
            coord_obj = v_geojson.fix_geometry_rings_dir(
                geo_obj.ftype, coord_obj)
            geometry['coordinates'] = coord_obj
            option['geometry'] = geometry

            properties = LastUpdatedOrderedDict()
            properties['id'] = '#geo-in-props-{}'.format(uuid)
            properties['href'] = option['id']
            properties['item-href'] = parsed_entity['uri']
            properties['label'] = context_path
            properties['feature-type'] = 'containing-region'
            properties['count'] = count
            properties['early bce/ce'] = self.min_date
            properties['late bce/ce'] = self.max_date
            option['properties'] = properties

            geo_options.append(option)

        return geo_options
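
The _make_cache_geospace_obj_dict and _get_cache_contexts_dict helpers are not shown here, but the single-query pattern the comments describe is a common Django idiom (a sketch; any model with a uuid field works):

def make_obj_dict_by_uuid(model, uuids):
    """Fetches all rows for the given uuids in one query, keyed by uuid."""
    return {obj.uuid: obj for obj in model.objects.filter(uuid__in=uuids)}

# uuid_geo_dict = make_obj_dict_by_uuid(Geospace, uuids)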
Example #13
    def make_geotile_facet_options(self, solr_json):
        """Makes geographic tile facets from a solr_json response"""
        geotile_path_keys = (configs.FACETS_SOLR_ROOT_PATH_KEYS +
                             ['discovery_geotile'])
        geotile_val_count_list = utilities.get_dict_path_value(
            geotile_path_keys, solr_json, default=[])
        if not len(geotile_val_count_list):
            return None

        # Make the list of tile, count tuples.
        options_tuples = utilities.get_facet_value_count_tuples(
            geotile_val_count_list)
        if not len(options_tuples):
            return None

        valid_tile_tuples = self._make_valid_options_tile_tuples(
            options_tuples)
        if not len(valid_tile_tuples):
            # None of the geographic tiles are valid
            # given the query requirements.
            return None

        # Determine the aggregation depth needed to group geotiles
        # together into a reasonable number of options.
        self._get_tile_aggregation_depth(valid_tile_tuples)

        # Determine the min tile depth. We need to return this to
        # the client so the client knows not to over-zoom.
        tile_lens = [len(tile) for tile, _ in valid_tile_tuples]
        self.min_depth = min(tile_lens)

        # Get the client's requested feature type for the geotile
        # facets.
        feature_type = utilities.get_request_param_value(
            self.request_dict,
            param='geo-facet-type',
            default=self.default_tile_feature_type,
            as_list=False,
            solr_escape=False,
        )
        if feature_type not in self.valid_tile_feature_types:
            # If the requested feature type is not in the
            # valid list of feature types, just use the default.
            feature_type = self.default_tile_feature_type

        aggregate_tiles = {}
        for tile, count in valid_tile_tuples:
            # Now aggregate the tiles.
            trim_tile_key = tile[:self.default_aggregation_depth]
            if trim_tile_key not in aggregate_tiles:
                # Make the aggregate tile with a count
                # of zero
                aggregate_tiles[trim_tile_key] = 0

            aggregate_tiles[trim_tile_key] += count

        options = []
        for tile, count in aggregate_tiles.items():
            sl = SearchLinks(request_dict=copy.deepcopy(self.request_dict),
                             base_search_url=self.base_search_url)
            # Remove non search related params.
            sl.remove_non_query_params()

            # Update the request dict for this facet option.
            sl.replace_param_value(
                'disc-geotile',
                match_old_value=None,
                new_value=tile,
            )
            urls = sl.make_urls_from_request_dict()
            if urls['html'] == self.current_filters_url:
                # The new URL matches our current filter
                # url, so don't add this facet option.
                continue

            option = LastUpdatedOrderedDict()
            option['id'] = urls['html']
            option['json'] = urls['json']
            option['count'] = count
            option['type'] = 'Feature'
            option['category'] = 'oc-api:geo-facet'

            # Add some general chronology information to the
            # geospatial tile.
            option = self._add_when_object_to_feature_option(
                tile,
                option,
            )

            gm = GlobalMercator()
            if feature_type == 'Polygon':
                # Get polygon coordinates (a list of lists)
                geo_coords = gm.quadtree_to_geojson_poly_coords(tile)
            elif feature_type == 'Point':
                # Get point coordinates (a list of lon,lat values)
                geo_coords = gm.quadtree_to_geojson_lon_lat(tile)
            else:
                # We shouldn't be here!
                continue

            # Add the geometry object to the facet option.
            geometry = LastUpdatedOrderedDict()
            geometry['id'] = '#geo-disc-tile-geom-{}'.format(tile)
            geometry['type'] = feature_type
            geometry['coordinates'] = geo_coords
            option['geometry'] = geometry

            properties = LastUpdatedOrderedDict()
            properties['id'] = '#geo-disc-tile-{}'.format(tile)
            properties['href'] = option['id']
            properties['label'] = 'Discovery region ({})'.format(
                (len(options) + 1))
            properties['feature-type'] = 'discovery region (facet)'
            properties['count'] = count
            properties['early bce/ce'] = self.min_date
            properties['late bce/ce'] = self.max_date
            option['properties'] = properties

            options.append(option)

        return options
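
Prefix aggregation works because a quadtree tile key begins with the keys of all its enclosing tiles, so truncating to a fixed depth and summing folds children into their shared ancestor. The same step with collections.Counter:

from collections import Counter

def aggregate_tiles_by_depth(tile_count_tuples, depth):
    """Sums counts of quadtree tiles sharing the same key prefix."""
    agg = Counter()
    for tile, count in tile_count_tuples:
        agg[tile[:depth]] += count
    return dict(agg)

# aggregate_tiles_by_depth([('0123', 2), ('0120', 3), ('3101', 1)], 3)
# returns {'012': 5, '310': 1}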
Example #14
 def process_solr_tiles(self, solr_tiles):
     """ processes the solr_json 
         discovery geo tiles,
         aggregating to a certain
         depth
     """
      # first aggregate counts for tiles that belong together
     aggregate_tiles = LastUpdatedOrderedDict()
     i = -1
     t = 0
     if len(solr_tiles) <= self.min_tile_count:
          # don't aggregate if there's not much to aggregate;
          # the tile count is half the solr_tiles list length, because
          # entries alternate between a tile key and its count
         self.aggregation_depth = self.max_depth
     else:
         # suggest tile-depth
         self.aggregation_depth = self.get_suggested_tile_depth(solr_tiles)
     for tile_key in solr_tiles[::2]:
         t += 1
         i += 2
         solr_facet_count = solr_tiles[i]
         if tile_key != 'false':
             if self.limiting_tile is False:
                 ok_to_add = True
             else:
                 # constrain to show facets ONLY within
                 # the current queried tile
                 if self.limiting_tile in tile_key:
                     ok_to_add = True
                 else:
                     ok_to_add = False
             if ok_to_add:
                 # first get full date range for
                 # facets that are OK to add
                 chrono_t = ChronoTile()
                 dates = chrono_t.decode_path_dates(tile_key)
                 if isinstance(dates, dict):
                      # chrono tile is valid, now check to make sure we
                     # actually want it in the results
                     if self.exclude_before is not False:
                         if dates['earliest_bce'] < self.exclude_before:
                              # too early, before the exclude-before date
                             ok_to_add = False
                     if self.exclude_after is not False:
                         if dates['latest_bce'] > self.exclude_after:
                              # too late, after the exclude-after date
                             ok_to_add = False
                 else:
                     # not valid tile, so don't add
                     ok_to_add = False
             if ok_to_add:
                 if isinstance(dates, dict):
                     if self.min_date is False:
                         self.min_date = dates['earliest_bce']
                         self.max_date = dates['latest_bce']
                     else:
                         if self.min_date > dates['earliest_bce']:
                             self.min_date = dates['earliest_bce']
                         if self.max_date < dates['latest_bce']:
                             self.max_date = dates['latest_bce']
                  # now aggregate the OK-to-use facets
                 trim_tile_key = tile_key[:self.aggregation_depth]
                 if trim_tile_key not in aggregate_tiles:
                     aggregate_tiles[trim_tile_key] = 0
                 aggregate_tiles[trim_tile_key] += solr_facet_count
     # now generate GeoJSON for each tile region
     # print('Chronology tiles: ' + str(t) + ' reduced to ' + str(len(aggregate_tiles)))
     # --------------------------------------------
     # code to sort the list of tiles by start date and time span
     # --------------------------------------------
     sorting_ranges = []
     for tile_key, aggregate_count in aggregate_tiles.items():
         chrono_t = ChronoTile()
         dates = chrono_t.decode_path_dates(tile_key)
         dates['tile_key'] = tile_key
         sorting_ranges.append(dates)
      # now sort by earliest bce, then reversed latest bce;
      # this puts early dates with the longest time spans first
     sorted_ranges = sorted(sorting_ranges,
                            key=lambda k:
                            (k['earliest_bce'], -k['latest_bce']))
     sorted_tiles = LastUpdatedOrderedDict()
     for sort_range in sorted_ranges:
         tile_key = sort_range['tile_key']
         sorted_tiles[tile_key] = aggregate_tiles[tile_key]
     i = 0
     for tile_key, aggregate_count in sorted_tiles.items():
         i += 1
         fl = FilterLinks()
         fl.base_request_json = self.filter_request_dict_json
         fl.spatial_context = self.spatial_context
         new_rparams = fl.add_to_request('form-chronotile', tile_key)
         record = LastUpdatedOrderedDict()
         record['id'] = fl.make_request_url(new_rparams)
         record['json'] = fl.make_request_url(new_rparams, '.json')
         record['count'] = aggregate_count
         record['category'] = 'oc-api:chrono-facet'
         chrono_t = ChronoTile()
         dates = chrono_t.decode_path_dates(tile_key)
         if self.exclude_before is not False:
             if dates['earliest_bce'] < self.exclude_before:
                 dates['earliest_bce'] = self.exclude_before
         if self.exclude_after is not False:
             if dates['latest_bce'] > self.exclude_after:
                 dates['latest_bce'] = self.exclude_after
         # convert numeric to GeoJSON-LD ISO 8601
         record['start'] = ISOyears().make_iso_from_float(
             dates['earliest_bce'])
         record['stop'] = ISOyears().make_iso_from_float(
             dates['latest_bce'])
         properties = LastUpdatedOrderedDict()
         properties['early bce/ce'] = dates['earliest_bce']
         properties['late bce/ce'] = dates['latest_bce']
         record['properties'] = properties
         self.chrono_tiles.append(record)
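
The sort key (earliest_bce, -latest_bce) orders ranges by start date and, on ties, puts the longest span first, because negating the end date sorts later ends ahead. A quick check of the behavior:

ranges = [
    {'tile_key': 'a', 'earliest_bce': -1000, 'latest_bce': -500},
    {'tile_key': 'b', 'earliest_bce': -1000, 'latest_bce': -900},
    {'tile_key': 'c', 'earliest_bce': -2000, 'latest_bce': -100},
]
sorted_ranges = sorted(
    ranges, key=lambda k: (k['earliest_bce'], -k['latest_bce']))
# Order: 'c' (earliest start), then 'a' (same start as 'b', longer span),
# then 'b'.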
Example #15
 def get_field_groups_and_fields(self):
     """ gets fields used in this import profile,
         it's not super efficient but it doesn't have to be
         because it is querying very small data
     """
     mandatory_predicates = []
     if self.inp_prof.item_type in InputField.PREDICATES_OC:
         # exclude the mandatory fields for this type of item
         mandatory_predicates = InputField.PREDICATES_OC[
             self.inp_prof.item_type]
     bad_field_uuids = []
     groups = []
     if self.inp_prof is not False:
         inp_groups = InputFieldGroup.objects\
                                     .filter(profile_uuid=self.uuid)
         index = 0
         for inp_group in inp_groups:
             index += 1
             group = LastUpdatedOrderedDict()
             group['id'] = inp_group.uuid
             group['label'] = inp_group.label
             group['visibility'] = inp_group.visibility
             group['vis_note'] = InputFieldGroup.GROUP_VIS[
                 inp_group.visibility]
             if len(group['label']) < 1:
                 group['label'] = 'Field group: ' + str(index)
             group['note'] = inp_group.note
             group['obs_num'] = inp_group.obs_num
             group['fields'] = []
             inp_group_fields = InputField.objects\
                                          .filter(profile_uuid=self.uuid,
                                                  fgroup_uuid=inp_group.uuid)
             for inp_field in inp_group_fields:
                 add_ok = False
                 field = LastUpdatedOrderedDict()
                 field['id'] = inp_field.uuid
                 field['sort'] = inp_field.sort
                 field['predicate_uuid'] = inp_field.predicate_uuid
                 if inp_field.predicate_uuid not in InputField.PREDICATE_ITEMS:
                     ent = Entity()
                     found = ent.dereference(inp_field.predicate_uuid)
                     if found:
                         add_ok = True
                         if len(inp_field.label) < 1:
                             inp_field.label = ent.label
                             inp_field.save()
                         field['label'] = inp_field.label
                         field['data_type'] = ent.data_type
                         field['oc_required'] = False
                     else:
                         # we've got data entry fields that don't exist, so delete them
                         add_ok = False
                         bad_field_uuids.append(inp_field.uuid)
                 else:
                     add_ok = True
                     preset = InputField.PREDICATE_ITEMS[
                         inp_field.predicate_uuid]
                     field['label'] = preset['label']
                     field['data_type'] = preset['data_type']
                     field['oc_required'] = True
                 field['note'] = inp_field.note
                  try:
                      val_obj = json.loads(inp_field.validation)
                  except (TypeError, ValueError):
                      val_obj = LastUpdatedOrderedDict()
                 field['validation'] = val_obj
                 if add_ok:
                     # ok to add this to the list
                     group['fields'].append(field)
             groups.append(group)
     if len(bad_field_uuids) > 0:
         # delete the bad fields
         InputField.objects\
                   .filter(uuid__in=bad_field_uuids)\
                   .delete()
     self.field_groups = groups
     return self.field_groups
Example #16
 def get_item_list(self, uuid, start=0, rows=10, sort=False, last=False):
     """ returns a list of items
         made with the current profile
     """
     ok_sorts = ['label', '-label', 'revised', '-revised']
     output = False
     if start != 0:
          try:
              start = int(float(start))
          except (TypeError, ValueError):
              start = 0
      if rows != 10:
          try:
              rows = int(float(rows))
          except (TypeError, ValueError):
              rows = 10
     if sort is False:
         sort = '-label,-revised'
     if ',' in sort:
         sort_ex = sort.split(',')
     else:
         sort_ex = [sort, '']
     sort_param_ok = True
     if sort_ex[0] not in ok_sorts:
         sort_ex[0] = '-label'
         sort_param_ok = False
     if sort_ex[1] not in ok_sorts:
         sort_ex[1] = '-revised'
     if self.rev_sort(sort_ex[1]) == sort_ex[0]:
         sort_ex[1] = sort_ex[0]
     if sort_param_ok:
         sort_param = '?sort=' + ','.join(sort_ex)
     else:
         sort_param = ''
     ok = self.check_exists(uuid)
     if ok:
         # the profile exists
         url = self.base_url + '/edit/inputs/profile-item-list/' + uuid + sort_param
         if '?' in url:
             param_r = '&rows=' + str(rows)
         else:
             param_r = '?rows=' + str(rows)
         output = LastUpdatedOrderedDict()
         # the profile exists
         source_id = 'profile:' + uuid
         output['uuid'] = uuid
         output['source_id'] = source_id
         output['label'] = self.inp_prof.label
         man_count = Manifest.objects\
                             .filter(source_id=source_id)\
                             .values('source_id')\
                             .annotate(total=Count('uuid'))
          total = man_count[0]['total'] if man_count else 0
         end = start + rows
         output['count'] = total
         num_pages = round(total / rows, 0)
         if num_pages * rows >= total:
             num_pages -= 1
         last_start = int(num_pages * rows)
         if start == 0:
             output['href'] = url
         else:
             output['href'] = url + param_r + '&start=' + str(start)
         if total <= rows:
             output['first'] = False
             output['previous'] = False
             output['next'] = False
             output['last'] = False
         else:
             if start > 0:
                 output['first'] = url + param_r
             else:
                 output['first'] = False
             prev_start = start - rows
             if start > 0 and prev_start < 0:
                 prev_start = 0
             if prev_start >= 0:
                 output['previous'] = url + param_r + '&start=' + str(
                     prev_start)
             else:
                 output['previous'] = False
             if end < total:
                 output['next'] = url + param_r + '&start=' + str(end)
             else:
                 output['next'] = False
             if end < total:
                 if last_start > 0 and last_start < total:
                     output['last'] = url + param_r + '&start=' + str(
                         last_start)
                 else:
                     output['last'] = False
             else:
                 output['last'] = False
         if last:
             man_list = Manifest.objects\
                                .filter(source_id=source_id)\
                                .order_by(sort_ex[0], sort_ex[1])[last_start:total]
         else:
             man_list = Manifest.objects\
                                .filter(source_id=source_id)\
                                .order_by(sort_ex[0], sort_ex[1])[start:end]
         output['items'] = []
         if last:
             index = last_start
         else:
             index = start
         for man in man_list:
             index += 1
             item = LastUpdatedOrderedDict()
             item['index'] = index
             item['uuid'] = man.uuid
             item['label'] = man.label
             item['revised'] = man.revised.date().isoformat()
             output['items'].append(item)
     return output
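
The paging math above derives last_start from the total row count so the first/previous/next/last links never point past the data. The same logic as a function, using floor division in place of round() (a sketch; assumes rows > 0, with None standing in for the False links):

def page_links(total, start, rows):
    """Returns start offsets for first/previous/next/last (None = no link)."""
    num_pages = total // rows
    if num_pages * rows >= total:
        num_pages -= 1  # the last page starts one page earlier
    last_start = num_pages * rows
    next_start = start + rows if start + rows < total else None
    return {
        'first': 0 if start > 0 else None,
        'previous': max(start - rows, 0) if start > 0 else None,
        'next': next_start,
        'last': last_start if next_start is not None
                and 0 < last_start < total else None,
    }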
Example #17
 def generate_table_metadata(self, table_id, overwrite=False):
     """ makes metadata for a specific table """
     ex_id = ExpTableIdentifiers()
     ex_id.make_all_identifiers(table_id)
     table_ids = [ex_id.table_id, ex_id.public_table_id]
     try:
         ex_tab = ExpTable.objects.get(table_id=table_id)
     except ExpTable.DoesNotExist:
         print('No ExpTable object for: ' + ex_id.public_table_id)
         ex_tab = None
     try:
         man_obj = Manifest.objects.get(uuid=ex_id.public_table_id)
     except Manifest.DoesNotExist:
         print('No manifest object for: ' + ex_id.public_table_id)
         man_obj = None
     if ex_tab is not None and man_obj is not None:
         proj_uuid_counts = None
         for meta_pred in self.metadata_predicates:
             if overwrite:
                 num_old_delete = LinkAnnotation.objects\
                                                .filter(subject__in=table_ids,
                                                        predicate_uri=meta_pred)\
                                                .delete()
                  print('Deleted annotations ' + str(num_old_delete) +
                        ' for ' + meta_pred)
                 add_meta_for_pred = True
             else:
                 num_exists = LinkAnnotation.objects\
                                            .filter(subject__in=table_ids,
                                                    predicate_uri=meta_pred)[:1]
                 if len(num_exists) < 1:
                     add_meta_for_pred = True
                 else:
                     add_meta_for_pred = False
             if add_meta_for_pred:
                 if meta_pred == 'dc-terms:contributor':
                     print('Getting contributors for ' + table_id)
                     sorted_author_list = self.get_table_author_counts(
                         table_id)
                     contrib_sort = 0
                     for s_author in sorted_author_list:
                         contrib_sort += 1
                         obj_extra = LastUpdatedOrderedDict()
                         obj_extra['count'] = s_author['count']
                         la = LinkAnnotation()
                         la.subject = man_obj.uuid
                         la.subject_type = man_obj.item_type
                         la.project_uuid = man_obj.project_uuid
                         la.source_id = 'exp-table-manage'
                         la.predicate_uri = meta_pred
                         la.object_uri = URImanagement.make_oc_uri(
                             s_author['uuid'], 'persons')
                         la.creator_uuid = '0'
                         la.sort = contrib_sort
                         la.obj_extra = obj_extra
                         la.save()
                 if meta_pred in ['dc-terms:creator', 'dc-terms:source']:
                     # need to get projects for this
                     if proj_uuid_counts is None:
                         # only get this if not gotten yet
                         print('Getting projects for ' + table_id)
                         proj_uuid_counts = self.get_table_project_uuid_counts(
                             table_id)
                     if meta_pred == 'dc-terms:creator':
                         print('Getting creators for ' + table_id)
                         dc_creator_list = self.make_table_dc_creator_list(
                             proj_uuid_counts)
                         create_sort = 0
                         for dc_creator in dc_creator_list:
                             create_sort += 1
                             obj_extra = LastUpdatedOrderedDict()
                             obj_extra['count'] = dc_creator['count']
                             la = LinkAnnotation()
                             la.subject = man_obj.uuid
                             la.subject_type = man_obj.item_type
                             la.project_uuid = man_obj.project_uuid
                             la.source_id = 'exp-table-manage'
                             la.predicate_uri = meta_pred
                             la.object_uri = dc_creator['id']
                             la.creator_uuid = '0'
                             la.sort = create_sort
                             la.obj_extra = obj_extra
                             la.save()
                     if meta_pred == 'dc-terms:source':
                         print('Getting sources for ' + table_id)
                         proj_sort = 0
                         for proj_uuid_count in proj_uuid_counts:
                             proj_sort += 1
                             obj_extra = LastUpdatedOrderedDict()
                             obj_extra['count'] = proj_uuid_count[
                                 'num_uuids']
                             la = LinkAnnotation()
                             la.subject = man_obj.uuid
                             la.subject_type = man_obj.item_type
                             la.project_uuid = man_obj.project_uuid
                             la.source_id = 'exp-table-manage'
                             la.predicate_uri = meta_pred
                             la.object_uri = URImanagement.make_oc_uri(
                                 proj_uuid_count['project_uuid'],
                                 'projects')
                             la.creator_uuid = '0'
                             la.sort = proj_sort
                             la.obj_extra = obj_extra
                             la.save()
                 if meta_pred == 'dc-terms:subject':
                     print('Getting subjects for ' + table_id)
                     dc_subject_list = self.make_table_dc_subject_category_list(
                         table_id)
                     subj_sort = 0
                     for dc_subject in dc_subject_list:
                         subj_sort += 1
                         obj_extra = LastUpdatedOrderedDict()
                         obj_extra['count'] = dc_subject['count']
                         la = LinkAnnotation()
                         la.subject = man_obj.uuid
                         la.subject_type = man_obj.item_type
                         la.project_uuid = man_obj.project_uuid
                         la.source_id = 'exp-table-manage'
                         la.predicate_uri = meta_pred
                         la.object_uri = dc_subject['id']
                         la.creator_uuid = '0'
                         la.sort = subj_sort
                         la.obj_extra = obj_extra
                         la.save()
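
Each predicate follows the same guard: with overwrite, delete the old annotations and re-add; without it, add only when none exist. Django's exists() makes the check explicit (a sketch over the same models):

def should_add_metadata(table_ids, meta_pred, overwrite=False):
    """True when metadata for meta_pred should be (re)created."""
    qs = LinkAnnotation.objects.filter(
        subject__in=table_ids, predicate_uri=meta_pred)
    if overwrite:
        qs.delete()  # clear old annotations before re-adding
        return True
    return not qs.exists()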
Example #18
class Create:

    EQUIV_PREDICATES = [
        'skos:closeMatch', 'http://www.w3.org/2004/02/skos/core#closeMatch'
    ]

    def __init__(self):
        self.table_id = False
        self.label = False
        self.dates_bce_ce = True  # calendar dates in BCE/CE, if false BP
        self.include_equiv_ld = True  # include linked data related by EQUIV_PREDICATES
        self.include_ld_obj_uris = True  # include URIs to linked data objects
        # include original values annotated as equivalent to linked data
        self.include_ld_source_values = True
        # For multiple values of linked data (same predicate, multiple
        # objects), make multiple fields if NOT False. When this value is
        # NOT False, its string value indicates the presence of a linked
        # data object uri.
        self.boolean_multiple_ld_fields = 'yes'
        self.include_original_fields = False  # include original field data
        self.fields = []
        self.context_fields = LastUpdatedOrderedDict()
        self.ld_fields = LastUpdatedOrderedDict()
        self.predicate_fields = LastUpdatedOrderedDict()
        self.multi_source_value_delim = '; '  # delimiter for multiple values in source data field
        # limits predicate exports to listed observation numbers, no limit if empty
        self.obs_limits = []
        self.entities = {}
        # predicate_uris expressed as boolean types
        self.predicate_uris_boolean_types = False
        # predicate uuids used with a table
        self.predicate_uuids = LastUpdatedOrderedDict()
        # unique linked_data predicates
        self.ld_predicates = LastUpdatedOrderedDict()
        # unique linked_data object equivalences
        self.ld_object_equivs = LastUpdatedOrderedDict()
        # dict with ID keys and counts of dc-terms:contributor
        self.dc_contributor_ids = {}
        # dict with ID keys and counts of dc-terms:creator
        self.dc_creator_ids = {}
        self.uuidlist = []
        # dict of uuids for parent entities to keep them in memory
        self.parents = {}

    def prep_default_fields(self):
        """ Prepares initial set of default fields for export tables """
        self.fields.append({
            'label': 'URI',
            'rel_ids': ['@id'],
            'field_num': 1
        })
        self.fields.append({
            'label': 'Label',
            'rel_ids': ['label'],
            'field_num': 2
        })
        self.fields.append({
            'label': 'Project',
            'rel_ids': ['proj-label'],
            'field_num': 3
        })
        self.fields.append({
            'label': 'Project URI',
            'rel_ids': ['proj-uri'],
            'field_num': 4
        })
        self.fields.append({
            'label': 'Item Category',
            'rel_ids': ['item-category'],
            'field_num': 5
        })
        self.fields.append({
            'label': 'Last Updated',
            'rel_ids': ['last-updated'],
            'field_num': 6
        })
        self.fields.append({
            'label': 'Authorship',
            'rel_ids': ['authorship'],
            'field_num': 7
        })
        self.fields.append({
            'label': 'Latitude (WGS-84)',
            'rel_ids': ['latitude'],
            'field_num': 8
        })
        self.fields.append({
            'label': 'Longitude (WGS-84)',
            'rel_ids': ['longitude'],
            'field_num': 9
        })
        self.fields.append({
            'label': 'Geospatial note',
            'rel_ids': ['geospatial-note'],
            'field_num': 10
        })
        if self.dates_bce_ce:
            self.fields.append({
                'label': 'Early Date (BCE/CE)',
                'rel_ids': ['early-bce-ce'],
                'field_num': 11
            })
            self.fields.append({
                'label': 'Late Date (BCE/CE)',
                'rel_ids': ['late-bce-ce'],
                'field_num': 12
            })
        else:
            self.fields.append({
                'label': 'Early Date (BP)',
                'rel_ids': ['early-bp'],
                'field_num': 11
            })
            self.fields.append({
                'label': 'Late Date (BP)',
                'rel_ids': ['late-bp'],
                'field_num': 12
            })
        self.fields.append({
            'label': 'Context URI',
            'rel_ids': ['context-uri'],
            'field_num': 13
        })
        for field in self.fields:
            self.save_field(field)

    def save_field(self, field):
        """ Saves a record of a field """
        exfield = ExpField()
        exfield.table_id = self.table_id
        exfield.field_num = field['field_num']
        exfield.label = field['label']
        exfield.rel_ids = json.dumps(field['rel_ids'], ensure_ascii=False)
        exfield.save()

    def check_reload_fields_from_db(self):
        """ Reloads fields, incase a process was interrupted """
        if len(self.fields) < 1:
            exfields = ExpField.objects\
                               .filter(table_id=self.table_id)\
                               .order_by('field_num')
            for exfield in exfields:
                field = {}
                field['field_num'] = exfield.field_num
                field['label'] = exfield.label
                field['rel_ids'] = json.loads(exfield.rel_ids)
                self.fields.append(field)

    def prep_process_uuids_by_projects_class(self, project_uuids, class_uri):
        """ Gets a list of uuids and basic metadata about items for the
            export table. Does so in the simplest way, filtering only
            by a list of project_uuids and class_uri """
        self.prep_default_fields()
        self.uuidlist = UUIDListSimple(project_uuids, class_uri).uuids
        self.process_uuid_list(self.uuidlist)
        self.get_predicate_uuids()  # now prepare to do item descriptions
        self.get_predicate_link_annotations()  # even if not showing linked data
        self.process_ld_predicates_values()  # only if exporting linked data
        self.save_ld_fields()  # only if exporting linked data
        self.update_table_metadata()  # save a record of the table metadata

    def prep_process_uuid_list(self, uuids, do_linked_data=False):
        """ prepares default fields and exports a list of items """
        self.uuidlist = uuids
        self.prep_default_fields()
        self.process_uuid_list(self.uuidlist)
        self.get_predicate_uuids()  # now prepare to do item descriptions
        self.get_predicate_link_annotations()  # even if not showing linked data
        if do_linked_data:
            self.process_ld_predicates_values()  # only if exporting linked data
            self.save_ld_fields()  # only if exporting linked data
        self.save_source_fields()  # save source data, possibly limited by observations
        self.update_table_metadata()  # save a record of the table metadata

    def process_uuid_list(self, uuids, starting_row=1):
        row_num = starting_row
        for uuid in uuids:
            try:
                man = Manifest.objects.get(uuid=uuid)
            except Manifest.DoesNotExist:
                man = False
            if man is not False:
                print(str(row_num) + ': ' + str(uuid))
                self.save_basic_default_field_cells(row_num, man)
                self.save_authorship(row_num, man)
                context_metadata = self.get_parents_context_metadata(man.uuid)
                self.save_default_geo(row_num, man, context_metadata['geo'])
                self.save_default_chrono(row_num, man,
                                         context_metadata['event'])
                self.save_context(row_num, man, context_metadata['p_list'])
                row_num += 1
            else:
                print(uuid + ' Failed!')

    def get_parents_context_metadata(self, uuid):
        """ get all parents from memory or by DB lookups """
        if len(self.parents) >= 5000:
            self.parents = {}
        par_res = Assertion.objects\
                           .filter(object_uuid=uuid,
                                   predicate_uuid=Assertion.PREDICATES_CONTAINS)[:1]
        if len(par_res) > 0:
            # item has a parent
            parent_uuid = par_res[0].uuid
            if parent_uuid not in self.parents:
                # we don't have a context path parent list for this parent in memory yet
                # so let's go and make it
                p_list = []
                act_contain = Containment()
                raw_parents = act_contain.get_parents_by_child_uuid(
                    parent_uuid)
                if raw_parents is not False:
                    if len(raw_parents) > 0:
                        for tree_node, r_parents in raw_parents.items():
                            p_list = r_parents
                            break
                # add the 1st parent to the start of the list
                p_list.insert(0, parent_uuid)
                context_metadata = {'p_list': p_list}
                self.parents[parent_uuid] = context_metadata
            else:
                context_metadata = self.parents[parent_uuid]
        else:
            parent_uuid = False
            # item has no parent; start with an empty context path list
            context_metadata = {'p_list': []}
        # now get geo and chrono metadata
        context_metadata = self.get_geo_chrono_metadata(
            uuid, parent_uuid, context_metadata)
        return context_metadata

    def get_geo_chrono_metadata(self, uuid, parent_uuid, context_metadata):
        """ gets and saves geo and chrono metadata """
        act_contain = Containment()
        geo_meta = False
        event_meta = False
        uuid_geo = Geospace.objects.filter(uuid=uuid)[:1]
        if len(uuid_geo) > 0:
            geo_meta = uuid_geo  # keep the queryset, save_default_geo iterates over it
        else:
            # geo information for this item not found, look to parents
            if parent_uuid is not False \
               and 'p_list' in context_metadata:
                # we have at least 1 parent
                if 'p_geo' not in context_metadata:
                    # no saved geo information in this context path, so look it up
                    p_list = context_metadata['p_list']
                    geo_meta = act_contain.get_geochron_from_subject_list(
                        p_list, 'geo')
                    context_metadata['p_geo'] = geo_meta
                    self.parents[parent_uuid] = context_metadata
                else:
                    # we have saved geo information for this context path so use it
                    geo_meta = context_metadata['p_geo']
        uuid_event = Event.objects.filter(uuid=uuid)[:1]
        if len(uuid_event) > 0:
            event_meta = uuid_event
        else:
            # chrono information for this item not found, look to parents
            if parent_uuid is not False \
               and 'p_list' in context_metadata:
                # we have at least 1 parent
                if 'p_event' not in context_metadata:
                    # no saved chrono information in this context path, so look it up
                    p_list = context_metadata['p_list']
                    event_meta = act_contain.get_geochron_from_subject_list(
                        p_list, 'event')
                    context_metadata['p_event'] = event_meta
                    self.parents[parent_uuid] = context_metadata
                else:
                    # we have saved chrono information for this context path so use it
                    event_meta = context_metadata['p_event']
        context_metadata['geo'] = geo_meta
        context_metadata['event'] = event_meta
        return context_metadata

    def get_predicate_uuids(self):
        """ Gets predicate uuids for a table """
        self.entities = {}  # reset the entities; no need to keep context entities in memory
        self.check_reload_fields_from_db()  # gets fields from DB, if process was interrupted
        limit_obs = False
        if isinstance(self.obs_limits, list):
            if len(self.obs_limits) > 0:
                limit_obs = True
        uuids = UUIDListExportTable(self.table_id).uuids
        # seems faster than a select distinct with a join.
        for uuid in uuids:
            if limit_obs:
                pred_uuids = Assertion.objects\
                                      .values_list('predicate_uuid', flat=True)\
                                      .filter(uuid=uuid,
                                              obs_num__in=self.obs_limits)
            else:
                pred_uuids = Assertion.objects\
                                      .values_list('predicate_uuid', flat=True)\
                                      .filter(uuid=uuid)
            item_preds = LastUpdatedOrderedDict()
            for pred_uuid in pred_uuids:
                if pred_uuid not in item_preds:
                    item_preds[pred_uuid] = 1
                else:
                    item_preds[pred_uuid] += 1
            for pred_uuid, count in item_preds.items():
                if pred_uuid not in self.predicate_uuids:
                    pred_label = self.deref_entity_label(pred_uuid)
                    pred_type = self.entities[pred_uuid].data_type
                    self.predicate_uuids[pred_uuid] = {
                        'count': count,
                        'label': pred_label,
                        'type': pred_type
                    }
                else:
                    if self.predicate_uuids[pred_uuid]['count'] < count:
                        self.predicate_uuids[pred_uuid]['count'] = count
        return self.predicate_uuids
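    # Illustrative shape of the returned dict (the uuid key and label are
    # placeholders; 'count' is the maximum number of values per item):
    #   self.predicate_uuids = {
    #       'pred-uuid': {'count': 2, 'label': '...', 'type': 'id'},
    #   }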

    def get_predicate_link_annotations(self):
        """ Gets the link data annotations for predicates used on a table """
        auth = Authorship()
        for pred_uuid, pred in self.predicate_uuids.items():
            la_s = LinkAnnotation.objects\
                                 .filter(subject=pred_uuid)
            if len(la_s) > 0:
                self.predicate_uuids[pred_uuid]['annotations'] = []
                self.predicate_uuids[pred_uuid]['ld-equiv'] = []
            for la in la_s:
                link_anno = {'pred': la.predicate_uri, 'obj': la.object_uri}
                self.predicate_uuids[pred_uuid]['annotations'].append(
                    link_anno)
                if la.predicate_uri in self.EQUIV_PREDICATES:
                    authorship = auth.check_authorship_object(la.object_uri)
                    if authorship is False:  # only keep predicates not related to authorship
                        # the object_uri is equivalent to the predicate_uuid
                        pred_ld_equiv_uri = la.object_uri
                        self.predicate_uuids[pred_uuid]['ld-equiv'].append(
                            pred_ld_equiv_uri)
                        if la.object_uri not in self.ld_predicates:
                            pred_equiv_label = self.deref_entity_label(
                                pred_ld_equiv_uri)
                            self.ld_predicates[pred_ld_equiv_uri] = {
                                'uuids': [pred_uuid],
                                'obj_uuids': {},
                                'obj_uris': [],
                                'label': pred_equiv_label
                            }
                        else:
                            self.ld_predicates[pred_ld_equiv_uri][
                                'uuids'].append(pred_uuid)
        return self.ld_predicates
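    # Illustrative shape of a self.ld_predicates entry (the URI key is a
    # placeholder):
    #   self.ld_predicates['http://example.org/pred'] = {
    #       'uuids': ['pred-uuid-1'],  # predicate_uuids equivalent to the URI
    #       'obj_uuids': {},           # filled by get_ld_predicate_values
    #       'obj_uris': [],            # filled by get_ld_predicate_values
    #       'label': '...',
    #   }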

    def process_ld_predicates_values(self):
        """ Processes linked uri equivalents for predicates to
            get linked data for objects associated with these predicates
        """
        if self.include_equiv_ld and len(self.ld_predicates) > 0:
            for pred_ld_equiv_uri, ld_pred in self.ld_predicates.items():
                self.get_ld_predicate_values(pred_ld_equiv_uri)

    def get_ld_predicate_values(self, pred_ld_equiv_uri):
        """ gets a list of object_uuids used with predicates related to a
            ld_field_uri
        """
        object_uuids = Assertion.objects\
                                .values_list('object_uuid', flat=True)\
                                .filter(predicate_uuid__in=self.ld_predicates[pred_ld_equiv_uri]['uuids'])\
                                .distinct()
        for obj_uuid in object_uuids:
            if obj_uuid not in self.ld_object_equivs:
                self.ld_object_equivs[obj_uuid] = []
            if obj_uuid not in self.ld_predicates[pred_ld_equiv_uri][
                    'obj_uuids']:
                obj_equiv_uris = []
                # get link data annotations for the object_uuid
                la_s = LinkAnnotation.objects\
                                     .filter(subject=obj_uuid)
                for la in la_s:
                    if la.predicate_uri in self.EQUIV_PREDICATES:
                        obj_equiv_uri = la.object_uri
                        if obj_equiv_uri not in self.ld_predicates[
                                pred_ld_equiv_uri]['obj_uris']:
                            self.ld_predicates[pred_ld_equiv_uri][
                                'obj_uris'].append(obj_equiv_uri)
                        if obj_equiv_uri not in self.ld_object_equivs[
                                obj_uuid]:
                            self.ld_object_equivs[obj_uuid].append(
                                obj_equiv_uri)
        return self.ld_predicates[pred_ld_equiv_uri]

    def do_boolean_multiple_ld_fields(self, pred_ld_equiv_uri):
        """ Checks to see if a ld_field_uri (equivalent to a predicate_uuid in assertions)
            has multiple values in a given item. If so, then returns true.
            Otherwise, this returns false.
        """
        output = False
        if self.boolean_multiple_ld_fields is not False:
            if pred_ld_equiv_uri in self.ld_predicates:
                for predicate_uuid in self.ld_predicates[pred_ld_equiv_uri][
                        'uuids']:
                    if predicate_uuid in self.predicate_uuids:
                        if self.predicate_uuids[predicate_uuid]['count'] > 1:
                            output = True
        return output
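    # In other words: True only when self.boolean_multiple_ld_fields is
    # enabled (its value is also used as the cell value for [Has] fields
    # below) and at least one equivalent predicate has more than one
    # value on some item, so each linked-data object gets its own field.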

    def save_source_fields(self):
        """ Creates fields for source data, then saves
            records of source data for each item in the export
            table
        """
        if self.include_original_fields and len(self.predicate_uuids) > 0:
            limit_obs = False
            if isinstance(self.obs_limits, list):
                if len(self.obs_limits) > 0:
                    limit_obs = True
            pred_uuid_list = []
            for predicate_uuid, pred_dict in self.predicate_uuids.items():
                # called for its side effect of creating the field as needed
                self.get_add_predicate_field_number(predicate_uuid)
                pred_uuid_list.append(predicate_uuid)
            # get the rows for the export table
            rows = UUIDsRowsExportTable(self.table_id).rows
            for row in rows:
                if limit_obs:
                    item_data = Assertion.objects.filter(
                        uuid=row['uuid'],
                        predicate_uuid__in=pred_uuid_list,
                        obs_num__in=self.obs_limits)
                else:
                    item_data = Assertion.objects.filter(
                        uuid=row['uuid'], predicate_uuid__in=pred_uuid_list)
                if len(item_data) > 0:
                    self.add_source_cells(row['uuid'], row['row_num'],
                                          item_data)

    def add_source_cells(self, uuid, row_num, item_data):
        """ Adds source data records for an assertion """
        predicate_values = LastUpdatedOrderedDict()
        project_uuid = item_data[0].project_uuid
        for assertion in item_data:
            predicate_uuid = assertion.predicate_uuid
            object_uuid = assertion.object_uuid
            if assertion.object_type == 'xsd:string':
                try:
                    oc_str = OCstring.objects.get(uuid=object_uuid)
                    obj_val = oc_str.content
                except OCstring.DoesNotExist:
                    obj_val = ''
            elif assertion.object_type in ['xsd:integer', 'xsd:double']:
                # numeric value
                obj_val = str(assertion.data_num)
            elif assertion.object_type == 'xsd:date':
                obj_val = str(assertion.data_date)
            else:
                obj_val = str(self.deref_entity_label(object_uuid))
            if predicate_uuid not in predicate_values:
                # make a list, since some predicates are multi-valued
                predicate_values[predicate_uuid] = []
            predicate_values[predicate_uuid].append(obj_val)
        for predicate_uuid, val_list in predicate_values.items():
            field_num = self.get_add_predicate_field_number(predicate_uuid)
            cell = ExpCell()
            cell.table_id = self.table_id
            cell.uuid = uuid
            cell.project_uuid = project_uuid
            cell.row_num = row_num
            cell.field_num = field_num
            cell.record = self.multi_source_value_delim.join(
                val_list)  # semi-colon delim for multivalued predicates
            cell.save()
            cell = None
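    # Example of the multi-value join above (assuming
    # multi_source_value_delim is '; '):
    #   '; '.join(['Bone', 'Shell'])  ->  'Bone; Shell'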

    def get_add_predicate_field_number(self, predicate_uuid):
        """ Gets the field_num for a source predicate_uuid field,
            given the predicate_uuid.
            Creates a new field for the predicate as needed
        """
        if predicate_uuid in self.predicate_fields:
            field_num = self.predicate_fields[predicate_uuid]
        else:
            field_num = len(self.fields) + 1
            label = self.deref_entity_label(predicate_uuid) + ' [Source]'
            rel_ids = [predicate_uuid]
            field = {
                'label': label,
                'rel_ids': rel_ids,
                'field_num': field_num
            }
            self.fields.append(field)
            self.save_field(field)
            self.predicate_fields[predicate_uuid] = field_num
        return field_num

    def save_ld_fields(self):
        """ Creates fields for linked data, then saves
            records of linked data for each item in the export
            table
        """
        if self.include_equiv_ld and len(self.ld_predicates) > 0:
            for pred_ld_equiv_uri, ld_pred in self.ld_predicates.items():
                if self.do_boolean_multiple_ld_fields(pred_ld_equiv_uri):
                    le_sort = LinkEntitySorter()
                    #  sort the URIs for the objects, so the fields come in a
                    #  nice, reasonable order.
                    sort_obj_uris = le_sort.sort_ld_entity_list(
                        ld_pred['obj_uris'])
                    for ld_obj_uri in sort_obj_uris:
                        # make a field for each linked data pred and object
                        field_num = self.get_add_ld_field_number(
                            '[Has]', pred_ld_equiv_uri, ld_obj_uri)
                else:
                    if self.include_ld_obj_uris:
                        field_num = self.get_add_ld_field_number(
                            '[URI]', pred_ld_equiv_uri)
                    field_num = self.get_add_ld_field_number(
                        '[Label]', pred_ld_equiv_uri)
                    if self.include_ld_source_values:
                        field_num = self.get_add_ld_field_number(
                            '[Source]', pred_ld_equiv_uri)
            # get the rows for the export table
            rows = UUIDsRowsExportTable(self.table_id).rows
            for row in rows:
                for pred_ld_equiv_uri, ld_pred in self.ld_predicates.items():
                    item_data = Assertion.objects.filter(
                        uuid=row['uuid'], predicate_uuid__in=ld_pred['uuids'])
                    if len(item_data) > 0:
                        self.add_ld_cells(row['uuid'], row['row_num'],
                                          item_data, pred_ld_equiv_uri)

    def add_ld_cells(self, uuid, row_num, item_data, pred_ld_equiv_uri):
        """ Adds linked data records for an assertion """
        if self.do_boolean_multiple_ld_fields(pred_ld_equiv_uri):
            multi_ld_fields = True
        else:
            multi_ld_fields = False
        obj_values = LastUpdatedOrderedDict()
        obj_values['[URI]'] = []
        obj_values['[Label]'] = []
        obj_values['[Source]'] = []
        project_uuid = item_data[0].project_uuid
        for assertion in item_data:
            object_uuid = assertion.object_uuid
            if assertion.object_type == 'xsd:string':
                try:
                    oc_str = OCstring.objects.get(uuid=object_uuid)
                    obj_label = oc_str.content
                except OCstring.DoesNotExist:
                    obj_label = ''
            else:
                obj_label = self.deref_entity_label(object_uuid)
                obj_label = str(obj_label)
            if obj_label not in obj_values['[Source]']:
                obj_values['[Source]'].append(obj_label)
            obj_ld_found = False
            if object_uuid in self.ld_object_equivs:
                for obj_ld_equiv_uri in self.ld_object_equivs[object_uuid]:
                    obj_ld_found = True
                    if multi_ld_fields:
                        cell_value = self.boolean_multiple_ld_fields
                        field_num = self.get_add_ld_field_number(
                            '[Has]', pred_ld_equiv_uri, obj_ld_equiv_uri)
                        cell = ExpCell()
                        cell.table_id = self.table_id
                        cell.uuid = uuid
                        cell.project_uuid = project_uuid
                        cell.row_num = row_num
                        cell.field_num = field_num
                        cell.record = cell_value
                        cell.save()
                        cell = None
                    else:
                        # predicate not broken into separate fields for different values
                        obj_equiv_label = self.deref_entity_label(
                            obj_ld_equiv_uri)
                        if obj_equiv_label is False:
                            obj_equiv_label = obj_ld_equiv_uri
                        if obj_equiv_label not in obj_values['[Label]']:
                            obj_values['[Label]'].append(obj_equiv_label)
                        if obj_ld_equiv_uri not in obj_values['[URI]']:
                            obj_values['[URI]'].append(obj_ld_equiv_uri)
            if obj_ld_found is False:
                print('No linked data for object: ' + object_uuid)
        if multi_ld_fields is False:
            # predicate not broken into separate fields for different values
            for field_type, value_list in obj_values.items():
                if len(value_list) > 0:
                    try:
                        cell_value = '; '.join(value_list)
                    except TypeError:
                        # some messiness in the data, won't join into a string
                        cell_value = False
                        for val in value_list:
                            val = str(val)
                            if cell_value is False:
                                cell_value = val
                            else:
                                cell_value += '; ' + val
                    field_num = self.get_add_ld_field_number(
                        field_type, pred_ld_equiv_uri)
                    cell = ExpCell()
                    cell.table_id = self.table_id
                    cell.uuid = uuid
                    cell.project_uuid = project_uuid
                    cell.row_num = row_num
                    cell.field_num = field_num
                    cell.record = cell_value
                    cell.save()
                    cell = None

    def get_add_ld_field_number(self,
                                field_type,
                                pred_ld_equiv_uri,
                                obj_ld_equiv_uri=False):
        """ Gets the field_num for a linked data field, given the uri
            for the linked data field, and optionally the object
            Creates a new field for the linked data as needed
        """
        if obj_ld_equiv_uri is not False:
            field_key = pred_ld_equiv_uri + '::' + obj_ld_equiv_uri
        else:
            field_key = pred_ld_equiv_uri
        if field_type is not False:
            if len(field_type) > 0:
                field_key += '::' + field_type
        else:
            field_key += '::[Type unknown]'
        if field_key in self.ld_fields:
            field_num = self.ld_fields[field_key]
        else:
            field_num = len(self.fields) + 1
            label = self.deref_entity_label(pred_ld_equiv_uri)
            if label is False:
                label = pred_ld_equiv_uri
            rel_ids = [field_type, pred_ld_equiv_uri]
            if obj_ld_equiv_uri is not False:
                rel_ids.append(obj_ld_equiv_uri)
                obj_label = self.deref_entity_label(obj_ld_equiv_uri)
                if obj_label is False:
                    obj_label = obj_ld_equiv_uri
                label = label + ' :: ' + str(obj_label)
            if field_type is not False:
                if len(field_type) > 0:
                    label += ' ' + field_type
            field = {
                'label': label,
                'rel_ids': rel_ids,
                'field_num': field_num
            }
            self.fields.append(field)
            self.save_field(field)
            self.ld_fields[field_key] = field_num
        return field_num
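    # Example field_key values composed above (URIs are placeholders):
    #   'http://example.org/pred::[Label]'
    #   'http://example.org/pred::http://example.org/obj::[Has]'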

    def save_context(self, row_num, man, parent_list):
        """ Save context information, will also add new context fields
            as needed
        """
        use_parents = False
        context_uri = ''
        if isinstance(parent_list, list):
            if len(parent_list) > 0:
                context_uri = URImanagement.make_oc_uri(
                    parent_list[0], 'subjects')
                use_parents = parent_list[::-1]
        # save a record of the context URI
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 13
        cell.record = context_uri
        cell.save()
        cell = None
        if use_parents is not False:
            pindex = 0
            for parent_uuid in use_parents:
                pindex += 1
                context_label = self.deref_entity_label(parent_uuid)
                field_num = self.get_add_context_field_number(pindex)
                cell = ExpCell()
                cell.table_id = self.table_id
                cell.uuid = man.uuid
                cell.project_uuid = man.project_uuid
                cell.row_num = row_num
                cell.field_num = field_num
                cell.record = context_label
                cell.save()
                cell = None

    def get_add_context_field_number(self, pindex):
        """ Gets the field_num for a context field, given the pindex
            which indicates depth in the context hierarchy.
            Creates a new field for the context level as needed
        """
        if pindex in self.context_fields:
            field_num = self.context_fields[pindex]
        else:
            field_num = len(self.fields) + 1
            field = {
                'label': 'Context (' + str(pindex) + ')',
                'rel_ids': ['context', pindex],
                'field_num': field_num
            }
            self.fields.append(field)
            self.save_field(field)
            self.context_fields[pindex] = field_num
        return field_num

    def save_default_chrono(self, row_num, man, event_meta):
        """ Saves earliest / latest times for an item """
        earliest = ''
        latest = ''
        if event_meta is not False:
            times = []
            for event in event_meta:
                times.append(event.start)
                times.append(event.stop)
            earliest = min(times)
            latest = max(times)
            if self.dates_bce_ce is False:
                earliest = 1950 - earliest
                latest = 1950 - latest
            earliest = round(earliest, 0)
            latest = round(latest, 0)
        # save earliest
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 11
        cell.record = str(earliest)
        cell.save()
        cell = None
        # save latest
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 12
        cell.record = str(latest)
        cell.save()
        cell = None
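    # Worked example of the BP conversion above (BP counts back from
    # 1950): an event from -500 (500 BCE) to 100 (100 CE) becomes
    # earliest = 1950 - (-500) = 2450 BP and latest = 1950 - 100 = 1850 BP.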

    def save_default_geo(self, row_num, man, geo_meta):
        """ Saves geo lat / lon data for an item """
        latitude = ''
        longitude = ''
        note = 'Best available location data'
        if geo_meta is not False:
            for geo in geo_meta:
                if geo.meta_type == 'oc-gen:discovey-location':
                    latitude = geo.latitude
                    longitude = geo.longitude
                    if geo.specificity < 0:
                        note = 'Location approximated '
                        note += 'as a security precaution (Zoom: ' + str(
                            abs(geo.specificity)) + ')'
                    break
        # save Latitude
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 8
        cell.record = str(latitude)
        cell.save()
        cell = None
        # save Longitude
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 9
        cell.record = str(longitude)
        cell.save()
        cell = None
        # save Note
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 10
        cell.record = note
        cell.save()
        cell = None

    def save_authorship(self, row_num, man):
        """ Saves authorship information """
        authors = ''
        auth = Authorship()
        found = auth.get_authors(man.uuid, man.project_uuid)
        if found:
            # save counts of different dc-terms:creator for use as table metadata
            for auth_id in auth.creators:
                if auth_id not in self.dc_creator_ids:
                    self.dc_creator_ids[auth_id] = 0
                self.dc_creator_ids[auth_id] += 1
            # save counts of different dc-terms:contributor for use as table metadata
            for auth_id in auth.contributors:
                if auth_id not in self.dc_contributor_ids:
                    self.dc_contributor_ids[auth_id] = 0
                self.dc_contributor_ids[auth_id] += 1
            all_author_ids = auth.creators + auth.contributors
            all_authors = []
            for auth_id in all_author_ids:
                author = self.deref_entity_label(auth_id)
                all_authors.append(author)
            authors = '; '.join(all_authors)
        # save Authors
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 7
        cell.record = authors
        cell.save()
        cell = None

    def save_basic_default_field_cells(self, row_num, man):
        """ Saves the default fields that do not involve containment lookups """
        # save URI
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 1
        cell.record = URImanagement.make_oc_uri(man.uuid, man.item_type)
        cell.save()
        cell = None
        # save label
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 2
        cell.record = man.label
        cell.save()
        cell = None
        # save project label
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 3
        cell.record = self.deref_entity_label(man.project_uuid)
        cell.save()
        cell = None
        # save project URI
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 4
        cell.record = URImanagement.make_oc_uri(man.project_uuid, 'projects')
        cell.save()
        cell = None
        # save item category / class
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 5
        cell.record = self.deref_entity_label(man.class_uri)
        cell.save()
        cell = None
        # last updated
        if isinstance(man.revised, datetime):
            last_update = man.revised
        else:
            last_update = man.record_updated
        cell = ExpCell()
        cell.table_id = self.table_id
        cell.uuid = man.uuid
        cell.project_uuid = man.project_uuid
        cell.row_num = row_num
        cell.field_num = 6
        cell.record = last_update.strftime('%Y-%m-%d')
        cell.save()
        cell = None
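    # Default field layout written by the save_* methods above
    # (field_num: content):
    #   1: item URI, 2: item label, 3: project label, 4: project URI,
    #   5: item category/class, 6: last updated, 7: authors,
    #   8: latitude, 9: longitude, 10: location note,
    #   11: earliest date, 12: latest date, 13: context URI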

    def update_table_metadata(self):
        """ saves the final table author metadata """
        try:
            exp_tab = ExpTable.objects.get(table_id=self.table_id)
        except ExpTable.DoesNotExist:
            exp_tab = ExpTable()
            exp_tab.table_id = self.table_id
            exp_tab.label = '[Not yet named]'
        tcells_ok = ExpCell.objects.filter(table_id=self.table_id)[:1]
        if len(tcells_ok):
            sum_cell = ExpCell.objects\
                              .filter(table_id=self.table_id)\
                              .aggregate(Max('row_num'))
            exp_tab.row_count = sum_cell['row_num__max']
        else:
            exp_tab.row_count = 0
        tfields_ok = ExpField.objects.filter(table_id=self.table_id)[:1]
        if len(tfields_ok):
            sum_field = ExpField.objects\
                                .filter(table_id=self.table_id)\
                                .aggregate(Max('field_num'))
            exp_tab.field_count = sum_field['field_num__max']
        else:
            exp_tab.field_count = 0
        authors = LastUpdatedOrderedDict()
        if len(self.dc_contributor_ids) > 0:
            sauthors = sorted(self.dc_contributor_ids.items(),
                              key=lambda x: (-x[1], x[0]))
            authors['dc-terms:contributor'] = self.add_author_list(
                sauthors, 'contributor')
        if len(self.dc_creator_ids) > 0:
            sauthors = sorted(self.dc_creator_ids.items(),
                              key=lambda x: (-x[1], x[0]))
            authors['dc-terms:creator'] = self.add_author_list(
                sauthors, 'creator')
        exp_tab.meta_json = authors
        exp_tab.save()

    def add_author_list(self, sauthors, dc_type):
        """ makes an author list from a sorted tuple of
            author identifiers
        """
        i = 0
        author_list = []
        for uri_key, count in sauthors:
            i += 1
            auth = LastUpdatedOrderedDict()
            auth['id'] = '#' + dc_type + '-' + str(i)
            if 'http://' in uri_key or 'https://' in uri_key:
                auth['rdfs:isDefinedBy'] = uri_key
            else:
                auth['rdfs:isDefinedBy'] = URImanagement.make_oc_uri(
                    uri_key, 'persons')
            auth['label'] = self.deref_entity_label(uri_key)
            auth['count'] = count
            author_list.append(auth)
        return author_list

    def recursive_context_build(self, parent_level=0):
        """ recusrively builds a list of parent contexts """
        if parent_level == 0:
            sql = 'INSERT INTO exp_records(table_id, uuid, project_uuid,\
                   row_num, field_num, record_id, record)\
                   SELECT exp.table_id, exp.uuid, exp.project_uuid,\
                   exp.row_num, -1, pman.label, ass.uuid \
                   FROM exp_records AS exp \
                   LEFT OUTER JOIN oc_assertions AS ass\
                   ON (ass.object_uuid = exp.uuid \
                       AND ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\') \
                   LEFT OUTER JOIN oc_manifest AS pman ON (ass.uuid = pman.uuid) \
                   WHERE ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\' \
                   AND exp.table_id = \'' + self.table_id + '\' \
                   AND exp.field_num = 1; '

        else:
            sql = 'INSERT INTO exp_records(table_id, uuid, project_uuid,\
                   row_num, field_num, record_id, record)\
                   SELECT exp.table_id, exp.uuid, exp.project_uuid,\
                   exp.row_num, -1, pman.label, ass.uuid \
                   FROM exp_records AS exp \
                   LEFT OUTER JOIN oc_assertions AS ass\
                   ON (ass.object_uuid = exp.uuid \
                       AND ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\') \
                   LEFT OUTER JOIN oc_manifest AS pman ON (ass.uuid = pman.uuid) \
                   WHERE ass.predicate_uuid = \'' + Assertion.PREDICATES_CONTAINS + '\' \
                   AND exp.table_id = \'' + self.table_id + '\' \
                   AND exp.field_num = ' + str(parent_level) + ' ;'
        cursor = connection.cursor()  # requires: from django.db import connection
        parent_res = cursor.execute(sql)
        print(str(parent_res))
        parent_level = parent_level - 1

    def deref_entity_label(self, entity_id):
        """ Dereferences an entity """
        output = False
        if entity_id in self.entities:
            ent = self.entities[entity_id]
            output = ent.label
        else:
            ent = Entity()
            found = ent.dereference(entity_id)
            if found:
                output = ent.label
                self.entities[entity_id] = ent
            else:
                print('Missing id: ' + entity_id)
        return output
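    # deref_entity_label memoizes dereferenced entities in self.entities,
    # so repeated lookups of the same id avoid extra DB hits, e.g.:
    #   label = self.deref_entity_label('some-uuid')  # dereferences, caches
    #   label = self.deref_entity_label('some-uuid')  # served from cache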
Пример #21
0
class ArchEntsImport():
    """ Loads ArchEnts.xml files for import

from opencontext_py.apps.imports.faims.archents import ArchEntsImport
faims_ents = ArchEntsImport()
faims_ents.gen_config('faims-survey')

from opencontext_py.apps.imports.faims.archents import ArchEntsImport
faims_ents = ArchEntsImport()
faims_ents.db_initial_subjects_creation('faims-test')

Note: in the element <freetext> a user enters an annotation
on an observation.

<formattedIdentifier> is best to use for a label,
but the faims-uuid for the entity is the locally unique id


    """

    FAIMS_ENTITY_TYPE_PREDICATE_LABEL = 'Entity Record Type'
    
    def __init__(self):
        self.tree = None
        self.project_uuid = False
        self.source_id = False
        self.import_persons = {}
        self.root_subject_label = False
        self.root_subject_uuid = False
        self.root_subject_context = False
        self.root_subject_class = 'oc-gen:cat-site'
        self.root_subject_sup_id = 'auto-root'
        self.load_into_importer = False
        self.dt_attribute_objs = LastUpdatedOrderedDict()
        self.attributes = LastUpdatedOrderedDict()
        self.entity_types = LastUpdatedOrderedDict()
        self.relation_types = LastUpdatedOrderedDict()
        self.entities = LastUpdatedOrderedDict()
        self.oc_config_relation_types = 'oc-relation-types'
        self.oc_config_entity_types = 'oc-entity-types'
        self.oc_config_attributes = 'oc-attributes'
        self.oc_config_entities = 'oc-entities'
        self.reconcile_key = 'faims_id'
        self.ent_type_pred_sup_id = 'auto-entity-type'
        self.fm = FileManage()

    def gen_config(self, act_dir, filename='archents.xml'):
        """ processes the archents file """
        self.tree = self.fm.load_xml_file(act_dir, filename)
        if self.tree is not False:
            self.load_or_classify_attributes(act_dir)
            self.load_or_get_entity_types(act_dir)
            self.check_update_relations_types(act_dir)

    def load_or_get_entity_types(self, act_dir):
        """ loads or classifies attributes in a tree """
        key = self.oc_config_entity_types
        json_obj = self.fm.get_dict_from_file(key, act_dir)
        if json_obj is None:
            # need to read the XML and get entity types
            self.get_xml_entity_types()
            self.fm.save_serialized_json(key,
                                         act_dir,
                                         self.entity_types)
        else:
            self.entity_types = json_obj

    def get_xml_entity_types(self):
        """ gets a list of different entity types in the
            FAIMS xml
        """
        if self.tree is not False:
            ent_types = self.tree.xpath('/archents/aenttype')
            for ent_type in ent_types:
                faims_id = ent_type.get('aentTypeID')
                ent_type_obj = LastUpdatedOrderedDict()
                ent_type_obj['id'] = faims_id
                ent_type_obj['label'] = ent_type.get('aentTypeName')
                ent_type_obj['item_type'] = None
                ent_type_obj['class_uri'] = None
                # add the type label as an attribute
                ent_type_obj['add_type_as_attribute'] = True
                ent_type_obj['predicate_uuid'] = None
                ent_type_obj['type_uuid'] = None
                # counts ranking
                xml_entities = ent_type.xpath('archentity')
                ent_type_obj['count'] = len(xml_entities)
                self.entity_types[faims_id] = ent_type_obj
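    # The XPath expressions used in this class imply roughly the
    # following archents.xml structure (a minimal sketch; attribute and
    # element values are invented):
    #   <archents>
    #     <aenttype aentTypeID="1" aentTypeName="Example Type">
    #       <archentity>
    #         <uuid>faims-uuid</uuid>
    #         <identifiers>
    #           <formattedIdentifier>{Example 1}</formattedIdentifier>
    #           <identifier><attributename>...</attributename></identifier>
    #         </identifiers>
    #         <properties>
    #           <property>
    #             <attributename>...</attributename>
    #             <attributeid>...</attributeid>
    #           </property>
    #         </properties>
    #       </archentity>
    #     </aenttype>
    #   </archents>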

    def load_or_classify_attributes(self, act_dir):
        """ loads or classifies attributes in a tree """
        key = self.oc_config_attributes
        json_obj = self.fm.get_dict_from_file(key, act_dir)
        if json_obj is None:
            # need to read the XML and make the classifications from scratch
            self.classify_xml_tree_attributes()
            # now make dictionary objects to save as JSON
            self.attributes = LastUpdatedOrderedDict()
            for prop_id, dt_class_obj in self.dt_attribute_objs.items():
                attrib_dict = dt_class_obj.make_dict_obj()
                attrib_dict['predicate_type'] = 'variable'  # default type
                attrib_dict['oc-equiv'] = None  # default to no equivalence
                attrib_dict = self.check_attribute_as_identifier(attrib_dict,
                                                                 ImportFieldAnnotation.PRED_CONTAINED_IN)
                if prop_id not in self.attributes:
                    self.attributes[prop_id] = attrib_dict
            self.fm.save_serialized_json(key,
                                         act_dir,
                                         self.attributes)
        else:
            # we have JSON with dictionary objects to read into the classes
            self.attributes = json_obj
            for prop_id, attrib_dict in self.attributes.items():
                dt_class_obj = DescriptionDataType()
                ok = dt_class_obj.read_dict_obj(attrib_dict)
                if ok:
                    self.dt_attribute_objs[prop_id] = dt_class_obj
            # now update if new attributes were found
            save_update = False
            for prop_id, dt_class_obj in self.dt_attribute_objs.items():
                attrib_dict = dt_class_obj.make_dict_obj()
                attrib_dict['predicate_type'] = 'variable'  # default type
                attrib_dict['oc-equiv'] = None  # default to no equivalence
                attrib_dict = self.check_attribute_as_identifier(attrib_dict,
                                                                 ImportFieldAnnotation.PRED_CONTAINED_IN)
                if prop_id not in self.attributes:
                    save_update = True
                    self.attributes[prop_id] = attrib_dict
            if save_update:
                self.fm.save_serialized_json(key,
                                             act_dir,
                                             self.attributes)

    def check_update_relations_types(self, act_dir):
        """ checks to see if different relation types are used in
            identifiers, updates accordingly
        """
        key = self.oc_config_relation_types
        json_obj = self.fm.get_dict_from_file(key, act_dir)
        if json_obj is not None:
            self.relation_types = json_obj
            for faims_id_pred, rel_dict in json_obj.items():
                rel_dict = self.check_attribute_as_identifier(rel_dict,
                                                              Assertion.PREDICATES_CONTAINS)
                self.relation_types[faims_id_pred] = rel_dict
            self.fm.save_serialized_json(key,
                                         act_dir,
                                         self.relation_types) 

    def check_attribute_as_identifier(self, attrib_dict, oc_equiv):
        """ checks to see if the attribute is used as an identifier
            if so, then it is likely part of a spatial context
        """
        if self.tree is not False:
            idents = self.tree.xpath('//identifiers/identifier')
            for ident in idents:
                if not isinstance(attrib_dict['oc-equiv'], str):
                    # check to see if we've got a matching attribute label
                    ident_names = ident.xpath('attributename')
                    for ident_name in ident_names:
                        if ident_name.text == attrib_dict['label']:
                            attrib_dict['oc-equiv'] = ImportFieldAnnotation.PRED_CONTAINED_IN
                            break
                else:
                    # we've got an equivalent so no need to loop
                    break
        return attrib_dict

    def classify_xml_tree_attributes(self):
        """ classifies attributes in a tree """
        if self.tree is not False:
            ent_types = self.tree.xpath('/archents/aenttype')
            for ent_type in ent_types: 
                ents = ent_type.xpath('archentity')
                for entity in ents:
                    props = entity.xpath('properties/property')
                    for prop in props:
                        prop_name = prop.xpath('attributename')[0].text
                        prop_id = prop.xpath('attributeid')[0].text
                        if prop_id not in self.attributes:
                            dt_class_obj = DescriptionDataType()
                            dt_class_obj.id = prop_id
                            dt_class_obj.label = prop_name
                        else:
                            dt_class_obj = self.attributes[prop_id]
                        record = self.get_property_record(prop)
                        if record is not None:
                            dt_class_obj.check_record_datatype(record)
                            dt_class_obj.data_type = dt_class_obj.classify_data_type()
                            self.dt_attribute_objs[prop_id] = dt_class_obj
    
    def db_initial_subjects_creation(self, act_dir, filename='archents.xml'):
        """ inital creation of subjects """
        self.tree = self.fm.load_xml_file(act_dir, filename)
        self.entities = self.fm.get_dict_from_file(self.oc_config_entities,
                                                   act_dir)
        if self.entities is None:
            self.entities = LastUpdatedOrderedDict()
        self.entity_types = self.fm.get_dict_from_file(self.oc_config_entity_types,
                                                       act_dir)
        if self.tree is not False and self.entity_types is not None:
            # we loaded the needed data, now to create the subject entities
            # first we make a temporary root item for the import,
            # this puts everything into an initial context tree
            self.db_create_temporary_root_subject()
            # now we get the entity types to check which ones are subjects to import
            ent_types = self.tree.xpath('/archents/aenttype')
            for ent_type in ent_types:
                faims_id = ent_type.get('aentTypeID')
                faims_id = str(faims_id)
                if faims_id in self.entity_types:
                    ent_dict = self.entity_types[faims_id]
                    if isinstance(ent_dict['class_uri'], str) \
                       and ent_dict['item_type'] == 'subjects':
                        # we have an entity type OK to make subjects with,
                        # so we can now get the entity XML and make new subject items
                        print('OK to make subjects for: ' + ent_dict['label'])
                        xml_entities = ent_type.xpath('archentity')
                        for xml_ent in xml_entities:
                            faims_item_id = xml_ent.xpath('uuid')[0].text
                            item_label = xml_ent.xpath('identifiers/formattedIdentifier')[0].text
                            item_label = item_label.replace('{', '')
                            item_label = item_label.replace('}', '')
                            item_label = item_label.strip()
                            print('Import FAIMS-ID: ' + faims_item_id + ' label: ' + item_label)
                            self.db_create_initial_subject_item(act_dir,
                                                                ent_dict,
                                                                faims_item_id,
                                                                item_label) 
    
    def db_create_initial_subject_item(self,
                                       act_dir,
                                       ent_dict,
                                       faims_item_id,
                                       item_label):
        """ reconciles or makes a new subject item (manifest, subject,
            initial containment assertion)
        """
        if faims_item_id not in self.entities:
            # a new item, not seen before
            man_obj = self.check_get_faims_manifest_object(faims_item_id,
                                                           item_label,
                                                           ent_dict['item_type'],
                                                           ent_dict['class_uri'])
            if man_obj is False:
                # we did not find it, so make a new one
                # first, make the supplemental dict object to help associate the faims_item_id
                # with the manifest object. This makes reconciliation precise.
                sup_dict = {}
                sup_dict[self.reconcile_key] = faims_item_id
                sup_dict['faims_label'] = item_label
                # now, make sure the item label is unique
                item_label = self.check_make_manifest_label_unique(item_label,
                                                                   ent_dict['item_type'],
                                                                   ent_dict['class_uri'])
                # make the initial context, based on the root context's path
                context = self.root_subject_context + '/' + item_label
                uuid = GenUUID.uuid4()
                uuid = str(uuid)
                new_sub = Subject()
                new_sub.uuid = uuid
                new_sub.project_uuid = self.project_uuid
                new_sub.source_id = self.source_id
                new_sub.context = context
                new_sub.save()
                man_obj = Manifest()
                man_obj.uuid = uuid
                man_obj.project_uuid = self.project_uuid
                man_obj.source_id = self.source_id
                man_obj.item_type = 'subjects'
                man_obj.repo = ''
                man_obj.class_uri = ent_dict['class_uri']
                man_obj.label = item_label
                man_obj.des_predicate_uuid = ''
                man_obj.views = 0
                man_obj.sup_json = sup_dict
                man_obj.save()
                # now add the initial containment relationship
                self.add_change_containment_assertion(self.root_subject_uuid,
                                                      man_obj.uuid)
            # now save the open context uuid for the entity in the entities dict
            self.entities[faims_item_id] = LastUpdatedOrderedDict()
            self.entities[faims_item_id]['uuid'] = man_obj.uuid
            self.entities[faims_item_id]['item_type'] = man_obj.item_type
            self.fm.save_serialized_json(self.oc_config_entities,
                                         act_dir,
                                         self.entities)
    
    def check_make_manifest_label_unique(self,
                                         item_label,
                                         item_type,
                                         class_uri,
                                         label_suffix_num=1):
        """ checks to make sure a given label for a given item type
            is really unique in the manifest, if not add a suffix
        """
        original_label = item_label
        if label_suffix_num > 1:
            item_label += ' [' + str(label_suffix_num) + ']'
        man_objs = Manifest.objects\
                           .filter(label=item_label,
                                   item_type=item_type,
                                   class_uri=class_uri,
                                   project_uuid=self.project_uuid)[:1]
        if len(man_objs) > 0 and label_suffix_num < 10000:
            label_suffix_num += 1
            item_label = self.check_make_manifest_label_unique(original_label,
                                                               item_type,
                                                               class_uri,
                                                               label_suffix_num)
        return item_label
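
    # A minimal standalone sketch of the suffixing idea above, with an
    # in-memory set standing in for the Manifest query; every name here is
    # a hypothetical illustration, not part of the importer:
    #
    #   def make_label_unique(label, existing_labels, n=1):
    #       candidate = label if n < 2 else label + ' [' + str(n) + ']'
    #       if candidate in existing_labels and n < 10000:
    #           return make_label_unique(label, existing_labels, n + 1)
    #       return candidate
    #
    #   make_label_unique('Locus 1', {'Locus 1', 'Locus 1 [2]'})
    #   # -> 'Locus 1 [3]'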
    
    def check_get_faims_manifest_object(self,
                                        faims_item_id,
                                        item_label,
                                        item_type,
                                        class_uri):
        """ checks to see if a faims entity has a manifest object, by
            matching label (including possible suffixes), item_type,
            class_uri, project AND faims_item_id
        """
        man_obj = False
        man_objs = Manifest.objects\
                           .filter(label__contains=item_label,
                                   item_type=item_type,
                                   class_uri=class_uri,
                                   project_uuid=self.project_uuid)
        if len(man_objs) > 0:
            for act_man_obj in man_objs:
                match_ok = act_man_obj.check_sup_json_key_value(self.reconcile_key,
                                                                faims_item_id)
                if match_ok:
                    # the faims_item_id matches the supplemental JSON dict key-value
                    # for this item, so we have a genuine matching manifest record
                    man_obj = act_man_obj
                    break
        return man_obj
    
    def add_change_containment_assertion(self, parent_uuid, child_uuid):
        """ adds or changes a containment assertion """
        contain_pred = Assertion.PREDICATES_CONTAINS
        del_old = Assertion.objects\
                           .filter(predicate_uuid=contain_pred,
                                   object_uuid=child_uuid)\
                           .delete()
        new_ass = Assertion()
        new_ass.uuid = parent_uuid
        new_ass.subject_type = 'subjects'
        new_ass.project_uuid = self.project_uuid
        new_ass.source_id = self.source_id
        new_ass.obs_node = '#contents-' + str(1)
        new_ass.obs_num = 1
        new_ass.sort = 1
        new_ass.visibility = 1
        new_ass.predicate_uuid = contain_pred
        new_ass.object_type = 'subjects'
        new_ass.object_uuid = child_uuid
        new_ass.save()
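
    # Note: deleting any prior containment assertion for the child before
    # saving the new one keeps the hierarchy a tree. Each child uuid has at
    # most one parent at a time, so calling this method again simply moves
    # the child under the new parent.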
    
    def db_create_temporary_root_subject(self):
        """ makes a temporary root subject for the whole import
            makes it easier to move subjects into hiearchies later
        """
        if not isinstance(self.root_subject_label, str):
            self.root_subject_label = self.source_id + '-root'
        if not isinstance(self.root_subject_context, str):
            self.root_subject_context = self.root_subject_label
        if not isinstance(self.root_subject_uuid, str):
            man_objs = Manifest.objects\
                               .filter(label=self.root_subject_label,
                                       class_uri=self.root_subject_class,
                                       project_uuid=self.project_uuid)[:1]
            if len(man_objs) > 0:
                self.root_subject_uuid = man_objs[0].uuid
            else:
                # did not find a root subject, so make one
                sup_dict = {}
                sup_dict[self.reconcile_key] = self.root_subject_sup_id
                root_uuid = GenUUID.uuid4()
                root_uuid = str(root_uuid)
                self.root_subject_uuid = root_uuid
                new_sub = Subject()
                new_sub.uuid = self.root_subject_uuid
                new_sub.project_uuid = self.project_uuid
                new_sub.source_id = self.source_id
                new_sub.context = self.root_subject_context
                new_sub.save()
                new_man = Manifest()
                new_man.uuid = self.root_subject_uuid
                new_man.project_uuid = self.project_uuid
                new_man.source_id = self.source_id
                new_man.item_type = 'subjects'
                new_man.repo = ''
                new_man.class_uri = self.root_subject_class
                new_man.label = self.root_subject_label
                new_man.des_predicate_uuid = ''
                new_man.views = 0
                new_man.sup_json = sup_dict
                new_man.save()
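
    # The method above is a manual get-or-create: look up the root subject
    # by label, class, and project, and mint a new Subject + Manifest pair
    # only on a miss. For the Manifest half alone, Django's built-in idiom
    # has the same shape (a sketch, assuming the default manager; the
    # paired Subject row and the remaining Manifest fields would still
    # need separate handling):
    #
    #   man_obj, created = Manifest.objects.get_or_create(
    #       label=self.root_subject_label,
    #       class_uri=self.root_subject_class,
    #       project_uuid=self.project_uuid,
    #       defaults={'uuid': str(GenUUID.uuid4()),
    #                 'item_type': 'subjects'})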
    
    def db_save_reconcile_entity_predicates_types(self, act_dir):
        """ saves predicates and type items to the
            Open Context database, and / or reconciles these
            items with previously saved items from the same project
        """
        key = self.oc_config_entity_types
        json_obj = self.fm.get_dict_from_file(key, act_dir)
        if json_obj is None:
            print('Need to first generate an attributes file from the ArchEnts!')
            ok = False
        else:
            # we have JSON with dictionary for the entity_types
            self.entity_types = json_obj
            make_entity_types_assertions = False
            for faims_ent_type_id, ent_dict in json_obj.items():
                if isinstance(ent_dict['item_type'], str) \
                   and ent_dict['add_type_as_attribute']:
                    # OK we have some items that need entity types made as
                    # a descriptive attribute
                    make_entity_types_assertions = True
                    break
            if make_entity_types_assertions:
                # we have entity_types that need to have a descriptive
                # predicate, so create a new predicate in Open Context
                # to describe entity_types for this project
                sup_dict = LastUpdatedOrderedDict()
                sup_dict[self.reconcile_key] = self.ent_type_pred_sup_id
                pm = PredicateManagement()
                pm.project_uuid = self.project_uuid
                pm.source_id = self.source_id
                pm.sup_dict = sup_dict
                pm.sup_reconcile_key = self.reconcile_key
                pm.sup_reconcile_value = self.ent_type_pred_sup_id
                pred_obj = pm.get_make_predicate(self.FAIMS_ENTITY_TYPE_PREDICATE_LABEL,
                                                 'variable',
                                                 'id')
                if pred_obj is not False:
                    # we reconciled or created the predicate!
                    # now we mint oc_types for all the entity_types
                    predicate_uuid = str(pred_obj.uuid)
                    for faims_ent_type_id, ent_dict in json_obj.items():
                        if isinstance(ent_dict['item_type'], str) \
                           and ent_dict['add_type_as_attribute']:
                            # OK, we have an item entity type to be used as a description
                            sup_dict = LastUpdatedOrderedDict()
                            sup_dict[self.reconcile_key] = faims_ent_type_id
                            tm = TypeManagement()
                            tm.project_uuid = self.project_uuid
                            tm.source_id = self.source_id
                            tm.sup_dict = sup_dict
                            tm.sup_reconcile_key = self.reconcile_key
                            tm.sup_reconcile_value = faims_ent_type_id
                            type_obj = tm.get_make_type_within_pred_uuid(predicate_uuid,
                                                                         ent_dict['label'])
                            if type_obj is not False:
                                # we have reconciled the type!
                                ent_dict['type_uuid'] = str(type_obj.uuid)
                                ent_dict['predicate_uuid'] = predicate_uuid
                                self.entity_types[faims_ent_type_id] = ent_dict
                # now save the results
                self.fm.save_serialized_json(key,
                                             act_dir,
                                             self.entity_types)
        
    def db_save_entity_attributes(self, act_dir, filename='archents.xml'):
        """ saves descriptive attributes for an entity """
        if self.tree is None:
            # we have not imported the XML yet
            self.tree = self.fm.load_xml_file(act_dir, filename)
        if len(self.entities) < 1:
            self.entities = self.fm.get_dict_from_file(self.oc_config_entities,
                                                       act_dir)
        if len(self.entity_types) < 1:
            self.entity_types = self.fm.get_dict_from_file(self.oc_config_entity_types,
                                                           act_dir)
        if len(self.attributes) < 1:
            self.attributes = self.fm.get_dict_from_file(self.oc_config_attributes,
                                                         act_dir)
        if self.tree is not False \
           and self.entities is not None \
           and self.entity_types is not None \
           and self.attributes is not None:
            # we've loaded the data we need!
            print('Have all data needed to make entity descriptions....')
            ent_types = self.tree.xpath('/archents/aenttype')
            for ent_type in ent_types:
                faims_ent_type_id = ent_type.get('aentTypeID')
                faims_ent_type_id = str(faims_ent_type_id)
                if faims_ent_type_id in self.entity_types:
                    # we found the entity type in our configuration
                    ent_type_dict = self.entity_types[faims_ent_type_id]
                    # check whether we should make an entity type assertion
                    record_entity_type = self.check_make_entity_type_assertion(ent_type_dict)
                    xml_entities = ent_type.xpath('archentity')
                    for xml_ent in xml_entities:
                        faims_item_id = xml_ent.xpath('uuid')[0].text
                        if faims_item_id in self.entities:
                            # we found the entity in our saved, reconciled entities
                            subject_uuid = self.entities[faims_item_id]['uuid']
                            subject_type = self.entities[faims_item_id]['item_type']
                            sort_num = 10
                            if record_entity_type: 
                                # make assertion about the entity type
                                fd = FaimsDescription()
                                fd.project_uuid = self.project_uuid
                                fd.source_id = self.source_id
                                fd.subject_uuid = subject_uuid
                                fd.subject_type = subject_type
                                fd.sort_num = sort_num
                                fd.add_type_description(ent_type_dict['predicate_uuid'],
                                                        ent_type_dict['type_uuid'])
                            props = xml_ent.xpath('properties/property')
                            for prop in props:
                                sort_num += 1
                                prop_id = prop.xpath('attributeid')[0].text
                                if prop_id in self.attributes:
                                    # we found the property attribute
                                    fd = FaimsDescription()
                                    fd.project_uuid = self.project_uuid
                                    fd.source_id = self.source_id
                                    fd.subject_uuid = subject_uuid
                                    fd.subject_type = subject_type
                                    fd.sort_num = sort_num
                                    fd.attrib_dict = self.attributes[prop_id]
                                    fd.faims_record = self.get_property_record(prop)
                                    vocab_ids = prop.xpath('vocabid')
                                    for vocab_id in vocab_ids:
                                        fd.faims_record_id = vocab_id.text
                                    fd.add_description()
                       
    def process_entity(self, entity):
        """processes each entity """
        faims_uuid = entity.xpath('uuid')[0].text
        uuid = GenUUID.uuid4()
        uuid = str(uuid)
        print('FAIMS-UUID: ' + faims_uuid)
        print('UUID: ' + uuid)
        created_by = entity.xpath('createdBy')[0].text
        modified_by = entity.xpath('modifiedBy')[0].text
        created_by_uuid = self.get_make_person_uuid(created_by)
        modified_by_uuid = self.get_make_person_uuid(modified_by)
        print('Creator: ' + created_by + '(' + created_by_uuid + ')')
        print('Modified: ' + modified_by + '(' + modified_by_uuid + ')')
        print('-----------------------------------------')
    
    def get_property_record(self, prop):
        record = None
        rvocabs = prop.xpath('resolvedvocabname')
        for rvocab in rvocabs:
            record = rvocab.text
        if record is None:
            vocabs = prop.xpath('vocabname')
            for vocab in vocabs:
                record = vocab.text
        if record is None:
            measures = prop.xpath('measure')
            for measure in measures:
                record = measure.text
        return record
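
    # The cascade above is a "first non-empty XPath wins" lookup, keeping
    # the last match within the winning query. A compact equivalent sketch
    # using the same lxml element API:
    #
    #   for path in ('resolvedvocabname', 'vocabname', 'measure'):
    #       nodes = prop.xpath(path)
    #       if nodes:
    #           return nodes[-1].text
    #   return None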

    def check_make_entity_type_assertion(self, ent_type_dict):
        """ make an entity type assertion ? """
        make_assertion = False
        if ent_type_dict['add_type_as_attribute']:
            if 'predicate_uuid' in ent_type_dict \
                and 'type_uuid' in ent_type_dict:
                if isinstance(ent_type_dict['predicate_uuid'], str) \
                    and isinstance(ent_type_dict['type_uuid'], str):
                    # we have data we need to make the assertion
                    make_assertion = True
        return make_assertion
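
The reconciliation pattern running through this example stores the source
system's identifier in each manifest record's sup_json dict, then requires
that key-value pair to match before reusing a record. A minimal sketch of
the check the code above delegates to Manifest.check_sup_json_key_value;
the standalone function and data here are hypothetical:

 def sup_json_key_value_matches(sup_json, key, value):
     """ True when the supplemental JSON dict carries the expected
         source identifier """
     return isinstance(sup_json, dict) and sup_json.get(key) == value

 sup_json_key_value_matches({'faims_id': 'entity-42'}, 'faims_id',
                            'entity-42')  # True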
Example #22
0
 def add_filters_json(self, request_dict):
     """ adds JSON describing search filters """
     fl = FilterLinks()
     fl.base_search_link = self.base_search_link
     filters = []
     string_fields = []  # so we have an interface for string searches
     i = 0
     for param_key, param_vals in request_dict.items():
         if param_key == 'path':
             if param_vals is not False and param_vals is not None:
                 i += 1
                 f_entity = self.get_entity(param_vals, True)
                 label = http.urlunquote_plus(param_vals)
                 act_filter = LastUpdatedOrderedDict()
                 act_filter['id'] = '#filter-' + str(i)
                 act_filter['oc-api:filter'] = 'Context'
                 act_filter['label'] = label.replace('||', ' OR ')
                 if f_entity is not False:
                     act_filter['rdfs:isDefinedBy'] = f_entity.uri
                 # generate a request dict without the context filter
                 rem_request = fl.make_request_sub(request_dict, param_key,
                                                   param_vals)
                 act_filter['oc-api:remove'] = fl.make_request_url(
                     rem_request)
                 act_filter['oc-api:remove-json'] = fl.make_request_url(
                     rem_request, '.json')
                 filters.append(act_filter)
         else:
             for param_val in param_vals:
                 i += 1
                 remove_geodeep = False
                 act_filter = LastUpdatedOrderedDict()
                 act_filter['id'] = '#filter-' + str(i)
                 if self.hierarchy_delim in param_val:
                     all_vals = param_val.split(self.hierarchy_delim)
                 else:
                     all_vals = [param_val]
                 if param_key == 'proj':
                     # projects, only care about the last item in the parameter value
                     act_filter['oc-api:filter'] = 'Project'
                     label_dict = self.make_filter_label_dict(all_vals[-1])
                     act_filter['label'] = label_dict['label']
                     if len(label_dict['entities']) == 1:
                         act_filter['rdfs:isDefinedBy'] = label_dict[
                             'entities'][0].uri
                 elif param_key == 'prop':
                     # prop, the first item is the filter-label
                     # the last is the filter
                     act_filter['label'] = False
                     if len(all_vals) < 2:
                         act_filter['oc-api:filter'] = 'Description'
                     else:
                         filt_dict = self.make_filter_label_dict(
                             all_vals[0])
                         act_filter['oc-api:filter'] = filt_dict['label']
                         if filt_dict['data-type'] == 'string':
                             act_filter[
                                 'label'] = 'Search Term: \'' + all_vals[
                                     -1] + '\''
                     if act_filter['label'] is False:
                         label_dict = self.make_filter_label_dict(
                             all_vals[-1])
                         act_filter['label'] = label_dict['label']
                 elif param_key == 'type':
                     act_filter['oc-api:filter'] = 'Open Context Type'
                     if all_vals[0] in QueryMaker.TYPE_MAPPINGS:
                         type_uri = QueryMaker.TYPE_MAPPINGS[all_vals[0]]
                         label_dict = self.make_filter_label_dict(type_uri)
                         act_filter['label'] = label_dict['label']
                     else:
                         act_filter['label'] = all_vals[0]
                 elif param_key == 'q':
                     act_filter['oc-api:filter'] = 'General Keyword Search'
                     act_filter[
                         'label'] = 'Search Term: \'' + all_vals[0] + '\''
                 elif param_key == 'form-chronotile':
                     act_filter[
                         'oc-api:filter'] = 'Time of formation, use, or life'
                     chrono = ChronoTile()
                     dates = chrono.decode_path_dates(all_vals[0])
                     if isinstance(dates, dict):
                         act_filter['label'] = 'Time range: ' + str(
                             dates['earliest_bce'])
                         act_filter['label'] += ' to ' + str(
                             dates['latest_bce'])
                 elif param_key == 'disc-geotile':
                     act_filter[
                         'oc-api:filter'] = 'Location of discovery or observation'
                     act_filter['label'] = self.make_geotile_filter_label(
                         all_vals[0])
                     remove_geodeep = True
                 elif param_key == 'disc-bbox':
                     act_filter[
                         'oc-api:filter'] = 'Location of discovery or observation'
                     act_filter['label'] = self.make_bbox_filter_label(
                         all_vals[0])
                     remove_geodeep = True
                 elif param_key == 'images':
                     act_filter['oc-api:filter'] = 'Has related media'
                     act_filter['label'] = 'Linked to images'
                 elif param_key == 'other-media':
                     act_filter['oc-api:filter'] = 'Has related media'
                     act_filter[
                         'label'] = 'Linked to media (other than images)'
                 elif param_key == 'documents':
                     act_filter['oc-api:filter'] = 'Has related media'
                     act_filter['label'] = 'Linked to documents'
                 elif param_key == 'dc-subject':
                     act_filter['oc-api:filter'] = 'Has subject metadata'
                     label_dict = self.make_filter_label_dict(all_vals[-1])
                     if len(label_dict['label']) > 0:
                         act_filter['label'] = label_dict['label']
                     elif 'tdar' in all_vals[-1]:
                         act_filter[
                             'label'] = 'tDAR defined metadata record(s)'
                     if len(label_dict['entities']) == 1:
                         act_filter['rdfs:isDefinedBy'] = label_dict[
                             'entities'][0].uri
                         if label_dict['entities'][
                                 0].vocabulary is not False:
                             act_filter['label'] += ' in ' + label_dict[
                                 'entities'][0].vocabulary
                 elif param_key == 'dc-spatial':
                     act_filter['oc-api:filter'] = 'Has spatial metadata'
                     label_dict = self.make_filter_label_dict(all_vals[-1])
                     if len(label_dict['label']) > 0:
                         act_filter['label'] = label_dict['label']
                     if len(label_dict['entities']) == 1:
                         act_filter['rdfs:isDefinedBy'] = label_dict[
                             'entities'][0].uri
                         if label_dict['entities'][
                                 0].vocabulary is not False:
                             act_filter['label'] += ' in ' + label_dict[
                                 'entities'][0].vocabulary
                 elif param_key == 'dc-coverage':
                     act_filter[
                         'oc-api:filter'] = 'Has coverage / period metadata'
                     label_dict = self.make_filter_label_dict(all_vals[-1])
                     if len(label_dict['label']) > 0:
                         act_filter['label'] = label_dict['label']
                     if len(label_dict['entities']) == 1:
                         act_filter['rdfs:isDefinedBy'] = label_dict[
                             'entities'][0].uri
                         if label_dict['entities'][
                                 0].vocabulary is not False:
                             act_filter['label'] += ' in ' + label_dict[
                                 'entities'][0].vocabulary
                 elif param_key == 'dc-temporal':
                     act_filter['oc-api:filter'] = 'Has temporal coverage'
                     label_dict = self.make_filter_label_dict(all_vals[-1])
                     if len(label_dict['label']) > 0:
                         act_filter['label'] = label_dict['label']
                         if len(label_dict['entities']) == 1:
                             if label_dict['entities'][
                                     0].entity_type == 'vocabulary':
                                 act_filter[
                                     'label'] = 'Concepts defined by: ' + label_dict[
                                         'label']
                         elif 'periodo' in all_vals[-1]:
                             act_filter[
                                 'label'] = 'PeriodO defined concepts'
                     if len(label_dict['entities']) == 1:
                         act_filter['rdfs:isDefinedBy'] = label_dict[
                             'entities'][0].uri
                         if label_dict['entities'][0].vocabulary is not False\
                            and label_dict['entities'][0].vocabulary != label_dict['label']:
                             act_filter['label'] += ' in ' + label_dict[
                                 'entities'][0].vocabulary
                 elif param_key == 'dc-isReferencedBy':
                     act_filter['oc-api:filter'] = 'Is referenced by'
                     label_dict = self.make_filter_label_dict(all_vals[-1])
                     if len(label_dict['label']) > 0:
                         act_filter['label'] = label_dict['label']
                     if len(label_dict['entities']) == 1:
                         act_filter['rdfs:isDefinedBy'] = label_dict[
                             'entities'][0].uri
                         if label_dict['entities'][0].vocabulary is not False\
                            and label_dict['entities'][0].vocab_uri != label_dict['entities'][0].uri:
                             act_filter['label'] += ' in ' + label_dict[
                                 'entities'][0].vocabulary
                 elif param_key == 'linked' and all_vals[
                         -1] == 'dinaa-cross-ref':
                     act_filter['oc-api:filter'] = 'Has cross references'
                     act_filter[
                         'label'] = 'Links to, or with, DINAA curated site files'
                 else:
                     act_filter = False
                 if act_filter is not False:
                     rem_request = fl.make_request_sub(
                         request_dict, param_key, param_val)
                     if 'geodeep' in rem_request and remove_geodeep:
                         rem_request.pop('geodeep', None)
                     act_filter['oc-api:remove'] = fl.make_request_url(
                         rem_request)
                     act_filter['oc-api:remove-json'] = fl.make_request_url(
                         rem_request, '.json')
                     filters.append(act_filter)
     return filters
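
Most branches above share one parsing step: a parameter value may encode a
hierarchy, and the code splits it on self.hierarchy_delim, using the first
element to name the filter and the last to label the selected value. A
reduced sketch, assuming '---' as the delimiter (the real delimiter is
configured on the class):

 def split_hier_value(param_val, delim='---'):
     """ returns (filter_part, value_part) from a possibly
         hierarchic parameter value """
     all_vals = param_val.split(delim) if delim in param_val else [param_val]
     return all_vals[0], all_vals[-1]

 split_hier_value('oc-gen---cat-object')  # ('oc-gen', 'cat-object')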
Example #23
0
 def process_solr_tiles(self, solr_tiles):
     """ processes the solr_json 
         discovery geo tiles,
         aggregating to a certain
         depth
     """
     # first aggregate counts for tiles that belong together
     aggregate_tiles = LastUpdatedOrderedDict()
     i = -1
     t = 0
     if len(solr_tiles) <= 10:
         # don't aggregate if there's not much to aggregate
         self.aggregation_depth = self.max_depth
     for tile_key in solr_tiles[::2]:
         t += 1
         i += 2
         solr_facet_count = solr_tiles[i]
         if tile_key != 'false':
             if self.limiting_tile is False:
                 ok_to_add = True
             else:
                 # constrain to show facets ONLY within
                 # the current queried tile
                 if self.limiting_tile in tile_key:
                     ok_to_add = True
                 else:
                     ok_to_add = False
             if ok_to_add:
                 # first get full date range for
                 # facets that are OK to add
                 chrono_t = ChronoTile()
                 dates = chrono_t.decode_path_dates(tile_key)
                 if isinstance(dates, dict):
                     if self.min_date is False:
                         self.min_date = dates['earliest_bce']
                         self.max_date = dates['latest_bce']
                     else:
                         if self.min_date > dates['earliest_bce']:
                             self.min_date = dates['earliest_bce']
                         if self.max_date < dates['latest_bce']:
                             self.max_date = dates['latest_bce']
                 # now aggregate the facets that are OK to use
                 trim_tile_key = tile_key[:self.aggregation_depth]
                 if trim_tile_key not in aggregate_tiles:
                     aggregate_tiles[trim_tile_key] = 0
                 aggregate_tiles[trim_tile_key] += solr_facet_count
     # now generate a chronology facet record for each aggregated tile
     # print('Chronology tiles: ' + str(t) + ' reduced to ' + str(len(aggregate_tiles)))
     # --------------------------------------------
     # code to sort the list of tiles by start date and time span
     # --------------------------------------------
     sorting_ranges = []
     for tile_key, aggregate_count in aggregate_tiles.items():
         chrono_t = ChronoTile()
         dates = chrono_t.decode_path_dates(tile_key)
         dates['tile_key'] = tile_key
         sorting_ranges.append(dates)
     # now sort by earliest bce, then reversed latest bce;
     # this puts early dates with the longest time spans first
     sorted_ranges = sorted(sorting_ranges,
                            key=lambda k: (k['earliest_bce'],
                                           -k['latest_bce']))
     sorted_tiles = LastUpdatedOrderedDict()
     for sort_range in sorted_ranges:
         tile_key = sort_range['tile_key']
         sorted_tiles[tile_key] = aggregate_tiles[tile_key]
     i = 0
     for tile_key, aggregate_count in sorted_tiles.items():
         i += 1
         fl = FilterLinks()
         fl.base_request_json = self.filter_request_dict_json
         fl.spatial_context = self.spatial_context
         new_rparams = fl.add_to_request('form-chronotile',
                                         tile_key)
         record = LastUpdatedOrderedDict()
         record['id'] = fl.make_request_url(new_rparams)
         record['json'] = fl.make_request_url(new_rparams, '.json')
         record['count'] = aggregate_count
         record['category'] = 'oc-api:chrono-facet'
         chrono_t = ChronoTile()
         dates = chrono_t.decode_path_dates(tile_key)
         # convert numeric to GeoJSON-LD ISO 8601
         record['start'] = ISOyears().make_iso_from_float(dates['earliest_bce'])
         record['stop'] = ISOyears().make_iso_from_float(dates['latest_bce'])
         properties = LastUpdatedOrderedDict()
         properties['early bce/ce'] = dates['earliest_bce']
         properties['late bce/ce'] = dates['latest_bce']
         record['properties'] = properties
         self.chrono_tiles.append(record)
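
The sort above orders tiles by earliest start date and, within the same
start, by longest time span, because the second sort key negates
latest_bce. A self-contained check of that key function with hypothetical
date dicts:

 ranges = [{'earliest_bce': -1000, 'latest_bce': -500, 'tile_key': 'b'},
           {'earliest_bce': -2000, 'latest_bce': 0, 'tile_key': 'a'},
           {'earliest_bce': -2000, 'latest_bce': -1500, 'tile_key': 'c'}]
 sorted_keys = [r['tile_key'] for r in
                sorted(ranges, key=lambda k: (k['earliest_bce'],
                                              -k['latest_bce']))]
 # sorted_keys == ['a', 'c', 'b']: 'a' starts earliest and spans longest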
Example #24
0
 def __init__(self):
     self.parent_entities = []
     self.child_entities = LastUpdatedOrderedDict()
     self.loop_count = 0