Exemplo n.º 1
0
def add_get_gbif_link_entity(raw_uri):
    """Return the LinkEntity for a GBIF URI, creating it when missing.

    The species ID is extracted from ``raw_uri`` and appended to
    ``GBIF_BASE_URI`` to build the URI that is looked up. If no entity
    has that URI, the canonical and vernacular names are requested from
    the GBIF API and a new 'class' entity is saved; the canonical name
    stands in for a missing vernacular name.
    """
    gbif_id = get_gbif_species_id_from_uri(raw_uri)
    uri = GBIF_BASE_URI + str(gbif_id)
    existing = LinkEntity.objects.filter(uri=uri).first()
    if existing:
        # Already in the database
        return existing

    gbif_client = gbifAPI()
    label = gbif_client.get_gbif_cannonical_name(gbif_id)
    alt_label = gbif_client.get_gbif_vernacular_name(gbif_id) or label
    print('Saving {} as {}, {}'.format(uri, label, alt_label))

    entity = LinkEntity()
    entity.uri = uri
    entity.label = label
    entity.alt_label = alt_label
    entity.vocab_uri = GBIF_VOCAB_URI
    entity.ent_type = 'class'
    entity.sort = ''
    entity.save()
    return entity
Exemplo n.º 2
0
 def make_dinaa_link_assertions(self):
     """Link DINAA manifest items to matching Federal Register documents.

     Ensures the Federal Register vocabulary entity exists, then compares
     every result of every cached keyword search against the DINAA
     matches loaded from ``self.dinaa_matches_key``. When a result's
     ``document_number`` equals a match's ``doc`` and the match's
     ``uuid`` resolves to a Manifest item, this:

     * creates a LinkEntity for the document's ``html_url`` if none
       exists (label is the title, cut to 175 characters plus '...'), and
     * saves a LinkAnnotation from the item to that URL with predicate
       ``self.DC_TERMS_REF_BY``.

     A failed annotation save is reported and skipped so that one bad
     record does not stop the run.
     """
     self.make_fed_reg_vocab_entity()
     fed_api = FederalRegistryAPI()
     search_key_list = fed_api.get_list_cached_keyword_searches()
     dinaa_matches = fed_api.get_dict_from_file(self.dinaa_matches_key)
     for s_key in search_key_list:
         s_json = fed_api.get_dict_from_file(s_key)
         # Skip cache entries that did not load as a dict of results.
         if not isinstance(s_json, dict) or 'results' not in s_json:
             continue
         for match in dinaa_matches:
             for s_result in s_json['results']:
                 if s_result['document_number'] != match['doc']:
                     continue
                 print('Found match for ' + match['doc'])
                 try:
                     man_obj = Manifest.objects.get(uuid=match['uuid'])
                 except Manifest.DoesNotExist:
                     # No manifest item to annotate for this match.
                     continue
                 fed_uri = s_result['html_url']
                 try:
                     LinkEntity.objects.get(uri=fed_uri)
                     entity_exists = True
                 except LinkEntity.DoesNotExist:
                     entity_exists = False
                 if not entity_exists:
                     print('Saving entity: ' + s_result['title'])
                     title = s_result['title']
                     if len(title) > 175:
                         title = title[0:175] + '...'
                     le = LinkEntity()
                     le.uri = fed_uri
                     le.label = title
                     le.alt_label = s_result['document_number']
                     le.vocab_uri = self.FEDERAL_REG_URI
                     le.ent_type = 'instance'
                     le.slug = 'fed-reg-docs-' + s_result['document_number']
                     le.save()
                 # Now save the link annotation
                 print('Adding ref link to ' + man_obj.label)
                 la = LinkAnnotation()
                 la.subject = man_obj.uuid
                 la.subject_type = man_obj.item_type
                 la.project_uuid = man_obj.project_uuid
                 la.source_id = self.source_id
                 la.predicate_uri = self.DC_TERMS_REF_BY
                 la.object_uri = fed_uri
                 try:
                     la.save()
                 except Exception as error:
                     # Still best effort, but no longer silent, and
                     # KeyboardInterrupt / SystemExit now propagate.
                     print('Could not save ref link to ' + fed_uri +
                           ': ' + str(error))
Exemplo n.º 3
0
 def link_sites_from_filecache(self):
     """Save link entities and annotations for cached trinomial references.

     Reads the JSON object stored under ``self.cache_filekey`` in the
     file cache. For every entry of its ``trinomial_refs`` list this:

     * creates a LinkEntity for the reference URI if none exists,
     * for each trinomial matching the label of an ``oc-gen:cat-site``
       Manifest item, saves a ``skos:broader`` annotation from the
       reference URI to its source URI, and
     * links that site to the reference with ``dc-terms:isReferencedBy``
       unless such a link already exists.

     Does nothing when ``self.filecache`` is None or when the cached
     object is not a dict holding a ``trinomial_refs`` key. Annotation
     saves are best effort: a failure is printed and processing goes on.
     """
     if self.filecache is None:
         return
     self.filecache.working_dir = self.working_dir
     json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
     if not isinstance(json_obj, dict) or 'trinomial_refs' not in json_obj:
         return
     for tri_ref in json_obj['trinomial_refs']:
         uri = tri_ref['rec_uri']
         title = tri_ref['title']
         if len(title) > 194:
             title = title[0:190] + '... '
         l_exists = LinkEntity.objects.filter(uri=uri)[:1]
         if len(l_exists) < 1:
             l_ent = LinkEntity()
             l_ent.uri = uri
             l_ent.label = title
             l_ent.alt_label = title
             l_ent.vocab_uri = tri_ref['source_uri']
             l_ent.ent_type = 'class'
             l_ent.save()
         for trinomial in tri_ref['trinomials']:
             # Only the first matching site is used, so fetch just one.
             man_objs = Manifest.objects.filter(
                 label=trinomial, class_uri='oc-gen:cat-site')[:1]
             if len(man_objs) < 1:
                 continue
             man_obj = man_objs[0]
             la = LinkAnnotation()
             la.subject = uri  # the subordinate is the subject
             la.subject_type = 'uri'
             la.project_uuid = man_obj.project_uuid
             la.source_id = self.source_id
             la.predicate_uri = "skos:broader"
             la.object_uri = tri_ref['source_uri']
             # Save once, inside the guard; the earlier unguarded save
             # made the try/except pointless.
             try:
                 la.save()
             except Exception as error:
                 print('Could not save skos:broader link for ' + uri +
                       ': ' + str(error))
             links = LinkAnnotation.objects.filter(
                 subject=man_obj.uuid, object_uri=uri)[:1]
             if len(links) > 0:
                 continue
             print('Link ' + man_obj.label + ' (' +
                   man_obj.uuid + ') to ' + uri)
             la = LinkAnnotation()
             la.subject = man_obj.uuid  # the subordinate is the subject
             la.subject_type = man_obj.item_type
             la.project_uuid = man_obj.project_uuid
             la.source_id = self.source_id
             la.predicate_uri = 'dc-terms:isReferencedBy'
             la.object_uri = uri
             try:
                 la.save()
             except Exception as error:
                 print('Could not save reference link for ' + man_obj.uuid +
                       ': ' + str(error))
Exemplo n.º 4
0
def add_missing_containing_regions(project_uuid='0', source_id=SOURCE_ID):
    """Load the regions listed in ADD_REGIONS and link them to GeoNames.

    For every entry of ``ADD_REGIONS`` this loads a context row placing
    the new region under its parent, saves a LinkEntity for the GeoNames
    URI when none exists, and adds a ``skos:closeMatch`` annotation from
    the region to that URI when none exists.
    """
    for _state, parent_uuid, region, region_uuid, geo_uri in ADD_REGIONS:
        load_context_row(
            project_uuid=project_uuid,
            source_id=source_id,
            row={
                'parent_uuid': parent_uuid,
                'context_uuid': region_uuid,
                'label': region,
                'class_uri': 'oc-gen:cat-region',
            },
        )

        known_entity = LinkEntity.objects.filter(uri=geo_uri).first()
        if not known_entity:
            entity = LinkEntity()
            entity.uri = geo_uri
            entity.label = region
            entity.alt_label = region
            entity.vocab_uri = GeonamesAPI().VOCAB_URI
            entity.ent_type = 'class'
            entity.save()

        known_link = LinkAnnotation.objects.filter(
            subject=region_uuid,
            object_uri=geo_uri,
        ).first()
        if not known_link:
            link = LinkAnnotation()
            link.subject = region_uuid
            link.subject_type = 'subjects'
            link.project_uuid = project_uuid
            link.source_id = source_id
            link.predicate_uri = 'skos:closeMatch'
            link.object_uri = geo_uri
            link.creator_uuid = ''
            link.save()
Exemplo n.º 5
0
 def find_related_geonames(self, username='******'):
     """Annotate region subjects with their first GeoNames search hit.

     For each ``oc-gen:cat-region`` subject in project '0', the subject's
     context path is searched in GeoNames: slashes are replaced by
     spaces for the query and the number of slashes is passed as the
     admin level. The first hit is stored as a LinkEntity if its URI is
     unknown, and a ``skos:closeMatch`` annotation from the item to the
     GeoNames URI is added unless one already exists.

     Items without a Subject record are reported and skipped.

     Args:
         username: account name passed through to
             ``GeonamesAPI.search_admin_entity``.
     """
     man_objs = Manifest.objects.filter(project_uuid='0',
                                        class_uri='oc-gen:cat-region',
                                        item_type='subjects')
     for man_obj in man_objs:
         print('Checking slug: ' + man_obj.slug)
         try:
             subj_obj = Subject.objects.get(uuid=man_obj.uuid)
         except Subject.DoesNotExist:
             # One missing record should not abort the whole batch.
             print('No subject record for: ' + man_obj.uuid)
             continue
         context = subj_obj.context
         # Each '/' in the context path adds one level of nesting.
         admin_level = context.count('/')
         q_str = context.replace('/', ' ')
         geo_api = GeonamesAPI()
         json_r = geo_api.search_admin_entity(q_str, admin_level, username)
         if not isinstance(json_r, dict):
             continue
         # we found a result from GeoNames!
         print('Geonames result found.')
         hits = json_r.get('geonames')
         if not hits:
             continue
         top_hit = hits[0]
         geo_id = top_hit['geonameId']
         label = top_hit['name']
         alt_label = top_hit['toponymName']
         geonames_uri = 'http://www.geonames.org/' + str(geo_id)
         l_ents = LinkEntity.objects.filter(uri=geonames_uri)[:1]
         if len(l_ents) < 1:
             # we need to create this entity
             ent = LinkEntity()
             ent.uri = geonames_uri
             ent.label = label
             ent.alt_label = alt_label
             ent.vocab_uri = geo_api.VOCAB_URI
             ent.ent_type = 'class'
             ent.save()
         print(geonames_uri)
         annos = LinkAnnotation.objects.filter(subject=man_obj.uuid,
                                               object_uri=geonames_uri)[:1]
         if len(annos) > 0:
             print('Relation already known.')
             continue
         # we need to add the annotation linking this item
         print('Adding new annotation!')
         new_la = LinkAnnotation()
         new_la.subject = man_obj.uuid
         new_la.subject_type = man_obj.item_type
         new_la.project_uuid = man_obj.project_uuid
         new_la.source_id = man_obj.source_id
         new_la.predicate_uri = 'skos:closeMatch'
         new_la.object_uri = geonames_uri
         new_la.creator_uuid = ''
         new_la.save()
Exemplo n.º 6
0
 def save_csv_from_filecache(self):
     """Export cached trinomial references to a CSV file.

     Reads the JSON object stored under ``self.cache_filekey`` and, when
     it is a dict holding a ``trinomial_refs`` list, writes one CSV row
     per trinomial to ``<cache_filekey>.csv`` inside the directory
     returned by the file cache's ``prep_directory``. A LinkEntity is
     also created for each reference URI that does not have one yet.

     Does nothing when ``self.filecache`` is None.
     """
     if self.filecache is None:
         return
     self.filecache.working_dir = self.working_dir
     json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
     filename = self.cache_filekey + '.csv'
     directory = self.filecache.prep_directory(self.working_dir)
     dir_filename = os.path.join(directory, filename)
     if not isinstance(json_obj, dict) or 'trinomial_refs' not in json_obj:
         return
     field_name_row = [
         'County Code', 'County Name', 'Trinomial', 'Citation',
         'URI', 'Title', 'Note'
     ]
     # The context manager closes (and flushes) the file even if a row
     # fails part-way; the old code only read ``f.closed``.
     with codecs.open(dir_filename, 'w', encoding='utf-8') as f:
         writer = csv.writer(f,
                             dialect=csv.excel,
                             quoting=csv.QUOTE_ALL)
         writer.writerow(field_name_row)
         for tri_ref in json_obj['trinomial_refs']:
             citation = tri_ref['citation_html']
             uri = tri_ref['rec_uri']
             title = tri_ref['title']
             if len(title) > 194:
                 title = title[0:190] + '... '
             # [:1], not [:0]: an empty slice made this check always
             # report "missing" and re-save the entity every time.
             l_exists = LinkEntity.objects.filter(uri=uri)[:1]
             if len(l_exists) < 1:
                 l_ent = LinkEntity()
                 l_ent.uri = uri
                 l_ent.label = title
                 l_ent.alt_label = title
                 l_ent.vocab_uri = tri_ref['source_uri']
                 l_ent.ent_type = 'class'
                 l_ent.save()
             note = tri_ref.get('note', '')
             for trinomial in tri_ref['trinomials']:
                 # The first four characters are used as the county code.
                 county_code = trinomial[0:4]
                 act_county_name = self.COUNTY_PREFIXES.get(county_code)
                 print('County code: ' + county_code + ' is ' +
                       str(act_county_name))
                 row = [
                     county_code, act_county_name, trinomial,
                     citation, uri, title, note
                 ]
                 writer.writerow(row)
     print('Done!')
Exemplo n.º 7
0
 def save_entity_labels(self):
     """Store a LinkEntity for the rdfs:label triples of the graph.

     Returns False when ``self.graph`` or ``self.vocabulary_uri`` is
     False. Otherwise returns a list of ``{'s': uri, 'o': label}`` dicts,
     one per entity saved. With ``self.replace_old`` set, the
     vocabulary's existing entities are deleted first and every labeled
     subject is saved; without it, only subjects that have no LinkEntity
     yet are saved.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return False
     saved = []
     if self.replace_old:
         LinkEntity.objects.filter(vocab_uri=self.vocabulary_uri).delete()
     label_pattern = (None, RDFS.label, None)
     for subj, _pred, obj in self.graph.triples(label_pattern):
         subject_uri = str(subj)  # URI of the subject as a string
         label = str(obj)  # label text taken from the object
         known = LinkEntity.objects.filter(uri=subject_uri)[:1]
         if len(known) >= 1 and not self.replace_old:
             # Already stored and not asked to replace it.
             continue
         entity = LinkEntity()
         entity.uri = subject_uri
         entity.label = label
         entity.alt_label = label
         entity.ent_type = 'class'
         entity.vocab_uri = self.vocabulary_uri
         entity.save()
         saved.append({'s': subject_uri, 'o': label})
     return saved
Exemplo n.º 8
0
 def link_sites_from_filecache(self):
     """Save link entities and annotations for cached trinomial references.

     Reads the JSON object stored under ``self.cache_filekey`` in the
     file cache. For every entry of its ``trinomial_refs`` list this:

     * creates a LinkEntity for the reference URI if none exists,
     * for each trinomial matching the label of an ``oc-gen:cat-site``
       Manifest item, saves a ``skos:broader`` annotation from the
       reference URI to its source URI, and
     * links that site to the reference with ``dc-terms:isReferencedBy``
       unless such a link already exists.

     Does nothing when ``self.filecache`` is None or when the cached
     object is not a dict holding a ``trinomial_refs`` key. Annotation
     saves are best effort: a failure is printed and processing goes on.
     """
     if self.filecache is None:
         return
     self.filecache.working_dir = self.working_dir
     json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
     if not isinstance(json_obj, dict) or 'trinomial_refs' not in json_obj:
         return
     for tri_ref in json_obj['trinomial_refs']:
         uri = tri_ref['rec_uri']
         title = tri_ref['title']
         if len(title) > 194:
             title = title[0:190] + '... '
         l_exists = LinkEntity.objects.filter(uri=uri)[:1]
         if len(l_exists) < 1:
             l_ent = LinkEntity()
             l_ent.uri = uri
             l_ent.label = title
             l_ent.alt_label = title
             l_ent.vocab_uri = tri_ref['source_uri']
             l_ent.ent_type = 'class'
             l_ent.save()
         for trinomial in tri_ref['trinomials']:
             # Only the first matching site is used, so fetch just one.
             man_objs = Manifest.objects.filter(
                 label=trinomial, class_uri='oc-gen:cat-site')[:1]
             if len(man_objs) < 1:
                 continue
             man_obj = man_objs[0]
             la = LinkAnnotation()
             la.subject = uri  # the subordinate is the subject
             la.subject_type = 'uri'
             la.project_uuid = man_obj.project_uuid
             la.source_id = self.source_id
             la.predicate_uri = "skos:broader"
             la.object_uri = tri_ref['source_uri']
             # Save once, inside the guard; the earlier unguarded save
             # made the try/except pointless.
             try:
                 la.save()
             except Exception as error:
                 print('Could not save skos:broader link for ' + uri +
                       ': ' + str(error))
             links = LinkAnnotation.objects.filter(
                 subject=man_obj.uuid, object_uri=uri)[:1]
             if len(links) > 0:
                 continue
             print('Link ' + man_obj.label + ' (' +
                   man_obj.uuid + ') to ' + uri)
             la = LinkAnnotation()
             la.subject = man_obj.uuid  # the subordinate is the subject
             la.subject_type = man_obj.item_type
             la.project_uuid = man_obj.project_uuid
             la.source_id = self.source_id
             la.predicate_uri = 'dc-terms:isReferencedBy'
             la.object_uri = uri
             try:
                 la.save()
             except Exception as error:
                 print('Could not save reference link for ' + man_obj.uuid +
                       ': ' + str(error))
Exemplo n.º 9
0
 def make_fed_reg_vocab_entity(self):
     """Create the Federal Register vocabulary entity if it is missing.

     Looks up the LinkEntity whose URI is ``self.FEDERAL_REG_URI``. When
     none exists, one of type 'vocabulary' is saved with slug 'fed-reg'
     and ``self.FEDERAL_REG_LABEL`` as both label and alternate label.
     """
     try:
         LinkEntity.objects.get(uri=self.FEDERAL_REG_URI)
     except LinkEntity.DoesNotExist:
         is_missing = True
     else:
         is_missing = False
     if not is_missing:
         return
     vocab = LinkEntity()
     vocab.uri = self.FEDERAL_REG_URI
     vocab.label = self.FEDERAL_REG_LABEL
     vocab.alt_label = self.FEDERAL_REG_LABEL
     vocab.vocab_uri = self.FEDERAL_REG_URI
     vocab.ent_type = 'vocabulary'
     vocab.slug = 'fed-reg'
     vocab.save()
Exemplo n.º 10
0
 def check_add_periodo_vocab(self):
     """Save the PeriodO vocabulary entity unless it is already stored.

     The entity's URI and vocabulary URI are both
     ``self.PERIODO_VOCAB_URI`` and its type is 'vocabulary'.
     """
     found = LinkEntity.objects.filter(uri=self.PERIODO_VOCAB_URI)[:1]
     if len(found) >= 1:
         return
     periodo = LinkEntity()
     periodo.uri = self.PERIODO_VOCAB_URI
     periodo.label = 'PeriodO'
     periodo.alt_label = 'PeriodO (http://perio.do)'
     periodo.vocab_uri = self.PERIODO_VOCAB_URI
     periodo.ent_type = 'vocabulary'
     periodo.save()
Exemplo n.º 11
0
 def make_dinaa_link_assertions(self):
     """Link DINAA manifest items to matching Federal Register documents.

     Ensures the Federal Register vocabulary entity exists, then compares
     every result of every cached keyword search against the DINAA
     matches loaded from ``self.dinaa_matches_key``. When a result's
     ``document_number`` equals a match's ``doc`` and the match's
     ``uuid`` resolves to a Manifest item, this:

     * creates a LinkEntity for the document's ``html_url`` if none
       exists (label is the title, cut to 175 characters plus '...'), and
     * saves a LinkAnnotation from the item to that URL with predicate
       ``self.DC_TERMS_REF_BY``.

     A failed annotation save is reported and skipped so that one bad
     record does not stop the run.
     """
     self.make_fed_reg_vocab_entity()
     fed_api = FederalRegistryAPI()
     search_key_list = fed_api.get_list_cached_keyword_searches()
     dinaa_matches = fed_api.get_dict_from_file(self.dinaa_matches_key)
     for s_key in search_key_list:
         s_json = fed_api.get_dict_from_file(s_key)
         # Skip cache entries that did not load as a dict of results.
         if not isinstance(s_json, dict) or 'results' not in s_json:
             continue
         for match in dinaa_matches:
             for s_result in s_json['results']:
                 if s_result['document_number'] != match['doc']:
                     continue
                 print('Found match for ' + match['doc'])
                 try:
                     man_obj = Manifest.objects.get(uuid=match['uuid'])
                 except Manifest.DoesNotExist:
                     # No manifest item to annotate for this match.
                     continue
                 fed_uri = s_result['html_url']
                 try:
                     LinkEntity.objects.get(uri=fed_uri)
                     entity_exists = True
                 except LinkEntity.DoesNotExist:
                     entity_exists = False
                 if not entity_exists:
                     print('Saving entity: ' + s_result['title'])
                     title = s_result['title']
                     if len(title) > 175:
                         title = title[0:175] + '...'
                     le = LinkEntity()
                     le.uri = fed_uri
                     le.label = title
                     le.alt_label = s_result['document_number']
                     le.vocab_uri = self.FEDERAL_REG_URI
                     le.ent_type = 'instance'
                     le.slug = 'fed-reg-docs-' + s_result['document_number']
                     le.save()
                 # Now save the link annotation
                 print('Adding ref link to ' + man_obj.label)
                 la = LinkAnnotation()
                 la.subject = man_obj.uuid
                 la.subject_type = man_obj.item_type
                 la.project_uuid = man_obj.project_uuid
                 la.source_id = self.source_id
                 la.predicate_uri = self.DC_TERMS_REF_BY
                 la.object_uri = fed_uri
                 try:
                     la.save()
                 except Exception as error:
                     # Still best effort, but no longer silent, and
                     # KeyboardInterrupt / SystemExit now propagate.
                     print('Could not save ref link to ' + fed_uri +
                           ': ' + str(error))
Exemplo n.º 12
0
 def validate_fix_eol_objects(self):
     """Clean EOL object URIs in annotations and add missing entities.

     Every LinkAnnotation whose ``object_uri`` contains
     ``self.EOL_URI_PREFIX`` is checked. An annotation whose URI changes
     when passed through ``LinkEntityGeneration.make_clean_uri`` is
     deleted and saved again with the cleaned URI. Each distinct cleaned
     URI that has no LinkEntity is then looked up through the EOL API
     and, when the response carries a ``scientificName``, saved as a new
     'class' entity in ``self.EOL_VOCAB_URI``.
     """
     # URIs already handled; a set keeps the membership test cheap.
     checked_uris = set()
     eol_las = LinkAnnotation.objects.filter(
         object_uri__icontains=self.EOL_URI_PREFIX)
     for eol_la in eol_las:
         le_gen = LinkEntityGeneration()
         # strip off any cruft in the URI
         eol_uri = le_gen.make_clean_uri(eol_la.object_uri)
         if eol_uri != eol_la.object_uri:
             print('Has cruft: ' + str(eol_la.object_uri))
             # Remove the stored row, then save it with the cleaned URI.
             LinkAnnotation.objects.filter(hash_id=eol_la.hash_id).delete()
             eol_la.object_uri = eol_uri
             eol_la.save()
         if eol_uri in checked_uris:
             # only check on a given URI once
             continue
         checked_uris.add(eol_uri)
         try:
             LinkEntity.objects.get(uri=eol_uri)
             has_entity = True
         except LinkEntity.DoesNotExist:
             has_entity = False
         if has_entity:
             continue
         print('Getting missing data for: ' + eol_uri)
         eol_api = eolAPI()
         eol_data = eol_api.get_basic_json_for_eol_uri(eol_uri)
         if not isinstance(eol_data, dict):
             print('Failed to read data: ' + str(eol_data))
             continue
         print('Reading data...')
         label = eol_data.get('scientificName', False)
         if label is False:
             # No name in the response, so there is nothing to save.
             continue
         print('Saving data for: ' + str(label) + ' (' +
               eol_uri + ')')
         le = LinkEntity()
         le.uri = eol_uri
         le.label = label
         le.alt_label = label
         le.ent_type = 'class'
         le.vocab_uri = self.EOL_VOCAB_URI
         le.save()
Exemplo n.º 13
0
 def check_add_period_pred(self):
     """Save the dc-terms 'temporal' predicate entity if it is missing.

     The entity has URI 'http://purl.org/dc/terms/temporal', the label
     'Temporal Coverage' and type 'property'.
     """
     temporal_pred = 'http://purl.org/dc/terms/temporal'
     found = LinkEntity.objects.filter(uri=temporal_pred)[:1]
     if len(found) >= 1:
         return
     predicate = LinkEntity()
     predicate.uri = temporal_pred
     predicate.label = 'Temporal Coverage'
     predicate.alt_label = 'Temporal Coverage'
     predicate.vocab_uri = 'http://purl.org/dc/terms'
     predicate.ent_type = 'property'
     predicate.save()
Exemplo n.º 14
0
 def make_fed_reg_vocab_entity(self):
     """Ensure a vocabulary entity exists for the Federal Register.

     If no LinkEntity has the URI ``self.FEDERAL_REG_URI``, a new one is
     saved with type 'vocabulary', slug 'fed-reg', and
     ``self.FEDERAL_REG_LABEL`` used for both of its labels.
     """
     try:
         LinkEntity.objects.get(uri=self.FEDERAL_REG_URI)
     except LinkEntity.DoesNotExist:
         needs_entity = True
     else:
         needs_entity = False
     if needs_entity:
         fed_reg = LinkEntity()
         fed_reg.uri = self.FEDERAL_REG_URI
         fed_reg.label = self.FEDERAL_REG_LABEL
         fed_reg.alt_label = self.FEDERAL_REG_LABEL
         fed_reg.vocab_uri = self.FEDERAL_REG_URI
         fed_reg.ent_type = 'vocabulary'
         fed_reg.slug = 'fed-reg'
         fed_reg.save()
Exemplo n.º 15
0
 def check_add_period(self, p_ref):
     """Ensure the period described by ``p_ref`` is stored as an entity.

     Nothing happens when the period URI is already listed in
     ``self.db_uris``. Otherwise a 'class' entity is saved if the URI has
     no LinkEntity yet or ``self.update_period`` is set, and the URI is
     then appended to ``self.db_uris``.
     """
     if p_ref['period-meta']['uri'] in self.db_uris:
         # Already handled during this run.
         return
     period_meta = p_ref['period-meta']
     period_uri = period_meta['uri']
     found = LinkEntity.objects.filter(uri=period_uri)[:1]
     if len(found) < 1 or self.update_period:
         period = LinkEntity()
         period.uri = period_uri
         period.label = period_meta['label-range']
         period.alt_label = period_meta['label']
         period.vocab_uri = p_ref['collection']['uri']
         period.ent_type = 'class'
         period.save()
     self.db_uris.append(period_uri)
Exemplo n.º 16
0
 def validate_fix_uberon_objects(self):
     """Clean UBERON object URIs in annotations and add missing entities.

     Every LinkAnnotation whose ``object_uri`` contains
     ``self.UBERON_URI_PREFIX`` is checked. An annotation whose URI
     changes when passed through ``LinkEntityGeneration.make_clean_uri``
     is deleted and saved again with the cleaned URI. Each distinct
     cleaned URI that has no LinkEntity is then looked up with
     ``uberonAPI.get_uri_label_from_graph`` and, unless that returns
     False, saved as a new 'class' entity in ``self.UBERON_VOCAB_URI``.
     """
     # URIs already handled; a set keeps the membership test cheap.
     checked_uris = set()
     uberon_las = LinkAnnotation.objects.filter(
         object_uri__icontains=self.UBERON_URI_PREFIX)
     for uberon_la in uberon_las:
         le_gen = LinkEntityGeneration()
         # strip off any cruft in the URI
         uberon_uri = le_gen.make_clean_uri(uberon_la.object_uri)
         if uberon_uri != uberon_la.object_uri:
             print('Has cruft: ' + str(uberon_la.object_uri))
             # Remove the stored row, then save it with the cleaned URI.
             LinkAnnotation.objects.filter(
                 hash_id=uberon_la.hash_id).delete()
             uberon_la.object_uri = uberon_uri
             uberon_la.save()
         if uberon_uri in checked_uris:
             # only check on a given URI once
             continue
         checked_uris.add(uberon_uri)
         try:
             LinkEntity.objects.get(uri=uberon_uri)
             has_entity = True
         except LinkEntity.DoesNotExist:
             has_entity = False
         if has_entity:
             continue
         print('Getting missing data for: ' + uberon_uri)
         u_api = uberonAPI()
         label = u_api.get_uri_label_from_graph(uberon_uri)
         if label is False:
             print('Failed to read data for : ' + str(uberon_uri))
             continue
         print('Saving data for: ' + str(label) + ' (' +
               uberon_uri + ')')
         le = LinkEntity()
         le.uri = uberon_uri
         le.label = label
         le.alt_label = label
         le.ent_type = 'class'
         le.vocab_uri = self.UBERON_VOCAB_URI
         le.save()
Exemplo n.º 17
0
 def check_add_period_collection(self, p_ref):
     """Ensure the period collection in ``p_ref`` is stored as an entity.

     Nothing happens when the collection URI is already listed in
     ``self.db_uris``. Otherwise a 'vocabulary' entity is saved if the
     URI has no LinkEntity yet, and the URI is then appended to
     ``self.db_uris``.
     """
     if p_ref['collection']['uri'] in self.db_uris:
         # Already handled during this run.
         return
     collection = p_ref['collection']
     collection_uri = collection['uri']
     found = LinkEntity.objects.filter(uri=collection_uri)[:1]
     if len(found) < 1:
         entity = LinkEntity()
         entity.uri = collection_uri
         entity.label = 'PeriodO Collection: ' + collection['label']
         entity.alt_label = 'PeriodO (http://perio.do): ' + collection['label']
         entity.vocab_uri = self.PERIODO_VOCAB_URI
         entity.ent_type = 'vocabulary'
         entity.save()
     self.db_uris.append(collection_uri)
Exemplo n.º 18
0
 def store_records(self, act_table, recs):
     """Save each record in ``recs`` as a row of the table ``act_table``.

     Every record is passed through ``self.check_allow_write`` and
     ``self.prep_update_keep_old``. A record is skipped with a printed
     notice when writing is not allowed and ``self.update_keep_old`` is
     False. Otherwise the model class matching ``act_table`` is
     instantiated from the record's items and saved; records for an
     unrecognized table name are ignored. Errors raised by ``save`` are
     printed rather than propagated.
     """
     for position, record in enumerate(recs, 1):
         allow_write = self.check_allow_write(act_table, record)
         record = self.prep_update_keep_old(act_table, record)
         if allow_write is False and self.update_keep_old is False:
             print('\n Not allowed to overwite record.' + str(position))
             continue
         # Pick the model class for the table; None means unknown table.
         if act_table == 'link_annotations':
             model_cls = LinkAnnotation
         elif act_table == 'link_entities':
             model_cls = LinkEntity
         elif act_table == 'oc_assertions':
             model_cls = Assertion
         elif act_table == 'oc_manifest':
             model_cls = Manifest
         elif act_table == 'oc_subjects':
             model_cls = Subject
         elif act_table == 'oc_mediafiles':
             model_cls = Mediafile
         elif act_table == 'oc_documents':
             model_cls = OCdocument
         elif act_table == 'oc_persons':
             model_cls = Person
         elif act_table == 'oc_projects':
             model_cls = Project
         elif act_table == 'oc_strings':
             model_cls = OCstring
         elif act_table == 'oc_types':
             model_cls = OCtype
         elif act_table == 'oc_geospace':
             model_cls = Geospace
         elif act_table == 'oc_events':
             model_cls = Event
         elif act_table == 'oc_predicates':
             model_cls = Predicate
         elif act_table == 'oc_identifiers':
             model_cls = StableIdentifer
         elif act_table == 'oc_obsmetadata':
             model_cls = ObsMetadata
         else:
             model_cls = None
         if model_cls is None:
             continue
         row = model_cls(**record)
         try:
             row.save(force_insert=self.force_insert,
                      force_update=self.update_keep_old)
         except Exception as error:
             print('Something slipped past in ' + act_table +
                   '...' + str(error))
Exemplo n.º 19
0
 def check_add_period(self, p_ref, vocab_uri):
     """Ensure the period described by ``p_ref`` is stored as an entity.

     ``p_ref`` is ignored unless it is a dict. The period URI is
     ``PeriodoAPI.URI_PREFIX`` followed by the dict's ``id``. When that
     URI is not yet in ``self.db_uris``, a 'class' entity in
     ``vocab_uri`` is saved if the URI has no LinkEntity or
     ``self.update_period`` is set, and the URI is then appended to
     ``self.db_uris``.
     """
     if not isinstance(p_ref, dict):
         return
     period_uri = PeriodoAPI.URI_PREFIX + p_ref['id']
     if period_uri in self.db_uris:
         # Already handled during this run.
         return
     found = LinkEntity.objects.filter(uri=period_uri)[:1]
     if len(found) < 1 or self.update_period:
         period = LinkEntity()
         period.uri = period_uri
         period.label = p_ref['label']
         period.alt_label = p_ref['alt_label']
         period.vocab_uri = vocab_uri
         period.ent_type = 'class'
         period.save()
     self.db_uris.append(period_uri)
Exemplo n.º 20
0
 def check_add_period_collection(self, p_ref):
     """ Makes sure a PeriodO collection has a LinkEntity record.

         Nothing happens unless p_ref is a dict. The URI is built
         from PeriodoAPI.URI_PREFIX plus p_ref['id']; URIs already
         listed in self.db_uris are skipped. A 'vocabulary' record
         labeled with p_ref['source']['title'] is saved when no row
         exists for the URI or when self.update_period is set, and
         the URI is then remembered in self.db_uris.
     """
     if not isinstance(p_ref, dict):
         return
     collection_uri = PeriodoAPI.URI_PREFIX + p_ref['id']
     if collection_uri in self.db_uris:
         # already handled in this run, no need to ask the database
         return
     matches = LinkEntity.objects.filter(uri=collection_uri)[:1]
     if len(matches) < 1 or self.update_period:
         collection = LinkEntity()
         source_title = p_ref['source']['title']
         collection.uri = collection_uri
         collection.label = 'PeriodO Collection: ' + source_title
         collection.alt_label = 'PeriodO (http://perio.do): ' + source_title
         collection.vocab_uri = self.PERIODO_VOCAB_URI
         collection.ent_type = 'vocabulary'
         collection.save()
     self.db_uris.append(collection_uri)
Exemplo n.º 21
0
 def store_records(self, act_table, recs):
     """ Saves each record in recs as a row of the table act_table.

         Only the values of recs are used; each value is unpacked
         as keyword arguments into the model class for the table
         and saved. Table names without a model in the lookup
         below are skipped without saving anything.
     """
     records = recs.values()
     if not records:
         # nothing to save, so no model lookup is needed
         return
     table_models = {
         'link_annotations': LinkAnnotation,
         'link_entities': LinkEntity,
         'link_hierarchies': LinkHierarchy,
         'oc_chronology': Chronology,
         'oc_geodata': Geodata,
         'oc_mediafiles': Mediafile,
         'oc_documents': OCdocument,
         'oc_persons': Person,
         'oc_projects': Project,
         'oc_strings': OCstring,
         'oc_types': OCtype,
         'oc_events': Event,
         'oc_predicates': Predicate,
         'oc_identifiers': StableIdentifer,
         'oc_obsmetadata': ObsMetadata,
     }
     model_class = table_models.get(act_table)
     if model_class is None:
         return
     for record in records:
         model_class(**record).save()
Exemplo n.º 22
0
 def validate_fix_eol_objects(self):
     """ Searches for EOL links in the LinkAnnotations table and
         fixes URIs that carry cruft. Also calls the EOL API to
         get labels for URIs with no record in the LinkEntity
         table.

         For every annotation whose object_uri contains
         self.EOL_URI_PREFIX the URI is cleaned; if cleaning
         changed it, the old annotation row is deleted and the
         annotation is saved again with the clean URI. Each
         distinct clean URI is then checked once: when no
         LinkEntity exists and the EOL API response has a
         'scientificName', a new 'class' LinkEntity is saved.
     """
     # a set keeps the "already checked" test cheap however many
     # annotations share the same URI
     checked_uris = set()
     # NOTE(review): one cleaner is shared by all annotations; this
     # assumes make_clean_uri keeps no state between calls
     le_gen = LinkEntityGeneration()
     eol_las = LinkAnnotation.objects.filter(object_uri__icontains=self.EOL_URI_PREFIX)
     for eol_la in eol_las:
         eol_uri = le_gen.make_clean_uri(eol_la.object_uri)  # strip off any cruft in the URI
         if eol_uri != eol_la.object_uri:
             print("Has cruft: " + str(eol_la.object_uri))
             LinkAnnotation.objects.filter(hash_id=eol_la.hash_id).delete()  # delete the old
             eol_la.object_uri = eol_uri
             eol_la.save()  # save the cleaned URI
         if eol_uri in checked_uris:
             # only check on a given URI once
             continue
         checked_uris.add(eol_uri)
         try:
             LinkEntity.objects.get(uri=eol_uri)
         except LinkEntity.DoesNotExist:
             pass
         else:
             # the entity is already known, nothing to fetch
             continue
         print("Getting missing data for: " + eol_uri)
         label = False
         eol_api = eolAPI()
         eol_data = eol_api.get_basic_json_for_eol_uri(eol_uri)
         if isinstance(eol_data, dict):
             print("Reading data...")
             if "scientificName" in eol_data:
                 label = eol_data["scientificName"]
         else:
             print("Failed to read data: " + str(eol_data))
         if label is not False:
             print("Saving data for: " + str(label) + " (" + eol_uri + ")")
             le = LinkEntity()
             le.uri = eol_uri
             le.label = label
             le.alt_label = label
             le.ent_type = "class"
             le.vocab_uri = self.EOL_VOCAB_URI
             le.save()
Exemplo n.º 23
0
 def validate_fix_uberon_objects(self):
     """ Repairs UBERON object URIs in the LinkAnnotations table
         and fills in missing LinkEntity records.

         Annotations whose object_uri contains
         self.UBERON_URI_PREFIX get their URI cleaned; when
         cleaning changed it, the old row is deleted and the
         annotation is saved again with the clean URI. Each
         distinct clean URI is then checked once, and a 'class'
         LinkEntity is saved for it when none exists and a label
         can be read for the URI.
     """
     seen_uris = []
     annotations = LinkAnnotation.objects\
                                 .filter(object_uri__icontains=self.UBERON_URI_PREFIX)
     for annotation in annotations:
         raw_uri = annotation.object_uri
         cleaner = LinkEntityGeneration()
         clean_uri = cleaner.make_clean_uri(raw_uri)  # drop cruft from the URI
         if clean_uri != annotation.object_uri:
             print('Has cruft: ' + str(annotation.object_uri))
             # remove the row stored under the old URI before re-saving
             stale_rows = LinkAnnotation.objects.filter(hash_id=annotation.hash_id)
             stale_rows.delete()
             annotation.object_uri = clean_uri
             annotation.save()
         if clean_uri in seen_uris:
             # each URI only needs one entity check
             continue
         seen_uris.append(clean_uri)
         try:
             LinkEntity.objects.get(uri=clean_uri)
             known = True
         except LinkEntity.DoesNotExist:
             known = False
         if known:
             continue
         print('Getting missing data for: ' + clean_uri)
         label = uberonAPI().get_uri_label_from_graph(clean_uri)
         if label is False:
             print('Failed to read data for : ' + str(clean_uri))
             continue
         print('Saving data for: ' + str(label) + ' (' + clean_uri + ')')
         entity = LinkEntity()
         entity.uri = clean_uri
         entity.label = label
         entity.alt_label = label
         entity.ent_type = 'class'
         entity.vocab_uri = self.UBERON_VOCAB_URI
         entity.save()
Exemplo n.º 24
0
 def get_save_entity_label(self, eol_uri):
     """ Asks the EOL API about eol_uri and, when the response
         is a dict with a 'scientificName', saves a 'class'
         LinkEntity that uses the name as label and alt_label
     """
     eol_data = eolAPI().get_basic_json_for_eol_uri(eol_uri)
     if not isinstance(eol_data, dict):
         print('Failed to read data: ' + str(eol_data))
         return
     print('Reading data...')
     if 'scientificName' not in eol_data:
         return
     label = eol_data['scientificName']
     if label is False:
         # False means "no label", so nothing is saved
         return
     print('Saving data for: ' + str(label) + ' (' + eol_uri + ')')
     entity = LinkEntity()
     entity.uri = eol_uri
     entity.label = label
     entity.alt_label = label
     entity.ent_type = 'class'
     entity.vocab_uri = self.EOL_VOCAB_URI
     entity.save()
Exemplo n.º 25
0
 def save_entity_labels(self):
     """ Saves a LinkEntity for every RDFS label in self.graph.

         Returns False when self.graph or self.vocabulary_uri is
         False. Otherwise returns a list with one
         {'s': subject URI, 'o': label} dict per saved entity.
         When self.replace_old is set, the entities already stored
         for the vocabulary are deleted first.
     """
     if self.graph is False or self.vocabulary_uri is False:
         return False
     if self.replace_old:
         LinkEntity.objects.filter(vocab_uri=self.vocabulary_uri).delete()
     saved = []
     for subj, _pred, obj in self.graph.triples((None, RDFS.label, None)):
         subject_uri = str(subj)  # URI of the subject as a string
         label = str(obj)  # label of the object as a string
         entity = LinkEntity()
         entity.uri = subject_uri
         entity.label = label
         entity.alt_label = label
         entity.ent_type = 'type'
         entity.vocab_uri = self.vocabulary_uri
         entity.save()
         saved.append({'s': subject_uri, 'o': label})
     return saved
Exemplo n.º 26
0
 def get_save_entity_label(self, eol_uri):
     """ Fetches the label for an EOL URI from the EOL API and
         stores it: when the API response is a dict holding a
         'scientificName', a 'class' LinkEntity is saved with
         that name as both label and alt_label
     """
     api_client = eolAPI()
     response = api_client.get_basic_json_for_eol_uri(eol_uri)
     name = False
     if isinstance(response, dict):
         print('Reading data...')
         name = response['scientificName'] if 'scientificName' in response else False
     else:
         print('Failed to read data: ' + str(response))
     if name is False:
         # no usable label, so there is nothing to save
         return None
     print('Saving data for: ' + str(name) + ' (' + eol_uri + ')')
     new_entity = LinkEntity()
     new_entity.uri = eol_uri
     new_entity.label = name
     new_entity.alt_label = name
     new_entity.ent_type = 'class'
     new_entity.vocab_uri = self.EOL_VOCAB_URI
     new_entity.save()
Exemplo n.º 27
0
    def check_add_link_entity(self, uri):
        """ Checks to see if an entity exists, if not, it adds
            it if we recognize the URI to be part of a
            known vocabulary

            Returns the existing or newly saved LinkEntity, or
            None when the URI matches no known vocabulary or no
            label could be found for it. GBIF species URIs are
            handed to add_get_gbif_link_entity_and_hierarchy and
            its result is returned as is.
        """
        ent = LinkEntity.objects.filter(uri=uri).first()
        if ent:
            # We found the linked data entity.
            return ent

        def labels_from(api):
            """ Gets (label, alt_label) from an API client, or
                (None, None) if it returns no dict of labels
            """
            labels = api.get_labels_for_uri(uri)
            if isinstance(labels, dict):
                # got the label!
                return labels['label'], labels['alt_label']
            return None, None

        label = None
        alt_label = None
        ent_type = 'class'
        vocab_uri = None
        # Each branch makes ONE API client and reads VOCAB_URI from
        # it, instead of building a second client for the constant.
        if '.geonames.org' in uri:
            geo_api = GeonamesAPI()
            vocab_uri = geo_api.VOCAB_URI
            label, alt_label = labels_from(geo_api)
        elif 'UBERON' in uri:
            uber_api = uberonAPI()
            vocab_uri = uber_api.VOCAB_URI
            label = uber_api.get_uri_label_from_graph(uri)
            if label is not False:
                alt_label = label
        elif 'eol.org' in uri:
            eol_api = eolAPI()
            vocab_uri = eol_api.VOCAB_URI
            label, alt_label = labels_from(eol_api)
        elif 'wikipedia.org' in uri:
            # page name in the URI of the article
            link_ex = uri.split('/')
            label = urlunquote(link_ex[-1])
            label = label.replace('_', ' ')  # underscores in Wikipedia titles
            alt_label = label
            vocab_uri = 'http://www.wikipedia.org/'
        elif 'vocab.getty.edu/aat' in uri:
            print('Finding: ' + uri)
            getty_api = gettyAPI()
            vocab_uri = getty_api.VOCAB_URI
            label, alt_label = labels_from(getty_api)
        elif 'numismatics.org/ocre/id/' in uri:
            print('Finding: ' + uri)
            ocre_api = ANSochreAPI()
            vocab_uri = ocre_api.VOCAB_URI
            label, alt_label = labels_from(ocre_api)
        elif 'gbif.org/species/' in uri:
            # This adds the linked entity to the database
            # as well as its hierarchy
            return add_get_gbif_link_entity_and_hierarchy(uri)
        if not label or not vocab_uri:
            # Something went wrong. Could not
            # add the item
            return None

        # ok to make an entity then!
        ent = LinkEntity()
        ent.uri = uri
        ent.label = label
        ent.alt_label = alt_label
        ent.vocab_uri = vocab_uri
        ent.ent_type = ent_type
        ent.sort = ''
        ent.save()
        return ent
Exemplo n.º 28
0
 def save_csv_from_filecache(self):
     """ updates Open Context to save new sites
         and annotations from the
         file cache

         Reads the cached dict stored under self.cache_filekey
         and, when it has a 'trinomial_refs' list, writes
         '<cache_filekey>.csv' into the working directory with
         one row per trinomial of every reference. A LinkEntity
         is saved for each reference URI that is not in the
         database yet. Does nothing when self.filecache is None
         or the cached object has no 'trinomial_refs'.
     """
     if self.filecache is None:
         return
     # print('Cache update !: ' + self.cache_filekey)
     self.filecache.working_dir = self.working_dir
     json_obj = self.filecache.get_dict_from_file(self.cache_filekey)
     filename = self.cache_filekey + '.csv'
     directory = self.filecache.prep_directory(self.working_dir)
     dir_filename = os.path.join(directory, filename)
     if not isinstance(json_obj, dict) or 'trinomial_refs' not in json_obj:
         return
     field_name_row = [
         'County Code',
         'County Name',
         'Trinomial',
         'Citation',
         'URI',
         'Title',
         'Note',
     ]
     # the with-block guarantees the file is flushed and closed,
     # also when a row fails part way through
     with codecs.open(dir_filename, 'w', encoding='utf-8') as f:
         writer = csv.writer(f, dialect=csv.excel, quoting=csv.QUOTE_ALL)
         writer.writerow(field_name_row)
         for tri_ref in json_obj['trinomial_refs']:
             citation = tri_ref['citation_html']
             uri = tri_ref['rec_uri']
             title = tri_ref['title']
             if len(title) > 194:
                 # shorten long titles before they are used as labels
                 title = title[0:190] + '... '
             # only add the entity when it is really missing; the
             # earlier [:0] slice was always empty, so every
             # reference was saved again
             if not LinkEntity.objects.filter(uri=uri).exists():
                 l_ent = LinkEntity()
                 l_ent.uri = uri
                 l_ent.label = title
                 l_ent.alt_label = title
                 l_ent.vocab_uri = tri_ref['source_uri']
                 l_ent.ent_type = 'class'
                 l_ent.save()
             note = tri_ref.get('note', '')
             for trinomial in tri_ref['trinomials']:
                 # the first 4 characters are used as the county code
                 county_code = trinomial[0:4]
                 act_county_name = self.COUNTY_PREFIXES.get(county_code)
                 print('County code: ' + county_code + ' is ' + str(act_county_name))
                 writer.writerow([
                     county_code,
                     act_county_name,
                     trinomial,
                     citation,
                     uri,
                     title,
                     note,
                 ])
     print('Done!')
Exemplo n.º 29
0
 def add_update(self, post_data):
     """ Creates or updates a linked data entity

         Reads 'uri', 'label', 'alt_label', 'ent_type' and
         'vocab_uri' from post_data. The entity is saved only
         when there is a valid web URI, a non-blank label and a
         usable vocabulary URI. The result is stored in
         self.response and returned as a dict with the keys
         'action', 'uri' and 'label' (as sent), 'ok' and
         'change' (holding a 'note' with any problems found).
     """
     uri = False
     label = False
     vocab_uri = False
     alt_label = False
     ent_type = 'class'
     note = ''
     action = 'attempted creation or update'
     sent_uri = uri
     sent_label = label
     if 'uri' in post_data:
         uri = post_data['uri']
         sent_uri = uri
         if not self.validate_web_uri(uri):
             # must be a full web uri to use
             note += '"' + uri + '" needs to be valid Web URI. '
             uri = False
     if 'label' in post_data:
         label = post_data['label']
         sent_label = label
         alt_label = label  # default for alt-label is label
         if len(label) < 1:
             note += 'The entity label cannot be blank. '
             label = False
     if 'alt_label' in post_data:
         if len(post_data['alt_label']) > 0:
             alt_label = post_data['alt_label']
     if 'ent_type' in post_data:
         ent_type = post_data['ent_type']
     if 'vocab_uri' in post_data:
         vocab_uri = post_data['vocab_uri']
         # validate once; both fallbacks only apply to a vocab_uri
         # that is not a full web URI
         if not self.validate_web_uri(vocab_uri):
             if ent_type == 'vocabulary':
                 # a vocabulary is its own vocabulary
                 vocab_uri = uri
             else:
                 # suggest one based on the URI for the request
                 vocab_uri = self.suggest_vocabulary(uri)
     if uri is False or label is False or vocab_uri is False:
         ok = False
         note += 'Missing data required for this action. '
     else:
         le_gen = LinkEntityGeneration()
         uri = le_gen.make_clean_uri(uri)
         if uri != vocab_uri:
             # get the variant of the vocab_uri that's actually in use
             # returns False if a variant can't be found
             vocab_uri = self.check_vocab_uri(vocab_uri)
             vocab_ok = vocab_uri is not False
         else:
             # only a vocabulary may point to itself
             vocab_ok = ent_type == 'vocabulary'
         if vocab_ok:
             ok = True
             try:
                 le = LinkEntity.objects.get(uri=uri)
             except LinkEntity.DoesNotExist:
                 action = 'add-create'
                 le = LinkEntity()
                 le.uri = uri
             else:
                 action = 'edit-update'
             # now add information to save
             le.label = label
             le.alt_label = alt_label
             le.ent_type = ent_type
             le.vocab_uri = vocab_uri
             le.save()
         else:
             ok = False
             note += 'Must first create a record for the vocabulary. '
     self.response = {
         'action': action,
         'uri': sent_uri,
         'label': sent_label,
         'ok': ok,
         'change': {
             'note': note
         }
     }
     return self.response
Exemplo n.º 30
0
 def find_related_geonames(self, username='******'):
     """ Links region subjects to GeoNames places.

         For every manifest item with project_uuid '0', class
         'oc-gen:cat-region' and item type 'subjects', the
         subject's context path is searched in the GeoNames API
         (slashes become spaces, the number of slashes is the
         admin level). For the first result a LinkEntity is
         saved if it is missing, and a 'skos:closeMatch'
         LinkAnnotation from the item to the GeoNames URI is
         saved unless one exists.
     """
     region_items = Manifest.objects\
                            .filter(project_uuid='0',
                                    class_uri='oc-gen:cat-region',
                                    item_type='subjects')
     for item in region_items:
         print('Checking slug: ' + item.slug)
         subject = Subject.objects.get(uuid=item.uuid)
         context = subject.context
         admin_level = 0
         if '/' in context:
             # one admin level per path separator, never negative
             admin_level = max(len(context.split('/')) - 1, 0)
         query = context.replace('/', ' ')
         geo_api = GeonamesAPI()
         json_r = geo_api.search_admin_entity(query,
                                              admin_level,
                                              username)
         if not isinstance(json_r, dict):
             continue
         # we found a result from GeoNames!
         print('Geonames result found.')
         if 'geonames' not in json_r or len(json_r['geonames']) < 1:
             continue
         # only the first hit is used
         top_hit = json_r['geonames'][0]
         geo_id = top_hit['geonameId']
         label = top_hit['name']
         alt_label = top_hit['toponymName']
         geonames_uri = 'http://www.geonames.org/' + str(geo_id)
         known_entities = LinkEntity.objects\
                                    .filter(uri=geonames_uri)[:1]
         if len(known_entities) < 1:
             # the entity has to be created first
             entity = LinkEntity()
             entity.uri = geonames_uri
             entity.label = label
             entity.alt_label = alt_label
             entity.vocab_uri = GeonamesAPI().VOCAB_URI
             entity.ent_type = 'class'
             entity.save()
         print(geonames_uri)
         known_annos = LinkAnnotation.objects\
                                     .filter(subject=item.uuid,
                                             object_uri=geonames_uri)[:1]
         if len(known_annos) >= 1:
             print('Relation already known.')
             continue
         # the annotation linking this item is still missing
         print('Adding new annotation!')
         annotation = LinkAnnotation()
         annotation.subject = item.uuid
         annotation.subject_type = item.item_type
         annotation.project_uuid = item.project_uuid
         annotation.source_id = item.source_id
         annotation.predicate_uri = 'skos:closeMatch'
         annotation.object_uri = geonames_uri
         annotation.creator_uuid = ''
         annotation.save()
Exemplo n.º 31
0
 def prepare(self):
     """ Makes sure the referrer in self.uri is known.

         When the URI dereferences to an entity, its labels and
         vocabulary are copied onto self and self.valid is set.
         Otherwise, if self.vocab_uri, self.vocab_label and
         self.label are all given, the referrer is saved as a
         LinkEntity (and its vocabulary too, if that is unknown)
         and self.valid is set. Nothing happens when self.uri is
         False.
     """
     if self.uri is False:
         return
     referrer = Entity()
     if referrer.dereference(self.uri):
         self.label = referrer.label
         self.alt_label = referrer.alt_label
         self.vocab_label = referrer.vocabulary
         self.vocab_uri = referrer.vocab_uri
         self.valid = True
         return
     # the referring source is not known in the database
     if self.vocab_uri is False\
        or self.vocab_label is False\
        or self.label is False:
         # not enough data to save a referrer
         return
     if self.vocab_uri == self.uri:
         # the referrer is itself the vocabulary
         referrer_ent_type = 'vocabulary'
     else:
         referrer_ent_type = 'class'
         vocab_ent = Entity()
         vocab_found = vocab_ent.dereference(self.vocab_uri)
         if vocab_found is False:
             # the vocabulary is unknown too, so create it first
             vocab_le = LinkEntity()
             vocab_le.uri = self.vocab_uri
             vocab_le.label = self.vocab_label
             vocab_le.alt_label = self.vocab_alt_label
             vocab_le.vocab_uri = self.vocab_uri
             vocab_le.ent_type = 'vocabulary'
             vocab_le.save()
     # now save the linked entity for the referrer itself
     referrer_le = LinkEntity()
     referrer_le.uri = self.uri
     referrer_le.label = self.label
     referrer_le.alt_label = self.alt_label
     referrer_le.vocab_uri = self.vocab_uri
     referrer_le.ent_type = referrer_ent_type
     referrer_le.save()
     self.valid = True
Exemplo n.º 32
0
 def match_california_site(self, site_uuid):
     """ Tries to match the alternate names of a California site
         with tDAR site keywords.

         Returns the number of matches found. Nothing is looked
         up when the item does not exist or already has a
         'dc-terms:subject' annotation pointing into
         self.TDAR_VOCAB. A keyword matches a tDAR result when
         their labels are equal ignoring case; for each match a
         LinkEntity is saved if missing and a LinkAnnotation is
         saved. After a failed tDAR request the method sleeps for
         a growing wait time and exits the process once the wait
         exceeds self.max_wait.
     """
     found_matches = 0
     oc_item = OCitem()
     if not oc_item.check_exists(site_uuid):
         return found_matches
     known_links = LinkAnnotation.objects\
                                 .filter(subject=site_uuid,
                                         predicate_uri='dc-terms:subject',
                                         object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(known_links) > 0:
         # the item already has a tDAR id
         return found_matches
     # the alternate names are read from the item's JSON-LD
     oc_item.generate_json_ld()
     name_pred = 'oc-pred:52-alternate-site-or-place-name'
     request_keywords = []
     observations = []
     if 'oc-gen:has-obs' in oc_item.json_ld:
         observations = oc_item.json_ld['oc-gen:has-obs']
     if not isinstance(observations, list):
         observations = []
     for obs in observations:
         names = obs[name_pred] if name_pred in obs else None
         if not isinstance(names, list):
             continue
         for name_obj in names:
             if 'xsd:string' in name_obj \
                and isinstance(name_obj['xsd:string'], str):
                 request_keywords.append(name_obj['xsd:string'])
     print('Checking names in tDAR: ' + '; '.join(request_keywords))
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             lw_keyword = keyword.lower()
             for result in results[:self.max_results]:
                 # anything but an exact, case-insensitive match
                 # is treated as spurious
                 if result['label'].lower() != lw_keyword:
                     print('Almost! ' + result['label'] +
                           ' is not exactly: ' + keyword)
                     continue
                 print('FOUND ' + result['label'])
                 found_matches += 1
                 # first make sure the linked entity is stored
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                 except LinkEntity.DoesNotExist:
                     entity = LinkEntity()
                     entity.uri = result['id']
                     entity.label = result['label']
                     entity.alt_label = result['label']
                     entity.vocab_uri = self.TDAR_VOCAB
                     entity.ent_type = 'type'
                     entity.save()
                 # then link the item to it
                 annotation = LinkAnnotation()
                 annotation.subject = oc_item.manifest.uuid
                 annotation.subject_type = oc_item.manifest.item_type
                 annotation.project_uuid = oc_item.manifest.project_uuid
                 annotation.source_id = 'tdar-api-lookup'
                 annotation.predicate_uri = self.DC_TERMS_SUBJECT
                 annotation.object_uri = result['id']
                 annotation.save()
         if not tdar_api.request_error:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
             continue
         self.request_error = True
         print('HTTP request to tDAR failed!')
         self.error_wait += self.base_wait
         if self.error_wait > self.max_wait:
             print('Too many failures, quiting...')
             sys.exit('Quitting process')
         # wait a while before the next request
         print('Will try again in ' + str(self.error_wait) +
               ' seconds...')
         sleep(self.error_wait)
     return found_matches
Exemplo n.º 33
0
 def match_trinomial_obj(self, tri):
     """ Attempts to match a trinomial object 'tri'
         against tDAR, if it hasn't yet been matched

         Returns the number of matches found. Nothing is looked
         up when the item has no manifest record or already has
         a 'dc-terms:subject' annotation pointing into
         self.TDAR_VOCAB. A tDAR result matches when its label
         equals the trinomial, or equals it after adding leading
         zeros to the site part. For each match a LinkEntity is
         saved if missing and a LinkAnnotation is saved. After a
         failed tDAR request the method sleeps for a growing wait
         time and exits the process once the wait exceeds
         self.max_wait.
     """
     found_matches = 0
     try:
         manifest = Manifest.objects.get(uuid=tri.uuid)
     except Manifest.DoesNotExist:
         # without a manifest record there is nothing to annotate,
         # so the annotation query below is not needed either
         return found_matches
     la_check = LinkAnnotation.objects\
                              .filter(subject=tri.uuid,
                                      predicate_uri='dc-terms:subject',
                                      object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(la_check) > 0:
         # we already have a tDAR id for this item
         return found_matches
     tri_man = TrinomialManage()
     request_keywords = [tri.trinomial]
     if self.lead_zero_check:
         # also ask for the trinomial with extra leading zeros
         # NOTE(review): requests pad the site part up to 4
         # characters while the comparison below pads up to 5 -
         # confirm the difference is intended
         tri_parts = tri_man.parse_trinomial(tri.trinomial)
         site = tri_parts['site']
         while len(site) < 4:
             site = '0' + site
             request_keywords.append(
                 tri_parts['state'] + tri_parts['county'] + site)
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             for result in results[:self.max_results]:
                 # assume a spurious match unless the labels agree
                 match_real = result['label'] == tri.trinomial
                 if not match_real:
                     # check if the only difference is in leading zeros
                     tri_parts = tri_man.parse_trinomial(tri.trinomial)
                     site = tri_parts['site']
                     while len(site) < 5:
                         site = '0' + site
                         new_trinomial = tri_parts['state'] + tri_parts['county'] + site
                         if new_trinomial == result['label']:
                             # A good match, the tDAR result and the trinomial
                             # match (but with different leading zeros)
                             match_real = True
                             break
                 if not match_real:
                     print('Almost! ' + result['label'] + ' is not exactly: ' + tri.trinomial)
                     continue
                 found_matches += 1
                 # OK! Found a match, first save the linked entity
                 # in the link entity table if it is missing
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                 except LinkEntity.DoesNotExist:
                     le = LinkEntity()
                     le.uri = result['id']
                     le.label = result['label']
                     le.alt_label = result['label']
                     le.vocab_uri = self.TDAR_VOCAB
                     le.ent_type = 'type'
                     le.save()
                 # Now save the link annotation
                 la = LinkAnnotation()
                 la.subject = tri.uuid
                 la.subject_type = manifest.item_type
                 la.project_uuid = manifest.project_uuid
                 la.source_id = 'tdar-api-lookup'
                 la.predicate_uri = self.DC_TERMS_SUBJECT
                 la.object_uri = result['id']
                 la.save()
         if tdar_api.request_error:
             self.request_error = True
             print('HTTP request to tDAR failed!')
             self.error_wait += self.base_wait
             if self.error_wait > self.max_wait:
                 print('Too many failures, quiting...')
                 sys.exit('Quitting process')
             else:
                 # sleep for the accumulated wait before the next request
                 print('Will try again in ' + str(self.error_wait) + ' seconds...')
                 sleep(self.error_wait)
         else:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
     return found_matches
Exemplo n.º 34
0
 def prepare(self):
     """ Makes sure the referrer is known to the database. A referrer
         that is already known has its stored description copied onto
         this object. An unknown referrer is saved as a new link
         entity (along with its vocabulary, when that is also
         unknown), provided a label, a vocabulary URI and a
         vocabulary label are all available. In both cases
         self.valid is set to True; otherwise nothing is changed.
     """
     if self.uri is False:
         # no referrer URI was given, so there is nothing to check
         return
     referrer = Entity()
     if referrer.dereference(self.uri):
         # already known, copy over the stored description
         self.label = referrer.label
         self.alt_label = referrer.alt_label
         self.vocab_label = referrer.vocabulary
         self.vocab_uri = referrer.vocab_uri
         self.valid = True
         return
     # the referring source is not known in the database
     if self.vocab_uri is False\
        or self.vocab_label is False\
        or self.label is False:
         # not enough data to save a referrer in the database
         return
     if self.vocab_uri == self.uri:
         # the referrer is itself a vocabulary
         new_ent_type = 'vocabulary'
     else:
         new_ent_type = 'class'
         if Entity().dereference(self.vocab_uri) is False:
             # the referring vocabulary is not known in the database
             # either, so it has to be created first
             vocab_le = LinkEntity()
             vocab_le.uri = self.vocab_uri
             vocab_le.label = self.vocab_label
             vocab_le.alt_label = self.vocab_alt_label
             vocab_le.vocab_uri = self.vocab_uri
             vocab_le.ent_type = 'vocabulary'
             vocab_le.save()
     # everything is in place to save a linked entity for the referrer
     referrer_le = LinkEntity()
     referrer_le.uri = self.uri
     referrer_le.label = self.label
     referrer_le.alt_label = self.alt_label
     referrer_le.vocab_uri = self.vocab_uri
     referrer_le.ent_type = new_ent_type
     referrer_le.save()
     self.valid = True
Exemplo n.º 35
0
 def check_add_link_entity(self, uri):
     """ Checks to see if a link entity exists for a URI; if not, adds
         it when the URI is recognized as part of a known vocabulary
         (matched by substring: GeoNames, UBERON, EOL, Wikipedia,
         Getty AAT, or numismatics.org OCRE identifiers).

         :param str uri: URI of the entity to check and possibly add
         :returns: None; the only effect is that a LinkEntity may be
             saved. Nothing is saved when the URI is already stored,
             is not recognized, or no label could be obtained.
     """
     try:
         LinkEntity.objects.get(uri=uri)
     except LinkEntity.DoesNotExist:
         pass
     else:
         # already in the database, nothing to add
         return
     label = False
     alt_label = False
     ent_type = 'class'
     vocab_uri = False
     # set for vocabularies whose API client answers
     # get_labels_for_uri() with a dict of 'label' and 'alt_label'
     labels_api = None
     if '.geonames.org' in uri:
         labels_api = GeonamesAPI()
     elif 'UBERON' in uri:
         # reuse the one client for both the vocabulary URI and the
         # lookup, rather than constructing a second instance
         uber_api = uberonAPI()
         vocab_uri = uber_api.VOCAB_URI
         label = uber_api.get_uri_label_from_graph(uri)
         # only one label is returned, so it doubles as the alt label
         alt_label = label
     elif 'eol.org' in uri:
         labels_api = eolAPI()
     elif 'wikipedia.org' in uri:
         # the label is the page name at the end of the article URI
         link_ex = uri.split('/')
         label = urlunquote(link_ex[-1])
         label = label.replace('_', ' ')  # underscores in Wikipedia titles
         alt_label = label
         vocab_uri = 'http://www.wikipedia.org/'
     elif 'vocab.getty.edu/aat' in uri:
         print('Finding: ' + uri)
         labels_api = gettyAPI()
     elif 'numismatics.org/ocre/id/' in uri:
         print('Finding: ' + uri)
         labels_api = ANSochreAPI()
     if labels_api is not None:
         vocab_uri = labels_api.VOCAB_URI
         labels = labels_api.get_labels_for_uri(uri)
         if isinstance(labels, dict):
             # got the label!
             label = labels['label']
             alt_label = labels['alt_label']
     if label is not False and vocab_uri is not False:
         # ok to make an entity then!
         ent = LinkEntity()
         ent.uri = uri
         ent.label = label
         ent.alt_label = alt_label
         ent.vocab_uri = vocab_uri
         ent.ent_type = ent_type
         ent.save()
Exemplo n.º 36
0
 def match_california_site(self, site_uuid):
     """ Tries to match the alternate site / place names of a
         California site against tDAR site keywords. A tDAR result
         counts as a match when its label equals one of the names,
         ignoring case. Each match is saved as a link entity (if not
         yet stored) plus a link annotation on the item.

         Returns the number of matches found, which is 0 when the
         item does not exist or already has a tDAR annotation.
     """
     found_matches = 0
     oc_item = OCitem()
     if not oc_item.check_exists(site_uuid):
         return found_matches
     already_linked = LinkAnnotation.objects\
                                    .filter(subject=site_uuid,
                                            predicate_uri='dc-terms:subject',
                                            object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(already_linked) >= 1:
         # this item already has a tDAR id, so nothing more to do
         return found_matches
     # no tDAR id yet: generate the item's JSON-LD and collect
     # the alternate names recorded in its observations
     oc_item.generate_json_ld()
     alt_name_key = 'oc-pred:52-alternate-site-or-place-name'
     request_keywords = []
     observations = None
     if 'oc-gen:has-obs' in oc_item.json_ld:
         observations = oc_item.json_ld['oc-gen:has-obs']
     if isinstance(observations, list):
         for obs in observations:
             if alt_name_key not in obs:
                 continue
             name_objs = obs[alt_name_key]
             if not isinstance(name_objs, list):
                 continue
             for name_obj in name_objs:
                 if 'xsd:string' not in name_obj:
                     continue
                 name_str = name_obj['xsd:string']
                 if isinstance(name_str, str):
                     request_keywords.append(name_str)
     print('Checking names in tDAR: ' + '; '.join(request_keywords))
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             keyword_lc = keyword.lower()
             for result in results[:self.max_results]:
                 if result['label'].lower() != keyword_lc:
                     # treat anything but a case-insensitive exact
                     # match of the name as spurious
                     print('Almost! ' + result['label'] + ' is not exactly: ' + keyword)
                     continue
                 print('FOUND ' + result['label'])
                 found_matches += 1
                 # first make sure the tDAR keyword is in the link entity table
                 try:
                     LinkEntity.objects.get(uri=result['id'])
                     entity_known = True
                 except LinkEntity.DoesNotExist:
                     entity_known = False
                 if not entity_known:
                     new_ent = LinkEntity()
                     new_ent.uri = result['id']
                     new_ent.label = result['label']
                     new_ent.alt_label = result['label']
                     new_ent.vocab_uri = self.TDAR_VOCAB
                     new_ent.ent_type = 'type'
                     new_ent.save()
                 # then annotate the item with the tDAR keyword
                 annotation = LinkAnnotation()
                 annotation.subject = oc_item.manifest.uuid
                 annotation.subject_type = oc_item.manifest.item_type
                 annotation.project_uuid = oc_item.manifest.project_uuid
                 annotation.source_id = 'tdar-api-lookup'
                 annotation.predicate_uri = self.DC_TERMS_SUBJECT
                 annotation.object_uri = result['id']
                 annotation.save()
         if tdar_api.request_error:
             self.request_error = True
             print('HTTP request to tDAR failed!')
             # every failure lengthens the pause by base_wait
             self.error_wait += self.base_wait
             if self.error_wait > self.max_wait:
                 print('Too many failures, quiting...')
                 sys.exit('Quitting process')
             else:
                 # pause before the loop moves on to the next keyword
                 print('Will try again in ' + str(self.error_wait) + ' seconds...')
                 sleep(self.error_wait)
         else:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 # recovered from earlier failures, reset the pause
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
     return found_matches
Exemplo n.º 37
0
 def match_trinomial_obj(self, tri):
     """ Attempts to match a trinomial object 'tri' against tDAR
         site keywords, if it hasn't yet been matched.

         A tDAR result counts as a match when its label equals the
         trinomial exactly, or equals the trinomial with extra
         leading zeros on the site part (padded to at most 5
         characters). Each match is saved as a link entity (if not
         yet stored) plus a link annotation on the item.

         :param tri: object with 'uuid' and 'trinomial' attributes
         :returns: the number of matches found; 0 when the item has
             no manifest record or already has a tDAR annotation
     """
     def zero_padded(parts, max_len):
         """ Lists the trinomial with the site part given one more
             leading zero at a time, until it is max_len long
         """
         site = parts['site']
         variants = []
         while len(site) < max_len:
             site = '0' + site
             variants.append(parts['state'] + parts['county'] + site)
         return variants

     found_matches = 0
     try:
         manifest = Manifest.objects.get(uuid=tri.uuid)
     except Manifest.DoesNotExist:
         manifest = False
     la_check = LinkAnnotation.objects\
                              .filter(subject=tri.uuid,
                                      predicate_uri='dc-terms:subject',
                                      object_uri__contains=self.TDAR_VOCAB)[:1]
     if len(la_check) >= 1 or manifest is False:
         # already has a tDAR id, or the item is not in the manifest
         return found_matches
     # we don't already have a tDAR id for this item, continue with matches
     tri_man = TrinomialManage()
     # The parsed trinomial and the padded labels depend only on
     # tri.trinomial, so each is built at most once and only when
     # first needed, instead of once per non-matching tDAR result.
     # NOTE(review): assumes parse_trinomial() gives the same result
     # on every call for the same input -- confirm
     tri_parts = None
     padded_labels = None
     request_keywords = [tri.trinomial]
     if self.lead_zero_check:
         # also search tDAR for variants with multiple leading zeros
         tri_parts = tri_man.parse_trinomial(tri.trinomial)
         request_keywords.extend(zero_padded(tri_parts, 4))
     for keyword in request_keywords:
         tdar_api = tdarAPI()
         results = tdar_api.get_site_keyword(keyword)
         if isinstance(results, list):
             for result in results[:self.max_results]:
                 # assume a spurious match unless the labels agree
                 match_real = False
                 if result['label'] == tri.trinomial:
                     # the trinomial and the tDAR result exactly match
                     match_real = True
                 else:
                     # check if the only difference is in leading zeros
                     if padded_labels is None:
                         if tri_parts is None:
                             tri_parts = tri_man.parse_trinomial(
                                 tri.trinomial)
                         padded_labels = zero_padded(tri_parts, 5)
                     if result['label'] in padded_labels:
                         # A good match, the tDAR result and the trinomial
                         # match (but with different leading zeros)
                         match_real = True
                 if match_real:
                     found_matches += 1
                     # OK! Found a match, first save the linked entity
                     # in the link entity table
                     le_check = False
                     try:
                         le_check = LinkEntity.objects.get(
                             uri=result['id'])
                     except LinkEntity.DoesNotExist:
                         le_check = False
                     if le_check is False:
                         le = LinkEntity()
                         le.uri = result['id']
                         le.label = result['label']
                         le.alt_label = result['label']
                         le.vocab_uri = self.TDAR_VOCAB
                         le.ent_type = 'type'
                         le.save()
                     # Now save the link annotation
                     la = LinkAnnotation()
                     la.subject = tri.uuid
                     la.subject_type = manifest.item_type
                     la.project_uuid = manifest.project_uuid
                     la.source_id = 'tdar-api-lookup'
                     la.predicate_uri = self.DC_TERMS_SUBJECT
                     la.object_uri = result['id']
                     la.save()
                 else:
                     print('Almost! ' + result['label'] +
                           ' is not exactly: ' + tri.trinomial)
         if tdar_api.request_error:
             self.request_error = True
             print('HTTP request to tDAR failed!')
             # every failure lengthens the pause by base_wait
             self.error_wait += self.base_wait
             if self.error_wait > self.max_wait:
                 print('Too many failures, quiting...')
                 sys.exit('Quitting process')
             else:
                 # pause before the loop moves on to the next keyword
                 print('Will try again in ' + str(self.error_wait) +
                       ' seconds...')
                 sleep(self.error_wait)
         else:
             self.request_error = False
             if self.error_wait >= self.base_wait:
                 # recovered from earlier failures, reset the pause
                 print('HTTP requests resumed OK, will continue.')
                 self.error_wait = 0
     return found_matches