Пример #1
0
def _storeValue(data, datasetId, key, value):
    """Store ``value`` under ``key`` in ``data``, honouring ``arrayProps``.

    Keys listed in ``arrayProps`` accumulate every value in a list. Any other
    key holds a single value: if one is already present, a warning is logged
    and the shorter of the two values (compared with ``len``) is kept.
    """
    if key in arrayProps:
        data.setdefault(key, []).append(value)
        return

    if key not in data:
        data[key] = value
        return

    log.warning('Unexpected creation of array for:  %s - %s - %s', datasetId, key, value)
    log.warning('Existing value for this key     :  %s - %s - %s', datasetId, key, data[key])
    log.warning('----- Will use the shortest value -----')
    if len(value) < len(data[key]):
        data[key] = value


def populateValue(g, datasetId, ds, data, p, o, iriCache):
    """Record the object ``o`` of predicate ``p`` in the ``data`` dictionary.

    Args:
        g: Graph passed on to ``iri_lookup`` and ``parseMeasure``.
        datasetId: Dataset identifier, used in log messages and passed to
            ``parseMeasure``.
        ds: Dataset entry; looked-up terms that carry a ``'curie'`` are
            stored in ``ds['term']`` keyed by that curie.
        data: Dictionary that receives the value, keyed by the stripped
            predicate IRI.
        p: Predicate of the triple.
        o: Object of the triple (``URIRef``, ``Literal`` or ``BNode``).
        iriCache: Cache handed to ``iri_lookup``.

    Raises:
        Exception: If ``o`` is none of the three supported RDF term types.
    """
    # These predicates are handled separately (getResearchers, getProtocols, etc.)
    skipIri = (
        term.URIRef('http://uri.interlex.org/temp/uris/contributorTo'),
        term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasUriApi'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasUriHuman'),
        term.URIRef('http://uri.interlex.org/temp/uris/hasProtocol'),
        term.URIRef('http://uri.interlex.org/temp/uris/wasUpdatedAtTime'),
    )
    if p in skipIri:
        return

    key = strip_iri(p.strip())

    if isinstance(o, term.URIRef):
        value = iri_lookup(g, o.strip(), iriCache)
        if not value:
            # Nothing is stored when the lookup yields no result.
            return
        if isinstance(value, dict) and 'curie' in value:
            # Keep the full term description once per dataset and refer to
            # it from ``data`` through its curie.
            ds['term'][value['curie']] = value
            value = value['curie']
        _storeValue(data, datasetId, key, value)

    elif isinstance(o, term.Literal):
        _storeValue(data, datasetId, key, strip_iri(o.strip()))

    elif isinstance(o, term.BNode):
        # Blank nodes are parsed as measurements; the result replaces any
        # value already stored under this key.
        data[key] = parseMeasure(datasetId, g, o, {'value': '', 'unit': ''})

    else:
        raise Exception('Unknown RDF term: %s' % type(o))
Пример #2
0
def getResearchers(gNew, gDelta, output, iriCache):
    """Fill ``output[datasetId]['researcher']`` from ``contributorTo`` triples.

    Every (subject, object) pair of the ``contributorTo`` predicate in
    ``gNew`` links a researcher (subject) to a dataset (object). The
    researcher's predicate/object pairs are read from ``gDelta`` and passed
    through ``populateValue``. Researchers that yield no values are not added.

    Args:
        gNew: Graph queried for the ``contributorTo`` triples.
        gDelta: Graph queried for each researcher's properties.
        output: Dictionary keyed by dataset id; updated in place.
        iriCache: Cache forwarded to ``populateValue``.
    """
    contributorTo = URIRef('http://uri.interlex.org/temp/uris/contributorTo')
    for s, o in gNew.subject_objects(contributorTo):
        m = re.search(r".*(?P<ds>N:dataset:[:\w-]+)", o)
        if not m:
            # re.search returns None when the object holds no dataset id;
            # skip the triple (as getTags does) instead of crashing on
            # m.group().
            log.warning('Skipping contributor %s: no dataset id found in %s', s, o)
            continue

        datasetId = strip_iri(m.group(0).strip())
        user = strip_iri(s)
        newEntry = {}
        for p2, o2 in gDelta.predicate_objects(s):
            populateValue(gDelta, datasetId, output[datasetId], newEntry, p2, o2, iriCache)
        if newEntry:
            output[datasetId]['researcher'][user] = newEntry
Пример #3
0
def parseMeasure(dsId, g, node, values):
    """Fill ``values['unit']`` and ``values['value']`` from a blank node.

    Two kinds of node are recognised, checked in this order:

    * a node with any predicate pointing at ``sparc/Measurement``: the unit
      comes from ``hasUnit`` and the value from ``rdf:value``;
    * a node with any predicate pointing at ``rdfs:Datatype``: the unit comes
      from ``owl:onDatatype`` and the value is rendered as ``"min-max"``,
      using ``xsd:minInclusive`` of the first ``owl:withRestrictions`` list
      item and ``xsd:maxInclusive`` of the second.

    A warning is logged for any other node, and for a recognised node whose
    unit is ``'dimensionless'``. ``values`` is modified in place and returned.
    """
    isMeasurement = (
        node,
        None,
        URIRef('http://uri.interlex.org/tgbugs/uris/readable/sparc/Measurement'),
    ) in g

    if not isMeasurement and (
        node,
        None,
        URIRef('http://www.w3.org/2000/01/rdf-schema#Datatype'),
    ) not in g:
        log.warning("Encountered a B-Node that is not a measurement in {}".format(dsId))
        return values

    if isMeasurement:
        rawUnit = g.value(subject=node, predicate=URIRef('http://uri.interlex.org/temp/uris/hasUnit'))
        values['unit'] = strip_iri(rawUnit)

        rawValue = g.value(subject=node, predicate=URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#value'))
        values['value'] = str(rawValue)
    else:
        # rdfs:Datatype node: the unit is the restricted datatype.
        datatype = strip_iri(g.value(subject=node, predicate=URIRef('http://www.w3.org/2002/07/owl#onDatatype')))
        values['unit'] = strip_iri(datatype)

        restrictions = g.value(subject=node, predicate=URIRef('http://www.w3.org/2002/07/owl#withRestrictions'))

        # Lower bound: first item of the restriction list.
        head = g.value(subject=restrictions, predicate=URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'))
        lowerBound = g.value(subject=head, predicate=URIRef('http://www.w3.org/2001/XMLSchema#minInclusive'))

        # Upper bound: first item of the remainder of the list.
        tail = g.value(subject=restrictions, predicate=URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'))
        tailHead = g.value(subject=tail, predicate=URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'))
        upperBound = g.value(subject=tailHead, predicate=URIRef('http://www.w3.org/2001/XMLSchema#maxInclusive'))

        values['value'] = "{}-{}".format(str(lowerBound), str(upperBound))

    if values['unit'] == 'dimensionless':
        log.warning("Measurement with no unit (value: {}) in {}".format(values['value'], dsId))

    return values
Пример #4
0
def getProtocols(gNew, gDelta, output, iriCache):
    """Fill ``output[datasetId]['protocol']`` from ``hasProtocol`` triples.

    Every (subject, object) pair of the ``hasProtocol`` predicate in ``gNew``
    links a dataset (subject) to a protocol (object). The protocol's
    predicate/object pairs are read from ``gDelta`` and passed through
    ``populateValue``; the resulting entry is stored under the protocol's URL.
    Protocols that yield no values are not added.

    Args:
        gNew: Graph queried for the ``hasProtocol`` triples.
        gDelta: Graph queried for each protocol's properties.
        output: Dictionary keyed by dataset id; updated in place.
        iriCache: Cache forwarded to ``populateValue``.
    """
    hasProtocol = URIRef('http://uri.interlex.org/temp/uris/hasProtocol')
    for s, o in gNew.subject_objects(hasProtocol):
        m = re.search(r".*(?P<ds>N:dataset:[:\w-]+)", s)
        if not m:
            # re.search returns None when the subject holds no dataset id;
            # skip the triple (as getTags does) instead of crashing on
            # m.group().
            log.warning('Skipping protocol %s: no dataset id found in %s', o, s)
            continue

        datasetId = strip_iri(m.group(0).strip())
        url = str(o)
        newEntry = {}
        for p2, o2 in gDelta.predicate_objects(o):
            populateValue(gDelta, datasetId, output[datasetId], newEntry, p2, o2, iriCache)
        if newEntry:
            output[datasetId]['protocol'][url] = newEntry
Пример #5
0
def getDatasets(gNew, gDelta, output, iriCache):
    """Create an ``output`` entry for every dataset and fill its summary.

    Subjects of type ``sparc/Dataset`` are taken from ``gNew``. For each one
    ``addEntry`` is called, then every predicate/object pair found in
    ``gDelta`` is stored in ``output[datasetId]['summary']`` through
    ``populateValue``. ``hasAwardNumber`` objects are additionally registered
    with ``getAwards``.

    Args:
        gNew: Graph queried for the dataset subjects.
        gDelta: Graph queried for each dataset's properties.
        output: Dictionary keyed by dataset id; updated in place.
        iriCache: Cache forwarded to ``populateValue``.
    """
    datasetType = URIRef('http://uri.interlex.org/tgbugs/uris/readable/sparc/Dataset')
    hasAwardNumber = URIRef("http://uri.interlex.org/temp/uris/hasAwardNumber")

    for ds in gNew.subjects(RDF.type, datasetType):
        log.info(ds)
        m = re.search(r".*(?P<ds>N:dataset:[:\w-]+)", ds)
        if not m:
            # re.search returns None when the subject holds no dataset id;
            # skip the subject (as getTags does) instead of crashing on
            # m.group().
            log.warning('Skipping dataset subject without a dataset id: %s', ds)
            continue

        datasetId = strip_iri(m.group(0).strip())
        addEntry(output, datasetId)
        for p, o in gDelta.predicate_objects(ds):
            if p == hasAwardNumber:
                getAwards(o, datasetId, output)
            # The award number is also recorded in the summary, like any
            # other property.
            populateValue(gDelta, datasetId, output[datasetId], output[datasetId]['summary'], p, o, iriCache)
Пример #6
0
def getTags(gNew, gDelta, output, iriCache):
    """Append tags to ``output[datasetId]['tag']`` from ``IAO_0000136`` triples.

    For each (subject, object) pair of the ``IAO_0000136`` predicate in
    ``gNew`` whose subject contains a dataset id, the tag is either the first
    label returned by ``iri_lookup`` (``URIRef`` objects) or the string form
    of the object. Pairs whose lookup returns nothing are skipped, and a tag
    is added only if it is not already present.
    """
    tagPredicate = URIRef('http://purl.obolibrary.org/obo/IAO_0000136')
    for subject, obj in gNew.subject_objects(tagPredicate):
        match = re.search(r".*(?P<ds>N:dataset:[:\w-]+)", subject)
        if not match:
            continue

        if not isinstance(obj, term.URIRef):
            tag = str(obj)
        else:
            resolved = iri_lookup(gNew, obj, iriCache)
            if not resolved:
                continue
            tag = resolved['labels'][0]

        datasetId = strip_iri(match.group(0).strip())
        knownTags = output[datasetId]['tag']
        if tag not in knownTags:
            knownTags.append(tag)
Пример #7
0
def getAwards(awardIdURI, dsId, output):
    """Register the award referenced by ``awardIdURI`` on dataset ``dsId``.

    The IRI is shortened with ``strip_iri``; the result is used both as the
    key in ``output[dsId]['award']`` and as the entry's ``'awardId'`` value.
    An existing entry under the same key is replaced.
    """
    shortId = strip_iri(awardIdURI)
    awardEntry = dict(awardId=shortId)
    output[dsId]['award'][shortId] = awardEntry