示例#1
0
    def get_vocabulary(self):
        """Build a Vocabulary from the skos:ConceptScheme held in ``self.g``.

        Two SPARQL queries are run against the graph: the first reads the
        scheme's URI, title, description and optional creator / created /
        modified / versionInfo values; the second collects every top concept
        together with its prefLabel.

        Returns:
            A Vocabulary whose ``hasTopConcepts`` is a list of
            ``(concept, prefLabel)`` tuples sorted by prefLabel.

        Raises:
            ValueError: if the graph holds no ConceptScheme that has both a
                dct:title and a dct:description (previously this surfaced as
                an UnboundLocalError further down).
        """
        q = '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            PREFIX dct: <http://purl.org/dc/terms/>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            SELECT *
            WHERE {
              ?s a skos:ConceptScheme ;
              dct:title ?t ;
              dct:description ?d .
              OPTIONAL {?s dct:creator ?c }
              OPTIONAL {?s dct:created ?cr }
              OPTIONAL {?s dct:modified ?m }
              OPTIONAL {?s owl:versionInfo ?v }
            }'''
        rows = list(self.g.query(q))
        if not rows:
            raise ValueError(
                'No skos:ConceptScheme with dct:title and dct:description '
                'found for vocabulary {}'.format(self.vocab_id))

        # Imported lazily so the project model is only loaded once it is needed.
        from model.vocabulary import Vocabulary

        # Several rows appear when an OPTIONAL property has more than one
        # value; the last row is used, matching the earlier loop's outcome.
        r = rows[-1]
        v = Vocabulary(self.vocab_id, r['s'], r['t'], r['d'], r['c'],
                       r['cr'], r['m'], r['v'], [], None, None)

        q2 = '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT *
            WHERE {
              ?s skos:hasTopConcept ?tc .
              ?tc skos:prefLabel ?pl .
            }'''
        # add the top concepts to the Vocabulary instance, sorted by prefLabel
        v.hasTopConcepts = sorted(
            ((x['tc'], x['pl']) for x in self.g.query(q2)),
            key=lambda tup: tup[1])
        return v
示例#2
0
文件: search.py 项目: chihchun/bebebe
    def index(self):
        """Record a search for the requested word and redirect to its lookup.

        Reads the ``word`` request parameter, resolves the user through
        ``get_user_or_letlogin``, stores a ``SearchLog`` and a ``Vocabulary``
        entry for that user and word, then redirects to the URL returned by
        ``YahooDictionary.get_search_url``.

        Raises:
            ValueError: if no ``word`` parameter was supplied.
        """
        word = self.params.get('word')
        if not word:
            # Raising a plain string is not a valid exception; use a real
            # exception class carrying the same message.
            raise ValueError("no word!")

        user = get_user_or_letlogin(self, '/search/?word=%s' % word)
        log = SearchLog(word=word, user=user)
        log.put()

        # key_name combines the user's email and the word
        voca = Vocabulary(key_name=user.email + '_' + word, word=word, user=user)
        voca.put()

        query_url = YahooDictionary.get_search_url(word)
        self.redirect(query_url)
示例#3
0
    def get_vocabulary(self):
        """Fetch this project's ConceptScheme metadata from VocBench.

        Posts a SPARQL query to the ``/SPARQL/evaluateQuery`` endpoint for the
        project ``self.vocab_uri`` and builds a Vocabulary from the first
        result row plus every (top concept, prefLabel) pair in the result.

        Returns:
            A Vocabulary for the first ConceptScheme in the response. Optional
            properties that are unbound are passed as None, and
            ``concept_hierarchy`` is None when the hierarchy text is blank.

        Raises:
            VbException: if the HTTP status is not 200, the response body is
                not the expected JSON structure, or no ConceptScheme row is
                returned.
        """
        s = VocBench("x", self.request)._authed_request_object()
        r = s.post(
            config.VB_ENDPOINT + "/SPARQL/evaluateQuery",
            data={
                "query": """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                    PREFIX dct: <http://purl.org/dc/terms/>
                    PREFIX owl: <http://www.w3.org/2002/07/owl#>
                    SELECT *
                    WHERE {
                      ?s a skos:ConceptScheme ;
                      skos:prefLabel ?t .
                      OPTIONAL {?s dct:description ?d }
                      OPTIONAL {?s dct:creator ?c }
                      OPTIONAL {?s dct:created ?cr }
                      OPTIONAL {?s dct:modified ?m }
                      OPTIONAL {?s owl:versionInfo ?v }
                      OPTIONAL {?tc skos:topConceptOf ?s }
                      OPTIONAL {?tc skos:prefLabel ?tcpl }
                    }""",
                "ctx_project": self.vocab_uri,
            },
        )

        if r.status_code != 200:
            raise VbException("There was an error: " +
                              r.content.decode("utf-8"))

        try:
            metadata = json.loads(r.content.decode(
                "utf-8"))["result"]["sparql"]["results"]["bindings"]
        except (ValueError, KeyError, TypeError) as err:
            # ValueError covers both invalid JSON and undecodable bytes, so
            # decode leniently here to avoid failing again while reporting.
            raise VbException(
                r.content.decode("utf-8", errors="replace")) from err

        if not metadata:
            raise VbException("No ConceptScheme found in project " +
                              str(self.vocab_uri))

        def value_of(row, name):
            # OPTIONAL variables are simply missing from a row when unbound.
            binding = row.get(name)
            return binding.get("value") if binding is not None else None

        scheme = metadata[0]
        concept_hierarchy = self.get_concept_hierarchy(
            str(scheme["s"]["value"]))
        if not concept_hierarchy.strip():
            concept_hierarchy = None

        # ?tc and ?tcpl are both OPTIONAL, so skip rows lacking either one
        # instead of failing on a missing binding.
        top_concepts = [
            (value_of(x, "tc"), value_of(x, "tcpl")) for x in metadata
            if x.get("tc") is not None and x.get("tcpl") is not None
        ]

        from model.vocabulary import Vocabulary

        return Vocabulary(
            self.vocab_uri,
            scheme["s"]["value"],
            scheme["t"]["value"],
            value_of(scheme, "d"),
            value_of(scheme, "c"),
            value_of(scheme, "cr"),
            value_of(scheme, "m"),
            value_of(scheme, "v"),
            top_concepts,
            concept_hierarchy=concept_hierarchy,
        )
    def get_vocabulary(self):
        """Fetch this project's ConceptScheme metadata from VocBench.

        Posts a SPARQL query to the ``/SPARQL/evaluateQuery`` endpoint for the
        project ``self.vocab_id`` and builds a Vocabulary from the first
        result row plus every (top concept, prefLabel) pair in the result.

        Returns:
            A Vocabulary for the first ConceptScheme in the response. Optional
            properties that are unbound are passed as None, and
            ``concept_hierarchy`` is None when the hierarchy text is blank.

        Raises:
            VbException: if the HTTP status is not 200, the response body is
                not the expected JSON structure, or no ConceptScheme row is
                returned.
        """
        s = VOCBENCH('x', self.request)._authed_request_object()
        r = s.post(config.VB_ENDPOINT + '/SPARQL/evaluateQuery',
                   data={
                       'query':
                       '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                    PREFIX dct: <http://purl.org/dc/terms/>
                    PREFIX owl: <http://www.w3.org/2002/07/owl#>
                    SELECT *
                    WHERE {
                      ?s a skos:ConceptScheme ;
                      skos:prefLabel ?t .
                      OPTIONAL {?s dct:description ?d }
                      OPTIONAL {?s dct:creator ?c }
                      OPTIONAL {?s dct:created ?cr }
                      OPTIONAL {?s dct:modified ?m }
                      OPTIONAL {?s owl:versionInfo ?v }
                      OPTIONAL {?tc skos:topConceptOf ?s }
                      OPTIONAL {?tc skos:prefLabel ?tcpl }
                    }''',
                       'ctx_project': self.vocab_id
                   })

        if r.status_code != 200:
            raise VbException('There was an error: ' +
                              r.content.decode('utf-8'))

        try:
            metadata = json.loads(r.content.decode(
                'utf-8'))['result']['sparql']['results']['bindings']
        except (ValueError, KeyError, TypeError) as err:
            # ValueError covers both invalid JSON and undecodable bytes, so
            # decode leniently here to avoid failing again while reporting.
            raise VbException(
                r.content.decode('utf-8', errors='replace')) from err

        if not metadata:
            raise VbException('No ConceptScheme found in project ' +
                              str(self.vocab_id))

        def value_of(row, name):
            # OPTIONAL variables are simply missing from a row when unbound.
            binding = row.get(name)
            return binding.get('value') if binding is not None else None

        scheme = metadata[0]
        concept_hierarchy = self.get_concept_hierarchy(
            str(scheme['s']['value']))
        if not concept_hierarchy.strip():
            concept_hierarchy = None

        # ?tc and ?tcpl are both OPTIONAL, so skip rows lacking either one
        # instead of failing on a missing binding.
        top_concepts = [
            (value_of(x, 'tc'), value_of(x, 'tcpl')) for x in metadata
            if x.get('tc') is not None and x.get('tcpl') is not None
        ]

        from model.vocabulary import Vocabulary
        return Vocabulary(
            self.vocab_id,
            scheme['s']['value'],
            scheme['t']['value'],
            value_of(scheme, 'd'),
            value_of(scheme, 'c'),
            value_of(scheme, 'cr'),
            value_of(scheme, 'm'),
            value_of(scheme, 'v'),
            top_concepts,
            concept_hierarchy=concept_hierarchy)
示例#5
0
    def collect(details):
        """
        Collect vocabularies from the RVA catalogue API into ``g.VOCABS``.

        For this source, vocabs must be nominated via their ARDC ID (a number)
        and URI in details['vocabs'], for example:

        'rva': {
            'source': VocabSource.RVA,
            'api_endpoint': 'https://vocabs.ands.org.au/registry/api/resource/vocabularies/{}?includeAccessPoints=true',
            'vocabs': [
                {
                    'ardc_id': 50,
                    'uri': 'http://resource.geosciml.org/classifierscheme/cgi/2016.01/geologicunittype',
                },
                {
                    'ardc_id': 52,
                    'uri': 'http://resource.geosciml.org/classifierscheme/cgi/2016.01/contacttype',
                },
                {
                    'ardc_id': 57,
                    'uri': 'http://resource.geosciml.org/classifierscheme/cgi/2016.01/stratigraphicrank',
                }
            ]
        }

        Each vocab is requested from ``details['api_endpoint']`` formatted with
        its ``ardc_id``. A non-200 response is logged and that vocab skipped.
        Collected vocabs are merged into ``g.VOCABS`` under the key
        ``'rva-<ardc_id>'``.
        """

        # Get the details for each vocab from the RVA catalogue API
        logging.debug("RVA collect()...")
        rva_vocabs = {}
        for vocab in details["vocabs"]:
            r = requests.get(
                details["api_endpoint"].format(vocab["ardc_id"]),
                headers={"Accept": "application/json"},
            )
            if r.status_code != 200:
                logging.error("Could not get vocab {} from RVA".format(
                    vocab["ardc_id"]))
                continue

            j = json.loads(r.text)
            vocab_id = "rva-" + str(vocab["ardc_id"])
            # creation-date is read with .get(), so it may be absent; parsing
            # None would raise, so only parse when a value is present.
            created = j.get("creation-date")
            rva_vocabs[vocab_id] = Vocabulary(
                vocab_id,
                vocab["uri"],
                j["title"],
                j.get("description"),
                j.get("creator"),
                dateutil.parser.parse(created)
                if created is not None else None,
                None,
                j["version"][0]["title"],
                config.VocabSource.RVA,
                vocab["uri"],
                sparql_endpoint=j["version"][0]["access-point"][0]
                ["ap-api-sparql"]["url"],
            )
        g.VOCABS = {**g.VOCABS, **rva_vocabs}
        logging.debug("RVA collect() complete")
示例#6
0
文件: user.py 项目: chihchun/bebebe
 def home(self):
     """Load the current user and up to 200 of that user's vocabularies.

     The user comes from ``get_user_or_letlogin`` (called with '/user/home');
     both results are stored on the handler as ``self.user`` and
     ``self.vocabularies``.
     """
     self.user = get_user_or_letlogin(self, '/user/home')
     owned = Vocabulary.gql("WHERE user=:1", self.user)
     self.vocabularies = owned.fetch(200)
示例#7
0
    def get_vocabulary(self):
        """Assemble a Vocabulary from the ConceptScheme rows in ``self.g``.

        A single SPARQL query returns one row per combination of scheme
        metadata and top concept. The title is taken from skos:prefLabel,
        falling back to dct:title and then rdfs:label. For each scalar field
        the first non-None value seen across the rows is kept; every row with
        both a top concept and its label contributes one
        ``(concept, label)`` tuple.

        Side effects:
            Sets ``self.uri`` to the scheme URI of each row visited.

        Returns:
            A Vocabulary with ``hasTopConcepts`` sorted by label and
            ``conceptHierarchy`` set from ``self.get_concept_hierarchy()``.
        """
        from model.vocabulary import Vocabulary

        sparql = '''PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            PREFIX dct: <http://purl.org/dc/terms/>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            SELECT DISTINCT ?s ?title ?description ?creator ?created ?modified ?version ?hasTopConcept ?topConceptLabel
            WHERE {{
                {{
                    ?s a skos:ConceptScheme .
                    ?s skos:prefLabel ?title .                    
                }}
                UNION
                {{
                    ?s a skos:ConceptScheme .
                    ?s dct:title ?title . 
                    MINUS {{ ?s skos:prefLabel ?prefLabel }}
                }}
                UNION
                {{
                    ?s a skos:ConceptScheme .
                    ?s rdfs:label ?title . 
                    MINUS {{ ?s skos:prefLabel ?prefLabel }}
                    MINUS {{ ?s dct:title ?prefLabel }}
                }}
                OPTIONAL {{ ?s dct:description ?description }}
                OPTIONAL {{ ?s dct:creator ?creator }}
                OPTIONAL {{ ?s dct:created ?created }}
                OPTIONAL {{ ?s dct:modified ?modified }}
                OPTIONAL {{ ?s owl:versionInfo ?version }}
                OPTIONAL {{ 
                    ?s skos:hasTopConcept ?hasTopConcept .
                    ?hasTopConcept skos:prefLabel ?topConceptLabel .
              }}
            }}'''
        rows = self.g.query(sparql)

        # Scalar metadata: the first non-None value per field wins.
        scalar_fields = ('title', 'description', 'creator', 'created',
                         'modified', 'version')
        first_seen = dict.fromkeys(scalar_fields)
        top_concepts = []

        for row in rows:
            self.uri = str(row['s'])
            for field in scalar_fields:
                if first_seen[field] is None:
                    first_seen[field] = row[field]
            if row['hasTopConcept'] and row['topConceptLabel'] is not None:
                top_concepts.append(
                    (row['hasTopConcept'], row['topConceptLabel']))

        vocabulary = Vocabulary(
            self.vocab_id,
            self.uri,
            first_seen['title'],
            first_seen['description'],
            first_seen['creator'],
            first_seen['created'],
            first_seen['modified'],
            first_seen['version'],
            top_concepts
        )

        # order the top concepts by their label
        vocabulary.hasTopConcepts = top_concepts
        if vocabulary.hasTopConcepts:
            vocabulary.hasTopConcepts.sort(key=lambda pair: pair[1])
        vocabulary.conceptHierarchy = self.get_concept_hierarchy()
        return vocabulary
示例#8
0
    def collect(details):
        """Load every vocabulary file on disk into ``g.VOCABS``.

        Walks ``<config.APP_DIR>/data/vocab_files``. Each file whose extension
        is a key of ``FILE.MAPPER`` is parsed into a graph, the graph is
        pickled next to the source file as ``<vocab_id>.p``, and every
        skos:ConceptScheme found in it is registered as a Vocabulary under
        ``vocab_id`` (the file name up to its first dot).

        Args:
            details: source configuration; not used by this collector.
        """
        # The query does not depend on the file, so build it once.
        # Get the ConceptSchemes from the graph of the file
        # Interpret the CS as a Vocab
        q = """
                        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
                        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                        PREFIX dcterms: <http://purl.org/dc/terms/>
                        PREFIX owl: <http://www.w3.org/2002/07/owl#>
                        SELECT 
                            ?cs          # 0
                            ?title       # 1 
                            ?created     # 2
                            ?issued      # 3
                            ?modified    # 4
                            ?version     # 5
                            ?description # 6
                        WHERE {{
                            ?cs a skos:ConceptScheme .
                            OPTIONAL {{ 
                                ?cs skos:prefLabel ?title .
                                FILTER(lang(?title) = "{language}" || lang(?title) = "") 
                            }}
                            OPTIONAL {{ ?cs dcterms:created ?created }}
                            OPTIONAL {{ ?cs dcterms:issued ?issued }}
                            OPTIONAL {{ ?cs dcterms:modified ?modified }}
                            OPTIONAL {{ ?cs owl:versionInfo ?version }}
                            OPTIONAL {{ 
                                ?cs skos:definition ?description .
                                FILTER(lang(?description) = "{language}" || lang(?description) = "") 
                            }}
                        }}""".format(language=DEFAULT_LANGUAGE)

        file_vocabs = {}
        # find all files in project_directory/data/vocab_files
        for path, _subdirs, files in os.walk(
                join(config.APP_DIR, "data", "vocab_files")):
            for name in files:
                extension = name.split(".")[-1]
                if extension not in FILE.MAPPER:
                    continue

                # load the file into a graph
                gr = Graph().parse(os.path.join(path, name),
                                   format=FILE.MAPPER[extension])
                vocab_id = name.split(".")[0]

                # pickle to directory/vocab_files/; the with-block closes f
                with open(join(path, vocab_id + ".p"), "wb") as f:
                    pickle.dump(gr, f)

                # extract vocab metadata
                for cs in gr.query(q):
                    file_vocabs[vocab_id] = Vocabulary(
                        vocab_id,
                        str(cs[0]),
                        # Need string, not None: fall back to the URI when the
                        # title is unbound or empty. Test the value itself,
                        # because str(None) is the truthy string "None".
                        str(cs[1]) if cs[1] else str(cs[0]),
                        str(cs[6]) if cs[6] is not None else None,
                        None,
                        dateutil.parser.parse(str(cs[2]))
                        if cs[2] is not None else None,
                        dateutil.parser.parse(str(cs[4]))
                        if cs[4] is not None else None,
                        str(cs[5])
                        if cs[5] is not None else None,  # versionInfo
                        config.VocabSource.FILE,
                        cs[0],
                    )
        g.VOCABS = {**g.VOCABS, **file_vocabs}

        logging.debug("FILE collect() complete.")
示例#9
0
    def collect(details):
        """
        For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

        'ga-jena-fuseki': {
            'source': VocabSource.SPARQL,
            'sparql_endpoint': 'http://dev2.nextgen.vocabs.ga.gov.au/fuseki/vocabs',
            'sparql_username': '******', # Optional username for SPARQL endpoint
            'sparql_password': '******', # Optional password for SPARQL endpoint
            #'uri_filter_regex': '.*', # Regular expression to filter vocabulary URIs - Everything
            #'uri_filter_regex': '^http(s?)://pid.geoscience.gov.au/def/voc/ga/', # Regular expression to filter vocabulary URIs - GA
            #'uri_filter_regex': '^https://gcmdservices.gsfc.nasa.gov', # Regular expression to filter vocabulary URIs - GCMD
            'uri_filter_regex': '^http(s?)://resource.geosciml.org/', # Regular expression to filter vocabulary URIs - CGI

        },
        """
        logging.debug("SPARQL collect()...")

        # Get all the ConceptSchemes from the SPARQL endpoint
        # Interpret each CS as a Vocab
        q = """PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
SELECT * WHERE {{
    {{ GRAPH ?g {{
        ?cs a skos:ConceptScheme .
        OPTIONAL {{ ?cs skos:prefLabel ?title .
            FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
        OPTIONAL {{ ?cs dcterms:created ?created }}
        OPTIONAL {{ ?cs dcterms:issued ?issued }}
        OPTIONAL {{ ?cs dcterms:modified ?modified }}
        OPTIONAL {{ ?cs owl:versionInfo ?version }}
        OPTIONAL {{ ?cs skos:definition ?description .
            FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
    }} }}
    UNION
    {{
        ?cs a skos:ConceptScheme .
        OPTIONAL {{ ?cs skos:prefLabel ?title .
            FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
        OPTIONAL {{ ?cs dcterms:created ?created }}
        OPTIONAL {{ ?cs dcterms:issued ?issued }}
        OPTIONAL {{ ?cs dcterms:modified ?modified }}
        OPTIONAL {{ ?cs owl:versionInfo ?version }}
        OPTIONAL {{ ?cs skos:definition ?description .
            FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
    }}
}} 
ORDER BY ?title""".format(language=DEFAULT_LANGUAGE)
        # record just the IDs & title for the VocPrez in-memory vocabs list
        concept_schemes = Source.sparql_query(
            details["sparql_endpoint"],
            q,
            sparql_username=details.get("sparql_username"),
            sparql_password=details.get("sparql_password"),
        )
        assert concept_schemes is not None, "Unable to query conceptSchemes"

        sparql_vocabs = {}
        for cs in concept_schemes:
            # handling CS URIs that end with '/'
            vocab_id = cs["cs"]["value"].replace("/conceptScheme",
                                                 "").split("/")[-1]

            # TODO: Investigate putting regex into SPARQL query
            # print("re.search('{}', '{}')".format(details.get('uri_filter_regex'), cs['cs']['value']))
            if details.get("uri_filter_regex") and not re.search(
                    details["uri_filter_regex"], cs["cs"]["value"]):
                logging.debug("Skipping vocabulary {}".format(vocab_id))
                continue

            if len(vocab_id) < 2:
                vocab_id = cs["cs"]["value"].split("/")[-2]

            sparql_vocabs[vocab_id] = Vocabulary(
                vocab_id,
                cs["cs"]["value"],
                cs["title"].get("value") or vocab_id if cs.get("title") else
                vocab_id,  # Need string value for sorting, not None
                cs["description"].get("value")
                if cs.get("description") is not None else None,
                None,  # none of these SPARQL vocabs have creator info yet # TODO: add creator info to GSQ vocabs
                dateutil.parser.parse(cs.get("created").get("value"))
                if cs.get("created") is not None else None,
                # dct:issued not in Vocabulary
                # dateutil.parser.parse(cs.get('issued').get('value')) if cs.get('issued') is not None else None,
                dateutil.parser.parse(cs.get("modified").get("value"))
                if cs.get("modified") is not None else None,
                cs["version"].get("value")
                if cs.get("version") is not None else None,  # versionInfo
                config.VocabSource.SPARQL,
                cs["cs"]["value"],
                sparql_endpoint=details["sparql_endpoint"],
                sparql_username=details["sparql_username"],
                sparql_password=details["sparql_password"],
            )
        g.VOCABS = {**g.VOCABS, **sparql_vocabs}
        logging.debug("SPARQL collect() complete.")
示例#10
0
    def collect(details):
        """
        Collect every ConceptScheme on a SPARQL endpoint into ``g.VOCABS``.

        For this source, one SPARQL endpoint is given for a series of vocabs which are all separate ConceptSchemes

        'gsq-graphdb': {
            'source': VocabSource.SPARQL,
            'sparql_endpoint': 'http://graphdb.gsq.digital:7200/repositories/GSQ_Vocabularies_core'
        },

        'sparql_username', 'sparql_password' and 'uri_filter_regex' are
        optional keys. The vocab ID is the last path segment of the scheme URI
        (after removing '/conceptScheme'). When the endpoint query returns
        nothing, no vocabularies are added.
        """
        logging.debug('SPARQL collect()...')

        # Get all the ConceptSchemes from the SPARQL endpoint
        # Interpret each CS as a Vocab
        # Results are ordered by ?title; the earlier ?l was never bound, so
        # the ORDER BY had no effect.
        q = '''
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX dcterms: <http://purl.org/dc/terms/>
            SELECT * WHERE {{
                GRAPH ?g {{
                    ?cs a skos:ConceptScheme .
                    OPTIONAL {{ ?cs skos:prefLabel ?title .
                        FILTER(lang(?title) = "{language}" || lang(?title) = "") }}
                    OPTIONAL {{ ?cs dcterms:created ?created }}
                    OPTIONAL {{ ?cs dcterms:issued ?issued }}
                    OPTIONAL {{ ?cs dcterms:modified ?modified }}
                    OPTIONAL {{ ?cs skos:definition ?description .
                        FILTER(lang(?description) = "{language}" || lang(?description) = "") }}
                }}
            }} 
            ORDER BY ?title
        '''.format(language=DEFAULT_LANGUAGE)

        # Credentials are optional (the example config above has none), so
        # read them with .get() everywhere instead of indexing.
        sparql_username = details.get('sparql_username')
        sparql_password = details.get('sparql_password')

        # record just the IDs & title for the VocPrez in-memory vocabs list
        concept_schemes = Source.sparql_query(
            details['sparql_endpoint'],
            q,
            sparql_username=sparql_username,
            sparql_password=sparql_password) or []

        sparql_vocabs = {}
        for cs in concept_schemes:
            scheme_uri = cs['cs']['value']

            def value_of(name):
                # OPTIONAL variables are missing from the row when unbound.
                binding = cs.get(name)
                return binding.get('value') if binding is not None else None

            vocab_id = scheme_uri.replace('/conceptScheme', '').split('/')[-1]

            if details.get('uri_filter_regex') and not re.search(
                    details['uri_filter_regex'], scheme_uri):
                logging.debug('Skipping vocabulary {}'.format(vocab_id))
                continue

            # handling CS URIs that end with '/'
            if len(vocab_id) < 2:
                vocab_id = scheme_uri.split('/')[-2]

            created = value_of('created')
            modified = value_of('modified')

            sparql_vocabs[vocab_id] = Vocabulary(
                vocab_id,
                scheme_uri.replace('/conceptScheme', ''),
                # Need string value for sorting, not None
                value_of('title') or vocab_id,
                value_of('description'),
                None,  # none of these SPARQL vocabs have creator info yet # TODO: add creator info to GSQ vocabs
                dateutil.parser.parse(created)
                if created is not None else None,
                # dct:issued not in Vocabulary
                dateutil.parser.parse(modified)
                if modified is not None else None,
                None,  # versionInfo
                config.VocabSource.SPARQL,
                scheme_uri,
                sparql_endpoint=details['sparql_endpoint'],
                sparql_username=sparql_username,
                sparql_password=sparql_password)
        g.VOCABS = {**g.VOCABS, **sparql_vocabs}
        logging.debug('SPARQL collect() complete.')