Пример #1
0
    def get_entities_of_class(self, _class_uri):
        """
        Run the GET_ENTITIES_OF_CLASS query for a class and collect its entities.

        Args:
            _class_uri: URI of the class. A shorthand is accepted, but it
                triggers a warning and is expanded through
                nlutils.convert_shorthand_to_uri before querying.

        Returns:
            A list [E, E, E, ...] holding the ASCII-encoded 'entity' value of
            every result binding. The list is empty when the response could
            not be parsed (the traceback is printed in that case).
        """
        # Check if the class URI is shorthand or a proper URI.
        if not nlutils.has_url(_class_uri):
            warnings.warn(
                "The passed class %s is not a proper URI but is in shorthand. This is strongly discouraged."
                % _class_uri)
            _class_uri = nlutils.convert_shorthand_to_uri(_class_uri)

        # Prepare and run the SPARQL query; the URI is wrapped in <...> for the template.
        sparql = SPARQLWrapper(self.select_sparql_endpoint())
        target_class = '<' + _class_uri + '>'
        sparql.setQuery(GET_ENTITIES_OF_CLASS % {'target_class': target_class})
        sparql.setReturnFormat(JSON)
        response = sparql.query().convert()

        # Start from an empty result so a malformed response yields [] instead
        # of an UnboundLocalError at the return statement.
        entity_list = []
        try:
            entity_list = [
                x[u'entity'][u'value'].encode('ascii', 'ignore')
                for x in response[u'results'][u'bindings']
            ]
        except (KeyError, TypeError, AttributeError):
            # Best effort: report the unexpected response shape and fall back to [].
            traceback.print_exc()

        return entity_list
    def get_type_of_resource(self, _resource_uri, _filter_dbpedia=False):
        """
        Fetch the types of a given entity via the GET_TYPE_OF_RESOURCE query.

        Args:
            _resource_uri: URI of the resource. A shorthand is accepted, but it
                triggers a warning and is expanded through
                nlutils.convert_shorthand_to_uri before querying.
            _filter_dbpedia: when True, keep only the types that start with the
                DBpedia ontology or property namespace.

        Returns:
            A list of ASCII-encoded 'type' values taken from the result
            bindings (optionally filtered). The list is empty when the response
            could not be parsed (the traceback is printed in that case).
        """
        # @TODO: Add basic caching setup.
        if not nlutils.has_url(_resource_uri):
            warnings.warn(
                "The passed resource %s is not a proper URI but probably a shorthand. This is strongly discouraged."
                % _resource_uri)
            _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)
        _resource_uri = '<' + _resource_uri + '>'
        response = self.shoot_custom_query(GET_TYPE_OF_RESOURCE %
                                           {'target_resource': _resource_uri})

        # Start from an empty result so a malformed response yields [] instead
        # of an UnboundLocalError further down.
        type_list = []
        try:
            type_list = [
                x[u'type'][u'value'].encode('ascii', 'ignore')
                for x in response[u'results'][u'bindings']
            ]
        except (KeyError, TypeError, AttributeError):
            # Best effort: report the unexpected response shape and fall back to [].
            traceback.print_exc()

        if not _filter_dbpedia:
            return type_list

        # Keep only DBpedia's types. Both namespaces are exactly 28 characters
        # long, which is why a fixed-width prefix comparison is enough.
        # NOTE(review): under Python 3 .encode() yields bytes, which never equal
        # these str prefixes -- confirm the target interpreter.
        dbpedia_prefixes = (
            'http://dbpedia.org/ontology/',
            'http://dbpedia.org/property/',
        )
        return [x for x in type_list if x[:28] in dbpedia_prefixes]
    def get_properties_of_resource(self,
                                   _resource_uri,
                                   _with_connected_resource=False,
                                   right=True):
        """
        Fetch the properties connected to '_resource_uri', in the format _resource -> R -> O.

        Args:
            _resource_uri: URI of the resource. A shorthand is accepted, but it
                triggers a warning and is expanded through
                nlutils.convert_shorthand_to_uri before querying.
            _with_connected_resource: when True, return [R, O] pairs instead of
                just R.
            right: when True the GET_RIGHT_* query template is used, otherwise
                the GET_LEFT_* one (presumably resource-as-subject versus
                resource-as-object -- confirm against the query templates).

        Returns:
            if _with_connected_resource == True, [ [R,O], [R,O], [R,O] ...]
            else [ R,R,R,R...]
            The list is empty when the response could not be parsed (the
            traceback is printed in that case).
        """
        # Check if the resource URI is shorthand or a proper URI.
        if not nlutils.has_url(_resource_uri):
            warnings.warn(
                "The passed resource %s is not a proper URI but is in shorthand. This is strongly discouraged."
                % _resource_uri)
            _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

        # The templates expect the URI wrapped in angle brackets.
        _resource_uri = '<' + _resource_uri + '>'

        # Pick the template matching the requested direction and output shape.
        if _with_connected_resource:
            if right:
                template = GET_RIGHT_PROPERTIES_OF_RESOURCE_WITH_OBJECTS
            else:
                template = GET_LEFT_PROPERTIES_OF_RESOURCE_WITH_OBJECTS
        else:
            if right:
                template = GET_RIGHT_PROPERTIES_OF_RESOURCE
            else:
                template = GET_LEFT_PROPERTIES_OF_RESOURCE
        temp_query = template % {'target_resource': _resource_uri}

        # The query has to run for every combination of flags; it used to be
        # issued only when _with_connected_resource was False.
        response = self.shoot_custom_query(temp_query)

        # Start from an empty result so a malformed response yields [] instead
        # of an UnboundLocalError at the return statement.
        property_list = []
        try:
            bindings = response[u'results'][u'bindings']
            if _with_connected_resource:
                property_list = [[
                    x[u'property'][u'value'].encode('ascii', 'ignore'),
                    x[u'resource'][u'value'].encode('ascii', 'ignore')
                ] for x in bindings]
            else:
                property_list = [
                    x[u'property'][u'value'].encode('ascii', 'ignore')
                    for x in bindings
                ]
        except (KeyError, TypeError, AttributeError):
            # Best effort: report the unexpected response shape and fall back to [].
            traceback.print_exc()

        return property_list
Пример #4
0
    def get_properties_on_resource(self, _resource_uri):
        """
        Run the GET_PROPERTIES_ON_RESOURCE query for the given resource.

        The query is meant to find properties that point to this resource, e.g.
        Barack Obama -> Ex-President of -> _resource_uri would yield
        "ex-president of" as the relation.

        A shorthand URI is accepted, but it triggers a warning and is expanded
        through nlutils.convert_shorthand_to_uri before querying.

        NOTE(review): the query response is neither parsed nor returned, so
        callers always receive None -- confirm whether this method is unfinished.
        NOTE(review): the URI is passed to the template without surrounding
        angle brackets -- verify that the template expects this.
        """
        is_proper_uri = nlutils.has_url(_resource_uri)
        if not is_proper_uri:
            message = "The passed resource %s is not a proper URI but is in shorthand. This is strongly discouraged." % _resource_uri
            warnings.warn(message)
            _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

        query = GET_PROPERTIES_ON_RESOURCE % {'target_resource': _resource_uri}
        # The result is deliberately left unused, as nothing is returned.
        self.shoot_custom_query(query)
    def get_most_specific_class(self, _resource_uri):
        """
        Find the most specific DBPedia Ontology class given a URI.

        Every DBpedia type of the resource is scored by the number of bindings
        the GET_CLASS_PATH query returns for it (the length of its path to
        owl:Thing), and the type with the highest count is returned.

        Limitation: works only with resources.
        @TODO: Extend this to work with ontology (not entities) too. Or properties.

        Args:
            _resource_uri: URI of the resource. A shorthand is accepted, but it
                triggers a warning and is expanded through
                nlutils.convert_shorthand_to_uri.

        Returns:
            The URI of the class with the longest path, or
            "http://www.w3.org/2002/07/owl#Thing" when the resource has no
            DBpedia types. A class whose path lookup fails is scored 0.
        """
        if not nlutils.has_url(_resource_uri):
            warnings.warn(
                "The passed resource %s is not a proper URI but probably a shorthand. This is strongly discouraged."
                % _resource_uri)
            _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

        # Get the DBpedia classes of the resource.
        classes = self.get_type_of_resource(_resource_uri,
                                            _filter_dbpedia=True)

        # A list of (class_uri, path_length) tuples, used to pick the deepest class.
        length_array = []

        # For every class, find the length of the path to owl:Thing.
        for class_uri in classes:
            target_class = '<' + class_uri + '>'

            # Query and parse in one guarded step. On any failure the class is
            # scored with an empty path; previously the code either crashed on
            # an unbound name or silently reused the previous class's results.
            try:
                response = self.shoot_custom_query(
                    GET_CLASS_PATH % {'target_class': target_class})
                results = [
                    x[u'type'][u'value'].encode('ascii', 'ignore')
                    for x in response[u'results'][u'bindings']
                ]
            except Exception:
                # Best effort: the failure modes of shoot_custom_query are not
                # visible here, so report the problem and keep going.
                traceback.print_exc()
                results = []

            # Count the number of returned classes and store it in the list.
            length_array.append((class_uri, len(results)))

        if length_array:
            return max(length_array, key=itemgetter(1))[0]

        # If there are no results from the filtered types, return owl:Thing.
        return "http://www.w3.org/2002/07/owl#Thing"
    def get_label(self, _resource_uri):
        """
        Return the label of a resource as produced by nlutils.get_label_via_parsing.

        A shorthand is first expanded through nlutils.convert_shorthand_to_uri
        (no warning is raised here). The URI is then normalised so that it is
        wrapped in exactly one pair of angle brackets before being handed to
        the parsing helper, whose result is returned unchanged.
        """
        uri = _resource_uri
        if not nlutils.has_url(uri):
            uri = nlutils.convert_shorthand_to_uri(uri)

        # Drop every angle bracket already present, then wrap the bare URI again.
        without_opening = uri.replace('<', '')
        bare_uri = without_opening.replace('>', '')
        bracketed_uri = '<' + bare_uri + '>'

        label = nlutils.get_label_via_parsing(bracketed_uri)
        return label
Пример #7
0
    def get_type_of_resource(self, _resource_uri, _filter_dbpedia=False):
        """
        Fetch the types of a given entity via the GET_TYPE_OF_RESOURCE query.

        Args:
            _resource_uri: URI of the resource. A shorthand is accepted, but it
                triggers a warning and is expanded through
                nlutils.convert_shorthand_to_uri before querying.
            _filter_dbpedia: when True, keep only the types that start with the
                DBpedia ontology or property namespace.

        Returns:
            A list of ASCII-encoded 'type' values taken from the result
            bindings (optionally filtered). The list is empty when the response
            could not be parsed (the traceback is printed in that case).
        """
        if not nlutils.has_url(_resource_uri):
            warnings.warn(
                "The passed resource %s is not a proper URI but probably a shorthand. This is strongly discouraged."
                % _resource_uri)
            _resource_uri = nlutils.convert_shorthand_to_uri(_resource_uri)

        # Prepare and run the SPARQL query; the URI is wrapped in <...> for the template.
        sparql = SPARQLWrapper(self.select_sparql_endpoint())
        _resource_uri = '<' + _resource_uri + '>'
        sparql.setQuery(GET_TYPE_OF_RESOURCE %
                        {'target_resource': _resource_uri})
        sparql.setReturnFormat(JSON)
        response = sparql.query().convert()

        # Start from an empty result so a malformed response yields [] instead
        # of an UnboundLocalError further down.
        type_list = []
        try:
            type_list = [
                x[u'type'][u'value'].encode('ascii', 'ignore')
                for x in response[u'results'][u'bindings']
            ]
        except (KeyError, TypeError, AttributeError):
            # Best effort: report the unexpected response shape and fall back to [].
            traceback.print_exc()

        if not _filter_dbpedia:
            return type_list

        # Keep only DBpedia's types. Both namespaces are exactly 28 characters
        # long, which is why a fixed-width prefix comparison is enough.
        # NOTE(review): under Python 3 .encode() yields bytes, which never equal
        # these str prefixes -- confirm the target interpreter.
        dbpedia_prefixes = (
            'http://dbpedia.org/ontology/',
            'http://dbpedia.org/property/',
        )
        return [x for x in type_list if x[:28] in dbpedia_prefixes]