示例#1
0
文件: RdfParser.py 项目: spsu/sylph
class RdfParser(object):
	"""A basic wrapper for RdfLib's RDF parser.

	The constructor loads an RDF document (given as a string) into
	`self.graph`; `extract` then pulls every resource of a given type out
	of that graph as a list of plain dictionaries.
	"""

	def __init__(self, rdf, format='guess'):
		"""Init the parser with the graph string.

		rdf    -- the serialized RDF document, as a string.
		format -- the format name handed to the graph loader, or 'guess'
		          (the default) to pick one with a simple heuristic.

		Parsing is best-effort: if loading fails, a message and the first
		100 characters of the input are printed and `self.graph` is left
		in whatever state the loader left it (normally empty).
		"""
		self.graph = Graph()
		if format == 'guess':
			format = self.__guess_format(rdf)
			# Single-argument print(...) prints the same text whether
			# print is a statement or a function.
			print('RdfParser guesses format to be: %s' % format)
		try:
			self.graph.load(StringIO(rdf), format=format)
		except Exception:
			# Deliberately not a bare `except:` so that KeyboardInterrupt
			# and SystemExit still propagate.
			print("Failed to parse RDF:")
			print(rdf[0:100])

	def extract(self, datatype):
		"""Extract all of the data of a given datatype.

		datatype -- local name of the type, looked up in the 'sylph'
		            namespace of RdfSerializer.NAMESPACES.

		Returns a list with one dict per subject whose rdf:type is that
		type. Each dict has a 'uri' key holding the subject as a string,
		plus one key per predicate, named after the part of the predicate
		URI following its last '#' or '/'. If a subject has several
		values for the same predicate, only the last one iterated is kept.

		Object values are converted as follows:
		  * the namespace's 'None' resource -> None
		  * URIRef (or a subclass)          -> unicode string
		  * Literal                         -> obj.toPython(), falling
		    back to a unicode string if that still yields a Literal
		  * anything else is stored unchanged.
		"""
		data = []
		ns = RdfSerializer.NAMESPACES['sylph'] # TODO: Awkward.
		for sub in self.graph.subjects(RDF.type, ns[datatype]):
			item = {'uri': str(sub)}
			for pred, obj in self.graph.predicate_objects(sub):
				if pred == RDF.type:
					# The type is what we selected on; not item data.
					continue
				if obj == ns['None']:
					obj = None
				elif isinstance(obj, URIRef):
					obj = unicode(obj)
				elif isinstance(obj, Literal):
					obj = obj.toPython()
					if isinstance(obj, Literal): # Don't be silly, RdfLib!
						obj = unicode(obj)

				# Keep only the local name: text after the last '#',
				# then after the last '/'.
				predstr = str(pred).rpartition('#')[2].rpartition('/')[2]
				item[predstr] = obj
			data.append(item)
		return data

	@staticmethod
	def __guess_format(st):
		"""Guess the format of the input string.

		Returns 'xml' if the first non-whitespace character is '<',
		otherwise 'n3' (including for empty input).
		"""
		# TODO: At present, it can only guess between XML and n3, even
		# then this is a vague heuristic.
		# Leading whitespace/newlines are skipped so that an XML document
		# preceded by blank space is not mistaken for n3.
		if st.lstrip().startswith('<'):
			return 'xml'
		return 'n3'