def _parseRDFFromString(contents, baseuri, type='unknown', scope=None,
                        options=None):
    """Parse ``contents`` into statements, sniffing the format when needed.

    Parameters:
      contents -- the data to parse: a byte string, a unicode string (encoded
                  to UTF-8 before parsing), or an already-decoded list, tuple
                  or dict.
      baseuri  -- base URI handed to the underlying parsers.
      type     -- format name ('ntriples', 'ntjson', 'rss-tag-soup', 'rdfxml',
                  'turtle', 'json', 'pjson', 'yaml', 'mjson') or 'unknown' to
                  have the format guessed from the contents. A name prefixed
                  with 'http://rx4rdf.sf.net/ns/wiki#rdfformat-' is reduced to
                  the part after the first '-'. (The name shadows the builtin
                  but is part of the public signature, so it is kept.)
      scope    -- scope passed to the parsers; None is replaced by ''.
      options  -- optional dict of extra keyword arguments for the N-Triples
                  and pjson parsers. It is copied, never modified.

    Returns a ``(statements, type)`` pair. When the input is empty or already
    looks like a list of statements the type is 'statements'.
    NOTE(review): for 'rdfxml'/'turtle' the result of ``rdfParser`` /
    ``_parseRDFWithRdfLib`` is returned unchanged; presumably those return the
    same kind of pair -- confirm.

    Raises ParseException for unsupported or unparsable input; any other
    exception raised while parsing is converted to a ParseException.
    """
    from vesper.data import base
    if scope is None:
        scope = ''  # workaround 4suite bug
    # Work on a private copy: 'scope' and 'generateBnode' are added below and
    # must not leak into the dict owned by the caller.
    options = dict(options) if options else {}
    if type.startswith('http://rx4rdf.sf.net/ns/wiki#rdfformat-'):
        type = type.split('-', 1)[1]

    if isinstance(contents, unicode):
        contents = contents.encode('utf8')

    try:
        # Format sniffing. The loop runs at most once: every path either
        # returns, breaks, raises, or assigns a concrete type (which ends the
        # loop through its condition).
        while type == 'unknown':
            if isinstance(contents, (list, tuple)):
                if not contents:
                    return contents, 'statements'
                if isinstance(contents[0], (tuple, BaseStatement)):
                    # looks like already a list of statements
                    return contents, 'statements'
                # otherwise assume pjson
                type = 'pjson'
                break
            elif isinstance(contents, dict):
                type = 'pjson'  # assume pjson
                break

            # Only the first non-blank characters are needed to guess.
            startcontents = contents[:256].lstrip()
            if not startcontents:  # empty
                return [], 'statements'

            if startcontents[0] in '{[':
                type = 'pjson'  # assume pjson
                break
            else:
                from vesper import multipartjson
                if multipartjson.looks_like_multipartjson(startcontents):
                    type = 'mjson'
                    break

            try:
                from vesper.utils import htmlfilter
                ns, prefix, local = htmlfilter.getRootElementName(contents)
                if ns == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#':
                    type = 'rdfxml'
                elif local == 'rx':
                    type = 'rxml_xml'
                elif ns == 'http://purl.org/atom/ns#' or local == 'rss':
                    type = "rss-tag-soup"
                else:
                    raise ParseException(
                        "RDF parse error: Unsupported XML vocabulary: " + local)
            except Exception:
                # Not recognisable XML (or an unsupported vocabulary):
                # hmmm, try our NTriples parser
                try:
                    # convert generator to list to force parsing now, so that
                    # a parse failure surfaces here rather than in the caller
                    stmts = list(NTriples2Statements(
                        StringIO.StringIO(contents), scope, baseuri,
                        **options))
                except Exception:
                    raise ParseException(
                        "unrecognized or invalid file contents")
                # Return the same (statements, type) pair as the other paths.
                return stmts, 'ntriples'

        if type in ['ntriples', 'ntjson']:
            # use our parser
            return NTriples2Statements(StringIO.StringIO(contents), scope,
                                       baseuri, **options), type
        elif type == 'rss-tag-soup':
            try:
                # only redland's parser supports these
                import RDF
                parser = RDF.Parser(type)
                stream = parser.parse_string_as_stream(contents, baseuri)
                return base.redland2Statements(stream, scope), type
            except ImportError:
                raise ParseException(
                    "RDF parse error: " + type +
                    " is only supported by Redland, which isn't installed")
        elif type == 'rdfxml' or type == 'turtle':
            if rdfParser:
                return rdfParser(contents, baseuri, type, scope)
            try:
                return _parseRDFWithRdfLib(contents, baseuri, type, scope)
            except ImportError:
                # NOTE(review): this fallback is labelled "try redland's
                # parser" but calls the rdflib helper again; presumably a
                # Redland-specific helper was intended -- confirm.
                try:
                    return _parseRDFWithRdfLib(contents, baseuri, type, scope)
                except ImportError:
                    raise ParseException("no %s parser installed" % type)
        elif type == 'json':
            return _parseRDFJSON(contents, scope), type
        elif type == 'pjson' or type == 'yaml' or type == 'mjson':
            from vesper import pjson
            # Lists/dicts are already decoded; only strings need loading.
            if isinstance(contents, str):
                if type == 'yaml':
                    import yaml
                    contents = yaml.safe_load(contents)
                elif type == 'mjson':
                    from vesper import multipartjson
                    # The decoded document must replace ``contents`` so that
                    # it, not the raw string, is converted to statements.
                    contents = multipartjson.loads(contents, False)
                else:
                    contents = json.loads(contents)

            options['scope'] = scope
            # XXX generateBnode doesn't detect collisions, maybe gen UUID
            # instead
            if 'generateBnode' not in options:
                options['generateBnode'] = 'uuid'
            if not contents:
                return [], type
            return pjson.tostatements(contents, **options), type
        else:
            raise ParseException('unsupported type: ' + type)
    except ParseException:
        raise
    except Exception:
        # an unexpected Exception; KeyboardInterrupt/SystemExit are
        # deliberately allowed to propagate untouched
        raise ParseException("error parsing " + type)