def _schema_nodes(self):
    """parse self._ontology_file into a graph"""
    name, ext = os.path.splitext(self._ontology_file)
    if ext in ['.ttl']:
        self._ontology_parser_function = lambda s: rdflib.Graph().parse(s, format='n3')
    else:
        self._ontology_parser_function = lambda s: pyRdfa().graph_from_source(s)

    errorstring = "Are you calling parse_ontology from the base SchemaDef class?"
    if not self._ontology_parser_function:
        raise ValueError("No function found to parse ontology. %s" % errorstring)
    if not self._ontology_file:
        raise ValueError("No ontology file specified. %s" % errorstring)
    if not self.lexicon:
        raise ValueError("No lexicon object assigned. %s" % errorstring)

    latest_file = self._pull_standard()
    try:
        self.graph = self._ontology_parser_function(latest_file)
    except:
        raise IOError("Error parsing ontology at %s" % latest_file)

    for subj, pred, obj in self.graph:
        self.ontology[subj].append((pred, obj))
        yield (subj, pred, obj)

def extrair_rdfa(url):
    """Extract the RDFa embedded in the page at `url` and return it as an rdflib Graph."""
    options = Options(embedded_rdf=True)
    # Serialize the RDFa found at the URL as RDF/XML, then re-parse it into an rdflib Graph.
    g1 = pyRdfa(options=options).rdf_from_source(url, outputFormat='pretty-xml')
    g = Graph()
    g.parse(io.BytesIO(g1), format='xml')
    return g

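# A minimal usage sketch for extrair_rdfa above, assuming the imports it relies on
# (io, rdflib.Graph, pyRdfa, Options) are in scope; the URL comes from the snippet's
# own comments and is only illustrative.
if __name__ == "__main__":
    graph = extrair_rdfa("http://rbarbosa.me/ex.html")
    for subj, pred, obj in graph:
        print(subj, pred, obj)
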
def __init__(self, source):
    super(CompoundGraph, self).__init__()
    # Try each syntax independently; a failure in one parser should not block the other.
    try:
        self.microdata_graph = pyMicrodata().graph_from_source(source)
    except:
        self.microdata_graph = None
    try:
        self.rdfa_graph = pyRdfa().graph_from_source(source)
    except:
        self.rdfa_graph = None

def parse(self, source, graph, pgraph=None, embedded_rdf=True,
          vocab_expansion=False, vocab_cache=False, rdfOutput=False):
    """
    @param source: one of the input sources that the RDFLib package defined
    @type source: InputSource class instance
    @param graph: target graph for the triples; output graph, in RDFa spec. parlance
    @type graph: RDFLib Graph
    @keyword pgraph: target for error and warning triples; processor graph, in RDFa spec.
    parlance. If set to None, these triples are ignored
    @type pgraph: RDFLib Graph
    @keyword embedded_rdf: some formats allow embedding RDF in other formats: (X)HTML can
    contain Turtle in a special <script> element, SVG can have RDF/XML embedded in a
    <metadata> element. This flag controls whether those triples should be interpreted and
    added to the output graph. Some languages (e.g., SVG) always require this, in which case
    the flag is ignored.
    @type embedded_rdf: Boolean
    @keyword vocab_expansion: whether the RDFa @vocab attribute should also trigger
    vocabulary expansion (see the RDFa 1.1 spec for further details)
    @type vocab_expansion: Boolean
    @keyword vocab_cache: in case vocab expansion is used, whether the expansion data
    (i.e., the vocabulary) should be cached locally. This requires that the local
    application may write to the local file system
    @type vocab_cache: Boolean
    @keyword rdfOutput: whether exceptions should be caught and added, as triples, to the
    processor graph, or whether they should be raised
    @type rdfOutput: Boolean
    """
    if isinstance(source, StringInputSource):
        orig_source = source.getByteStream()
    elif isinstance(source, URLInputSource):
        orig_source = source.url
    elif isinstance(source, FileInputSource):
        orig_source = source.file.name
        source.file.close()
    baseURI = source.getPublicId()

    # The RDFa part
    from pyRdfa import pyRdfa, Options
    self.options = Options(output_processor_graph=(pgraph is not None),
                           embedded_rdf=embedded_rdf,
                           vocab_expansion=vocab_expansion,
                           vocab_cache=vocab_cache)
    processor = pyRdfa(self.options, base=baseURI, media_type='text/html', rdfa_version='1.1')
    processor.graph_from_source(orig_source, graph=graph, pgraph=pgraph, rdfOutput=rdfOutput)

    # The Microdata part
    try:
        from pyMicrodata import pyMicrodata
        processor = pyMicrodata(base=baseURI, vocab_expansion=vocab_expansion, vocab_cache=vocab_cache)
        processor.graph_from_source(orig_source, graph=graph, rdfOutput=rdfOutput)
    except ImportError:
        # pyMicrodata is not installed; silently skip the microdata pass
        pass

def parse(self, source, graph, pgraph=None, media_type=None, rdfa_version=None,
          embedded_rdf=False, vocab_expansion=False, vocab_cache=False, rdfOutput=False):
    """
    @param source: one of the input sources that the RDFLib package defined
    @type source: InputSource class instance
    @param graph: target graph for the triples; output graph, in RDFa spec. parlance
    @type graph: RDFLib Graph
    @keyword pgraph: target for error and warning triples; processor graph, in RDFa spec.
    parlance. If set to None, these triples are ignored
    @type pgraph: RDFLib Graph
    @keyword media_type: explicit setting of the preferred media type (a.k.a. content type)
    of the RDFa source. None means the content type of the HTTP result is used, or a guess
    is made based on the suffix of a file
    @type media_type: string
    @keyword rdfa_version: 1.0 or 1.1. If the value is None, then, by default, 1.1 is used
    unless the source has explicit signals to use 1.0 (e.g., using a @version attribute,
    using a DTD set up for 1.0, etc.)
    @type rdfa_version: string
    @keyword embedded_rdf: some formats allow embedding RDF in other formats: (X)HTML can
    contain Turtle in a special <script> element, SVG can have RDF/XML embedded in a
    <metadata> element. This flag controls whether those triples should be interpreted and
    added to the output graph. Some languages (e.g., SVG) always require this, in which case
    the flag is ignored.
    @type embedded_rdf: Boolean
    @keyword vocab_expansion: whether the RDFa @vocab attribute should also trigger
    vocabulary expansion (see the RDFa 1.1 spec for further details)
    @type vocab_expansion: Boolean
    @keyword vocab_cache: in case vocab expansion is used, whether the expansion data
    (i.e., the vocabulary) should be cached locally. This requires that the local
    application may write to the local file system
    @type vocab_cache: Boolean
    @keyword rdfOutput: whether exceptions should be caught and added, as triples, to the
    processor graph, or whether they should be raised
    @type rdfOutput: Boolean
    """
    from pyRdfa import pyRdfa, Options

    if isinstance(source, StringInputSource):
        orig_source = source.getByteStream()
    elif isinstance(source, URLInputSource):
        orig_source = source.url
    elif isinstance(source, FileInputSource):
        orig_source = source.file.name
        source.file.close()

    self.options = Options(output_processor_graph=(pgraph is not None),
                           embedded_rdf=embedded_rdf,
                           vocab_expansion=vocab_expansion,
                           vocab_cache=vocab_cache)
    baseURI = source.getPublicId()
    processor = pyRdfa(self.options, base=baseURI, media_type=media_type, rdfa_version=rdfa_version)
    processor.graph_from_source(orig_source, graph=graph, pgraph=pgraph, rdfOutput=rdfOutput)

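# The two parse() methods above follow rdflib's Parser plugin interface. Below is a hedged
# sketch of how such a parser is typically registered and used; the plugin name, module path
# and class name ("rdfa", "my_package.rdfa_parser", "RDFaParser") are placeholders, not the
# actual ones used by these snippets.
import rdflib
from rdflib.parser import Parser

rdflib.plugin.register("rdfa", Parser, "my_package.rdfa_parser", "RDFaParser")

g = rdflib.Graph()
g.parse("http://www.example.org/some-page.html", format="rdfa")
print(g.serialize(format="turtle"))
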
def __init__(self, url, impl):
    self.ns_ont = {}
    self.attribs_by_class = defaultdict(list)
    self.ontologies = []  # are these initializations necessary?
    self.attributes = []
    self.source = url
    self.impl = impl
    if 'rdfa' == impl:
        self.range_uri = "http://www.w3.org/2000/01/rdf-schema#range"
        self.domain_uri = "http://www.w3.org/2000/01/rdf-schema#domain"
        self.type_uri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        self.subclass_uri = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
        self.parser = pyRdfa()
    elif 'microdata' == impl:
        self.range_uri = "http://schema.org/range"
        self.domain_uri = "http://schema.org/domain"
        self.type_uri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        self.subclass_uri = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
        self.parser = pyMicrodata()
    return super(Graph, self).__init__()

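# Hedged companion sketch: the constructor above only selects a parser and predicate URIs,
# so this shows the two parsers applied to the same source outside the class. The URL is
# illustrative and the real class's workflow is not reproduced here.
from pyRdfa import pyRdfa
from pyMicrodata import pyMicrodata

source = "http://schema.org/docs/schema_org_rdfa.html"  # assumed example document
rdfa_graph = pyRdfa().graph_from_source(source)
microdata_graph = pyMicrodata().graph_from_source(source)
print(len(rdfa_graph), len(microdata_graph))
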
def parse(self):
    """
    Parse the RDFa input and store the processor and default graphs. The final
    media type is also updated.
    """
    transformers = []
    if self.rdfa_lite:
        from pyRdfa.transform.lite import lite_prune
        transformers.append(lite_prune)

    options = Options(output_default_graph=True,
                      output_processor_graph=True,
                      transformers=transformers,
                      vocab_expansion=self.vocab_expansion,
                      embedded_rdf=self.embedded_rdf,
                      add_informational_messages=True)
    processor = pyRdfa(options=options, base=self.base, media_type=self.media_type)
    processor.graph_from_source(self.uri, graph=self.default_graph,
                                pgraph=self.processor_graph, rdfOutput=True)
    # Extracting some parameters for the error messages
    self.processor = processor

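# Hedged companion sketch: once parse() above has run, the processor graph can be inspected
# for the warning and error triples pyRdfa recorded. Only attributes already used in parse()
# (self.processor_graph) are assumed; any filtering scheme is left to the caller.
def report_processor_messages(self):
    # Dump every triple of the processor graph; callers can filter by predicate or type.
    for subj, pred, obj in self.processor_graph:
        print(subj, pred, obj)
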
def _schema_nodes(self):
    """parse self._ontology_file into a graph"""
    name, ext = os.path.splitext(self._ontology_file)
    if ext in [".ttl"]:
        self._ontology_parser_function = lambda s: rdflib.Graph().parse(s, format="n3")
    else:
        self._ontology_parser_function = lambda s: pyRdfa().graph_from_source(s)

    if not self._ontology_parser_function:
        raise ValueError("No function found to parse ontology. %s" % errorstring_base)
    if not self._ontology_file:
        raise ValueError("No ontology file specified. %s" % errorstring_base)
    if not self.lexicon:
        raise ValueError("No lexicon object assigned. %s" % errorstring_base)

    latest_file = self._read_schema()
    try:
        self.graph = self._ontology_parser_function(latest_file)
    except:
        raise IOError("Error parsing ontology at %s" % latest_file)

    for subj, pred, obj in self.graph:
        self.ontology[subj].append((pred, obj))
        yield (subj, pred, obj)

def return_graph(uri, options, newCache=False):
    """Parse a file, and return an RDFLib Graph. The URI's content type is checked and either
    one of RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N-Triples cases) or a
    separate RDFa processing is invoked on the RDFa content.

    The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML
    and then HTML (RDFa), in case content negotiation is used.

    This function is used to retrieve the vocabulary file and turn it into an RDFLib graph.

    @param uri: URI for the graph
    @param options: used as a place where warnings can be sent
    @param newCache: in case this is used with caching, whether a new cache is generated;
    that modifies the warning text
    @return: a tuple consisting of an RDFLib Graph instance and an expiration date; None if
    the dereferencing or the parsing was unsuccessful
    """
    def return_to_cache(msg):
        if newCache:
            options.add_warning(err_unreachable_vocab % uri, warning_type=VocabReferenceError)
        else:
            options.add_warning(err_outdated_cache % uri, warning_type=VocabReferenceError)

    retval = None
    expiration_date = None
    content = None
    try:
        content = URIOpener(uri, {
            'Accept': 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'
        })
    except HTTPError:
        (type, value, traceback) = sys.exc_info()
        return_to_cache(value)
        return (None, None)
    except RDFaError:
        (type, value, traceback) = sys.exc_info()
        return_to_cache(value)
        return (None, None)
    except Exception:
        (type, value, traceback) = sys.exc_info()
        return_to_cache(value)
        return (None, None)

    # Store the expiration date of the newly accessed data
    expiration_date = content.expiration_date

    if content.content_type == MediaTypes.turtle:
        try:
            retval = Graph()
            retval.parse(content.data, format="n3")
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_Turtle_vocab % (uri, value))
    elif content.content_type == MediaTypes.rdfxml:
        try:
            retval = Graph()
            retval.parse(content.data)
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_Turtle_vocab % (uri, value))
    elif content.content_type == MediaTypes.nt:
        try:
            retval = Graph()
            retval.parse(content.data, format="nt")
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_ntriples_vocab % (uri, value))
    elif (content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml]
          or xml_application_media_type.match(content.content_type) != None):
        try:
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options
            options = Options()
            retval = pyRdfa(options).graph_from_source(content.data)
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_rdfa_vocab % (uri, value))
    else:
        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

    return (retval, expiration_date)

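# Hedged usage sketch for return_graph above: Options comes from pyRdfa.options as in the
# function body, and the vocabulary URI is only an illustrative example.
from pyRdfa.options import Options

vocab_graph, expires = return_graph("http://www.w3.org/2011/rdfa-context/rdfa-1.1", Options())
if vocab_graph is not None:
    print(len(vocab_graph), "triples, cache until", expires)
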
def _parse_func(s):
    return pyRdfa().graph_from_source(s)

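# Minimal sketch of what _parse_func wraps: graph_from_source accepts a URI, file name, or
# file-like object and returns an rdflib Graph. The file name below is hypothetical.
graph = _parse_func("schema.rdfa.html")
print(graph.serialize(format="nt"))
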
        try:
            retval = Graph()
            retval.parse(content.data, format="nt")
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_ntriples_vocab % (uri, value))
    elif (
        content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml]
        or xml_application_media_type.match(content.content_type) != None
    ):
        try:
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options
            options = Options()
            retval = pyRdfa(options).graph_from_source(content.data)
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_rdfa_vocab % (uri, value))
    else:
        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

    return (retval, expiration_date)


############################################################################################

type = ns_rdf["type"]
Property = ns_rdf["Property"]
Class = ns_rdfs["Class"]
subClassOf = ns_rdfs["subClassOf"]
subPropertyOf = ns_rdfs["subPropertyOf"]

def __init__(self, graph, doc_lines, url=""):
    super(RdfValidator, self).__init__(graph, doc_lines, url=url)
    self.parser = pyRdfa()
    self.graph = self.graph.rdfa_graph  # use the RDFa half of the compound graph

def __init__(self, graph, doc_lines):
    super(RdfValidator, self).__init__(graph, doc_lines)
    self.parser = pyRdfa()
    self.graph = self.graph.rdfa_graph  # use the RDFa half of the compound graph
    log.info("in RdfValidator init %s" % self.graph)

                output_processor_graph = True
            elif a == "default":
                output_default_graph = True
                output_processor_graph = False
            else:
                usage()
                sys.exit(1)
except:
    usage()
    sys.exit(1)

options = Options(output_default_graph=output_default_graph,
                  output_processor_graph=output_processor_graph,
                  space_preserve=space_preserve,
                  transformers=extras,
                  embedded_rdf=embedded_rdf,
                  vocab_expansion=vocab_expansion,
                  vocab_cache=vocab_cache,
                  vocab_cache_report=vocab_cache_report,
                  refresh_vocab_cache=refresh_vocab_cache)
processor = pyRdfa(options, base)

if len(value) >= 1:
    print(processor.rdf_from_sources(value, outputFormat=format, rdfOutput=rdfOutput))
else:
    print(processor.rdf_from_source(sys.stdin, outputFormat=format, rdfOutput=rdfOutput))

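# For comparison with the CLI fragment above, a hedged programmatic equivalent; the file
# names and the chosen output format are illustrative only.
from pyRdfa import pyRdfa, Options

opts = Options(output_default_graph=True, output_processor_graph=False)
proc = pyRdfa(opts, base="")
print(proc.rdf_from_sources(["page1.html", "page2.html"], outputFormat="turtle", rdfOutput=True))
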
def test_url(self):
    g = pyRdfa().rdf_from_source('http://oreilly.com/catalog/9780596516499/')
    self.assert_(self.target1 in g)

def return_graph(uri, options, newCache = False) :
    """Parse a file, and return an RDFLib Graph. The URI's content type is checked and either
    one of RDFLib's parsers is invoked (for the Turtle, RDF/XML, and N-Triples cases) or a
    separate RDFa processing is invoked on the RDFa content.

    The Accept header of the HTTP request gives a preference to Turtle, followed by RDF/XML
    and then HTML (RDFa), in case content negotiation is used.

    This function is used to retrieve the vocabulary file and turn it into an RDFLib graph.

    @param uri: URI for the graph
    @param options: used as a place where warnings can be sent
    @param newCache: in case this is used with caching, whether a new cache is generated;
    that modifies the warning text
    @return: a tuple consisting of an RDFLib Graph instance and an expiration date; None if
    the dereferencing or the parsing was unsuccessful
    """
    def return_to_cache(msg) :
        if newCache :
            options.add_warning(err_unreachable_vocab % uri, warning_type=VocabReferenceError)
        else :
            options.add_warning(err_outdated_cache % uri, warning_type=VocabReferenceError)

    retval = None
    expiration_date = None
    content = None
    try :
        content = URIOpener(uri, {'Accept' : 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'})
    except HTTPError :
        (type, value, traceback) = sys.exc_info()
        return_to_cache(value)
        return (None, None)
    except RDFaError :
        (type, value, traceback) = sys.exc_info()
        return_to_cache(value)
        return (None, None)
    except Exception :
        (type, value, traceback) = sys.exc_info()
        return_to_cache(value)
        return (None, None)

    # Store the expiration date of the newly accessed data
    expiration_date = content.expiration_date

    if content.content_type == MediaTypes.turtle :
        try :
            retval = Graph()
            retval.parse(content.data, format="n3")
        except :
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_Turtle_vocab % (uri, value))
    elif content.content_type == MediaTypes.rdfxml :
        try :
            retval = Graph()
            retval.parse(content.data)
        except :
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_Turtle_vocab % (uri, value))
    elif content.content_type == MediaTypes.nt :
        try :
            retval = Graph()
            retval.parse(content.data, format="nt")
        except :
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_ntriples_vocab % (uri, value))
    elif content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml] or xml_application_media_type.match(content.content_type) != None :
        try :
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options
            options = Options()
            retval = pyRdfa(options).graph_from_source(content.data)
        except :
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_rdfa_vocab % (uri, value))
    else :
        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

    return (retval, expiration_date)

            options.add_warning(err_unparsable_Turtle_vocab % (uri, value))
    elif content.content_type == MediaTypes.nt:
        try:
            retval = Graph()
            retval.parse(content.data, format="nt")
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_ntriples_vocab % (uri, value))
    elif (content.content_type in [MediaTypes.xhtml, MediaTypes.html, MediaTypes.xml]
          or xml_application_media_type.match(content.content_type) != None):
        try:
            from pyRdfa import pyRdfa
            from pyRdfa.options import Options
            options = Options()
            retval = pyRdfa(options).graph_from_source(content.data)
        except:
            (type, value, traceback) = sys.exc_info()
            options.add_warning(err_unparsable_rdfa_vocab % (uri, value))
    else:
        options.add_warning(err_unrecognised_vocab_type % (uri, content.content_type))

    return (retval, expiration_date)


############################################################################################

type = ns_rdf["type"]
Property = ns_rdf["Property"]
Class = ns_rdfs["Class"]
subClassOf = ns_rdfs["subClassOf"]

args = parser.parse_args()

EXLIST = []
for ex in args.example:
    EXLIST.extend(ex)

import io
import re

import rdflib
from rdflib.serializer import Serializer
from rdflib.parser import Parser
from pyRdfa import pyRdfa

RDFaProcessor = pyRdfa("")

from schemaexamples import SchemaExamples, Example


def validate():
    COUNT = 0
    ERRORCOUNT = 0
    SchemaExamples.loadExamplesFiles("default")
    print("Loaded %d examples " % (SchemaExamples.count()))
    print("Processing")

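# The body of validate() is cut off above; a hedged sketch of the kind of per-example check
# it might perform. It uses a fresh default pyRdfa() processor and a stand-in HTML string
# rather than the real SchemaExamples accessors, which are not shown in this fragment.
def check_example_html(html):
    try:
        graph = pyRdfa().graph_from_source(io.StringIO(html), rdfOutput=True)
        return len(graph), None
    except Exception as err:  # report parse failures instead of aborting the whole run
        return 0, err
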
def test_file(self):
    g = pyRdfa().rdf_from_source('test/rdfa/oreilly.html')
    self.assert_(self.target2.encode('utf-8') in g)

def test_file(self):
    g = pyRdfa().rdf_from_source('test/rdfa/oreilly.html')
    self.assert_(self.target2 in g)

def test_templates_course_detail_rdfa(self):
    """
    Extract RDFa tags from the HTML markup and check that it is complete as expected.
    """
    # Create organizations
    main_organization = OrganizationFactory(page_title="Main org", fill_logo=True, should_publish=True)
    other_organization = OrganizationFactory(page_title="Other org", fill_logo=True, should_publish=True)

    # Create persons
    author1 = PersonFactory(page_title="François", fill_portrait=True)
    placeholder = author1.extended_object.placeholders.get(slot="bio")
    add_plugin(language="en", placeholder=placeholder, plugin_type="PlainTextPlugin", body="La bio de François")
    author2 = PersonFactory(page_title="Jeanne", fill_portrait=True, should_publish=True)

    # Create a course with cover image, team and organizations
    licence_content, licence_participation = LicenceFactory.create_batch(2)
    course = CourseFactory(
        code="abcde",
        effort=[3, "hour"],
        page_title="Very interesting course",
        fill_cover=True,
        fill_organizations=[main_organization, other_organization],
        fill_team=[author1, author2],
        fill_licences=[
            ("course_license_content", licence_content),
            ("course_license_participation", licence_participation),
        ],
    )

    # Add an introduction to the course
    placeholder = course.extended_object.placeholders.get(slot="course_introduction")
    add_plugin(language="en", placeholder=placeholder, plugin_type="PlainTextPlugin",
               body="Introduction to interesting course")

    # Create ongoing open course runs that will be published (created before publishing the page)
    now = datetime(2030, 6, 15, tzinfo=timezone.utc)
    CourseRunFactory(
        direct_course=course,
        start=datetime(2030, 6, 30, tzinfo=timezone.utc),
        end=datetime(2030, 8, 1, tzinfo=timezone.utc),
        enrollment_start=datetime(2030, 6, 14, tzinfo=timezone.utc),
        enrollment_end=datetime(2030, 6, 16, tzinfo=timezone.utc),
        languages=["en", "fr"],
    )
    CourseRunFactory(
        direct_course=course,
        start=datetime(2030, 6, 1, tzinfo=timezone.utc),
        end=datetime(2030, 7, 10, tzinfo=timezone.utc),
        enrollment_start=datetime(2030, 6, 13, tzinfo=timezone.utc),
        enrollment_end=datetime(2030, 6, 20, tzinfo=timezone.utc),
        languages=["de"],
    )

    author1.extended_object.publish("en")
    course.extended_object.publish("en")

    url = course.extended_object.get_absolute_url()
    with mock.patch.object(timezone, "now", return_value=now):
        response = self.client.get(url)
    self.assertEqual(response.status_code, 200)

    # Parse the rendered page into a DOM and extract its RDFa graph
    processor = pyRdfa()
    content = str(response.content)
    parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom"))
    dom = parser.parse(io.StringIO(content))
    graph = processor.graph_from_DOM(dom)

    # Retrieve the course top node (body)
    (subject,) = graph.subjects(
        URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), URIRef("https://schema.org/Course")
    )
    self.assertEqual(len(list(graph.triples((subject, None, None)))), 38)

    # Opengraph
    self.assertTrue((subject, URIRef("http://ogp.me/ns#url"),
                     Literal("http://example.com/en/very-interesting-course/")) in graph)
    self.assertTrue((subject, URIRef("http://ogp.me/ns#site_name"), Literal("example.com")) in graph)
    self.assertTrue((subject, URIRef("http://ogp.me/ns#type"), Literal("website")) in graph)
    self.assertTrue((subject, URIRef("http://ogp.me/ns#locale"), Literal("en")) in graph)
    self.assertTrue((subject, URIRef("http://ogp.me/ns#determiner"), Literal("")) in graph)
    self.assertTrue((subject, URIRef("http://ogp.me/ns#title"), Literal("Very interesting course")) in graph)
    self.assertTrue((subject, URIRef("http://ogp.me/ns#description"),
                     Literal("Introduction to interesting course")) in graph)
    (image_value,) = graph.objects(subject, URIRef("http://ogp.me/ns#image"))
    pattern = (r"/media/filer_public_thumbnails/filer_public/.*cover\.jpg__"
               r"1200x630_q85_crop_replace_alpha-%23FFFFFF_subject_location")
    self.assertIsNotNone(re.search(pattern, str(image_value)))

    # Schema.org
    # - Course
    self.assertTrue((subject, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/Course")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/name"), Literal("Very interesting course")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/description"),
                     Literal("Introduction to interesting course")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/courseCode"), Literal("ABCDE")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/isAccessibleForFree"), Literal("true")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/timeRequired"), Literal("PT3H")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/stylesheet"),
                     URIRef("/static/richie/css/main.css")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/shortcut"),
                     URIRef("/static/richie/favicon/favicon.ico")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/icon"),
                     URIRef("/static/richie/favicon/favicon.ico")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/icon"),
                     URIRef("/static/richie/favicon/favicon-16x16.png")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/icon"),
                     URIRef("/static/richie/favicon/favicon-32x32.png")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/apple-touch-icon"),
                     URIRef("/static/richie/favicon/apple-touch-icon.png")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/mask-icon"),
                     URIRef("/static/richie/favicon/safari-pinned-tab.svg")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/manifest"),
                     URIRef("/static/richie/favicon/site.webmanifest")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/noreferrer"),
                     URIRef("https://www.facebook.com/example")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/noopener"),
                     URIRef("https://www.facebook.com/example")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/alternate"),
                     URIRef("http://example.com/en/very-interesting-course/")) in graph)
    self.assertTrue((subject, URIRef("https://schema.org/alternate"),
                     URIRef("http://example.com/fr/very-interesting-course/")) in graph)
    (image_value,) = graph.objects(subject, URIRef("https://schema.org/image"))
    pattern = (r"/media/filer_public_thumbnails/filer_public/.*cover\.jpg__"
               r"300x170_q85_crop_replace_alpha-%23FFFFFF_subject_location")
    self.assertIsNotNone(re.search(pattern, str(image_value)))
    self.assertTrue((subject, URIRef("https://schema.org/license"), URIRef(licence_content.url)) in graph)
    self.assertTrue((None, URIRef("https://schema.org/license"),
                     URIRef(licence_participation.url)) not in graph)

    # - Main organization (Provider)
    self.assertTrue((subject, URIRef("https://schema.org/provider"), URIRef("/en/main-org/")) in graph)
    self.assertTrue((URIRef("/en/main-org/"), URIRef("https://schema.org/name"), Literal("Main org")) in graph)
    self.assertTrue((URIRef("/en/main-org/"), URIRef("https://schema.org/url"),
                     Literal("http://example.com/en/main-org/")) in graph)
    (logo_value,) = graph.objects(URIRef("/en/main-org/"), URIRef("https://schema.org/logo"))
    pattern = (r"/media/filer_public_thumbnails/filer_public/.*logo.jpg__"
               r"200x113_q85_replace_alpha-%23FFFFFF_subject_location")
    self.assertIsNotNone(re.search(pattern, str(logo_value)))

    # - Organizations (Contributor)
    contributor_subjects = list(graph.objects(subject, URIRef("https://schema.org/contributor")))
    self.assertEqual(len(contributor_subjects), 2)
    self.assertTrue((contributor_subjects[0], URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/CollegeOrUniversity")) in graph)
    self.assertTrue((contributor_subjects[1], URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/CollegeOrUniversity")) in graph)
    self.assertTrue((URIRef("/en/main-org/"), URIRef("https://schema.org/name"), Literal("Main org")) in graph)
    self.assertTrue((URIRef("/en/other-org/"), URIRef("https://schema.org/name"), Literal("Other org")) in graph)
    self.assertTrue((URIRef("/en/main-org/"), URIRef("https://schema.org/url"),
                     Literal("http://example.com/en/main-org/")) in graph)
    self.assertTrue((URIRef("/en/other-org/"), URIRef("https://schema.org/url"),
                     Literal("http://example.com/en/other-org/")) in graph)
    pattern = (r"/media/filer_public_thumbnails/filer_public/.*logo.jpg__"
               r"200x113_q85_replace_alpha-%23FFFFFF_subject_location")
    (logo_value,) = graph.objects(URIRef("/en/main-org/"), URIRef("https://schema.org/logo"))
    self.assertIsNotNone(re.search(pattern, str(logo_value)))
    (logo_value,) = graph.objects(URIRef("/en/other-org/"), URIRef("https://schema.org/logo"))
    self.assertIsNotNone(re.search(pattern, str(logo_value)))

    # - Team (Person)
    author_subjects = list(graph.objects(subject, URIRef("https://schema.org/author")))
    self.assertEqual(len(author_subjects), 2)
    self.assertTrue((author_subjects[0], URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/Person")) in graph)
    self.assertTrue((author_subjects[1], URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/Person")) in graph)
    for name in ["Fran\\xc3\\xa7ois", "Jeanne"]:
        (author_subject,) = graph.subjects(URIRef("https://schema.org/name"), Literal(name))
        self.assertTrue(author_subject in author_subjects)
    (author_subject,) = graph.subjects(
        URIRef("https://schema.org/description"), Literal("La bio de Fran\\xc3\\xa7ois")
    )
    self.assertTrue(author_subject in author_subjects)
    for url in ["http://example.com/en/francois/", "http://example.com/en/jeanne/"]:
        (author_subject,) = graph.subjects(URIRef("https://schema.org/url"), Literal(url))
        self.assertTrue(author_subject in author_subjects)
    pattern = (r"/media/filer_public_thumbnails/filer_public/.*portrait.jpg__"
               r"200x200_q85_crop_replace_alpha-%23FFFFFF_subject_location")
    for author_subject in author_subjects:
        (portrait_value,) = graph.objects(author_subject, URIRef("https://schema.org/image"))
        self.assertIsNotNone(re.search(pattern, str(portrait_value)))

    # - Course runs (CourseInstance)
    course_run_subjects = list(graph.objects(subject, URIRef("https://schema.org/hasCourseInstance")))
    self.assertEqual(len(course_run_subjects), 2)
    self.assertTrue((course_run_subjects[0], URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/CourseInstance")) in graph)
    self.assertTrue((course_run_subjects[0], URIRef("https://schema.org/courseMode"), Literal("online")) in graph)
    self.assertTrue((course_run_subjects[1], URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                     URIRef("https://schema.org/CourseInstance")) in graph)
    self.assertTrue((course_run_subjects[1], URIRef("https://schema.org/courseMode"), Literal("online")) in graph)
    for start_date in ["2030-06-01", "2030-06-30"]:
        (subject,) = graph.subjects(URIRef("https://schema.org/startDate"), Literal(start_date))
        self.assertTrue(subject in course_run_subjects)
    for end_date in ["2030-07-10", "2030-08-01"]:
        (subject,) = graph.subjects(URIRef("https://schema.org/endDate"), Literal(end_date))
        self.assertTrue(subject in course_run_subjects)

                output_processor_graph = True
            elif a == "processor,default" or a == "default,processor":
                output_processor_graph = True
            elif a == "default":
                output_default_graph = True
                output_processor_graph = False
            else:
                usage()
                sys.exit(1)
except:
    usage()
    sys.exit(1)

options = Options(output_default_graph=output_default_graph,
                  output_processor_graph=output_processor_graph,
                  space_preserve=space_preserve,
                  transformers=extras,
                  embedded_rdf=embedded_rdf,
                  vocab_expansion=vocab_expansion,
                  vocab_cache=vocab_cache,
                  vocab_cache_report=vocab_cache_report,
                  refresh_vocab_cache=refresh_vocab_cache)
processor = pyRdfa(options, base)

if len(value) >= 1:
    print(processor.rdf_from_sources(value, outputFormat=format, rdfOutput=rdfOutput))
else:
    print(processor.rdf_from_source(sys.stdin, outputFormat=format, rdfOutput=rdfOutput))

def test_url(self):
    g = pyRdfa().rdf_from_source('http://oreilly.com/catalog/9780596516499/')
    self.assert_(self.target1.encode('utf-8') in g)

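# The two variants of these tests differ only in whether the expected substring is encoded:
# when rdf_from_source returns bytes (as it does with older rdflib serializers), the target
# must be bytes too. A hedged sketch of that check, reusing the snippet's local test file;
# the target value itself is a placeholder.
target = "example-target-substring"
serialized = pyRdfa().rdf_from_source("test/rdfa/oreilly.html")
found = (target.encode("utf-8") in serialized) if isinstance(serialized, bytes) else (target in serialized)
print(found)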