def __init__(self, alias):
    """Set up a LegalRef parser and a lagen.nu-style URI minter for the
    reference type encoded in *alias* (the part after the first "/")."""
    self.alias = alias
    # The alias suffix selects which citation grammar LegalRef should use.
    grammars = {'SFS': LegalRef.LAGRUM,
                'Short': LegalRef.KORTLAGRUM,
                'DV': LegalRef.RATTSFALL,
                'Regpubl': LegalRef.FORARBETEN,
                'EGLag': LegalRef.EULAGSTIFTNING,
                'ECJ': LegalRef.EURATTSFALL}
    self.parser = LegalRef(grammars[alias.split("/")[1]])
    # This particular test method is set up to use lagen.nu style
    # URIs because the canonical URIs are significantly different.
    basedir = os.path.dirname(__file__) + "/../"
    space = basedir + "lagen/nu/res/uri/swedishlegalsource.space.ttl"
    slugs = basedir + "lagen/nu/res/uri/swedishlegalsource.slugs.ttl"
    cfg = Graph()
    cfg.parse(space, format="turtle")
    cfg.parse(slugs, format="turtle")
    self.metadata = Graph()
    for relpath in ("lagen/nu/res/extra/swedishlegalsource.ttl",
                    "lagen/nu/res/extra/sfs.ttl"):
        self.metadata.parse(basedir + relpath, format="turtle")
    COIN = Namespace("http://purl.org/court/def/2009/coin#")
    # Select the correct URI for the URISpace definition by finding
    # the single coin:URISpace object in the config graph.
    spaceuri = cfg.value(predicate=RDF.type, object=COIN.URISpace)
    self.minter = URIMinter(cfg, spaceuri)
def __init__(self, repos, inifile=None, **kwargs):
    """Initialize the WSGI app with a Swedish citation parser and statute
    name/abbreviation lookup tables derived from the "sfs" repo.

    :param repos: list of document repositories; must contain one whose
                  ``alias`` is ``"sfs"`` (raises IndexError otherwise).
    :param inifile: optional config file path, passed to the superclass.
    """
    super(WSGIApp, self).__init__(repos, inifile, **kwargs)
    sfsrepo = [repo for repo in repos if repo.alias == "sfs"][0]
    self.parser = SwedishCitationParser(
        LegalRef(LegalRef.RATTSFALL, LegalRef.LAGRUM, LegalRef.KORTLAGRUM,
                 LegalRef.FORARBETEN, LegalRef.MYNDIGHETSBESLUT),
        sfsrepo.minter,
        sfsrepo.commondata,
        allow_relative=True)
    graph = Graph().parse(sfsrepo.resourceloader.filename("extra/sfs.ttl"),
                          format="turtle")
    # FIX: the original iterated graph.subject_objects(DCTERMS.alternate)
    # twice (a comprehension for lagforkortningar, then a second loop for
    # paragraflag); a single pass builds both in the same order.
    self.lagforkortningar = []   # all statute abbreviations
    self.paragraflag = []        # lowercased abbreviations of statutes whose
                                 # distilled XML contains a first paragraph (#P1)
    for s, o in graph.subject_objects(DCTERMS.alternate):
        self.lagforkortningar.append(str(o))
        basefile = sfsrepo.basefile_from_uri(str(s))
        distilledpath = sfsrepo.store.distilled_path(basefile)
        firstpara_uri = str(s) + "#P1"
        needle = '<rpubl:Paragraf rdf:about="%s">' % firstpara_uri
        if os.path.exists(distilledpath) and needle in util.readfile(
                distilledpath):
            self.paragraflag.append(str(o).lower())
    self.lagnamn = [str(o) for s, o in graph.subject_objects(RDFS.label)]
    # Longest-first alternation so the regex prefers the longest match.
    self.lagforkortningar_regex = "|".join(
        sorted(self.lagforkortningar, key=len, reverse=True))
def parser(self):
    """Build a SwedishCitationParser over this repo's minter and common data.

    self.commondata needs to include extra/sfs.ttl somehow; this is
    probably not the best way. Additionally, to mint URIs for rattsfall
    we need the skos:altLabel for the rpubl:Rattsfallspublikation -- so
    we load everything from extra/swedishlegalsource.ttl as well.
    """
    legalref = LegalRef(LegalRef.LAGRUM, LegalRef.KORTLAGRUM,
                        LegalRef.FORARBETEN, LegalRef.RATTSFALL)
    for resource in ("extra/sfs.ttl", "extra/swedishlegalsource.ttl"):
        with self.resourceloader.open(resource) as fp:
            self.commondata.parse(data=fp.read(), format="turtle")
    return SwedishCitationParser(legalref, self.minter, self.commondata,
                                 allow_relative=True)
def parse_document_from_soup(self, soup, doc):
    """Process the fulltext of an EU court decision and populate doc.body.

    :param soup: BeautifulSoup tree of the source HTML.
    :param doc: the document object whose ``body`` receives Paragraphs.
    """
    # Process text and create DOM
    self.parser = LegalRef(LegalRef.EGRATTSFALL)
    textdiv = soup.find("div", "texte")
    if textdiv:
        for node in textdiv.childGenerator():
            if node.string:
                # Here we should start analyzing for things like
                # "C-197/09". Note that the Eurlex data does not use
                # the ordinary hyphen like above, but rather
                # 'NON-BREAKING HYPHEN' (U+2011) - LegaRef will mangle
                # this to an ordinary hyphen.
                subnodes = self.parser.parse(
                    node.string, predicate="dcterms:references")
                doc.body.append(Paragraph(subnodes))
    else:
        # FIX: original referenced an undefined name `celexnum` here,
        # which would raise NameError whenever fulltext was missing.
        # doc.basefile is the closest available identifier (for Eurlex
        # repos the basefile is the CELEX number — confirm against repo).
        self.log.warning("%s: No fulltext available!" % doc.basefile)
        doc.body.append(Paragraph(["(No fulltext available)"]))
def forarbete_parser(self):
    """Return a citation parser recognizing references to preparatory
    works (förarbeten) only."""
    legalref = LegalRef(LegalRef.FORARBETEN)
    return SwedishCitationParser(legalref, self.minter, self.commondata)
def lagrum_parser(self):
    """Return a citation parser recognizing statute (lagrum) and EU
    legislation references, with relative references allowed."""
    legalref = LegalRef(LegalRef.LAGRUM, LegalRef.EULAGSTIFTNING)
    return SwedishCitationParser(legalref, self.minter, self.commondata,
                                 allow_relative=True)
class LNMediaWiki(MediaWiki):
    """lagen.nu-specific MediaWiki repository.

    Wiki pages carry commentary on SFS statutes (basefiles starting with
    "SFS/" or "SFS:") and on keywords; URI construction and citation
    parsing for statute pages are delegated to an SFS repository.
    """
    namespaces = SwedishLegalSource.namespaces

    # Class-level citation parser shared by all instances; recognizes
    # statute refs, short-form statute refs, preparatory works and case law.
    from ferenda.sources.legal.se.legalref import LegalRef
    p = LegalRef(LegalRef.LAGRUM, LegalRef.KORTLAGRUM,
                 LegalRef.FORARBETEN, LegalRef.RATTSFALL)
    # Repository class used for keyword pages.
    keyword_class = LNKeyword
    # Content language (Swedish).
    lang = "sv"

    def __init__(self, config=None, **kwargs):
        """Initialize, wiring up an SFS repo (configured from the parent
        config's "sfs" section when present, defaults otherwise)."""
        super(LNMediaWiki, self).__init__(config, **kwargs)
        if self.config._parent and hasattr(self.config._parent, "sfs"):
            self.sfsrepo = SFS(self.config._parent.sfs)
        else:
            self.sfsrepo = SFS()

    def get_wikisettings(self):
        """Return an LNSettings object with URL-maker hooks bound to the
        SFS and keyword repos."""
        settings = LNSettings(lang=self.lang)
        # NOTE: The settings object (the make_url method) only needs
        # access to the canonical_uri method.
        settings.make_sfs_url = self.sfsrepo.canonical_uri
        settings.make_keyword_url = self.keywordrepo.canonical_uri
        return settings

    def get_wikisemantics(self, parser, settings):
        """Return the lagen.nu-specific wiki semantics object."""
        return LNSemantics(parser, settings)

    def canonical_uri(self, basefile):
        """Return the canonical URI for *basefile*, delegating SFS-prefixed
        basefiles to the SFS repo (with the 4-char prefix stripped)."""
        if basefile.startswith("SFS/") or basefile.startswith("SFS:"):
            # "SFS/1998:204" -> "1998:204"
            return self.sfsrepo.canonical_uri(basefile[4:])
        else:
            return super(LNMediaWiki, self).canonical_uri(basefile)

    def postprocess(self, doc, xhtmltree):
        """Postprocess the rendered wiki tree and run citation parsing.

        For SFS commentary pages, first restructure the body into
        per-section description divs (see postprocess_commentary) and
        suppress the superclass's toplevel property handling.
        """
        # if SFS mode:
        # create a div for root content
        # find all headers, create div for everything there
        if doc.basefile.startswith("SFS/") or doc.basefile.startswith("SFS:"):
            self.postprocess_commentary(doc, xhtmltree)
            toplevel_property = False
        else:
            toplevel_property = True
        body = super(LNMediaWiki, self).postprocess(
            doc, xhtmltree, toplevel_property=toplevel_property)
        citparser = SwedishCitationParser(self.p, self.config.url)
        citparser.parse_recursive(body, predicate=None)
        return body

    def postprocess_commentary(self, doc, xhtmltree):
        """Rebuild <body> so each header starts a new dcterms:description
        div about the statute section the header cites.

        Headers (h1-h6) are parsed with self.p to mint the URI of the
        statute part they refer to; all following non-header content is
        collected under a div carrying about/property/datatype RDFa
        attributes for that URI.
        """
        uri = doc.uri
        body = xhtmltree.getchildren()[0]
        newbody = etree.Element("body")
        # Start with a div about the document root URI; content before the
        # first header lands here.
        curruri = uri
        currdiv = etree.SubElement(newbody, "div")
        currdiv.set("about", curruri)
        currdiv.set("property", "dcterms:description")
        currdiv.set("datatype", "rdf:XMLLiteral")
        containerdiv = etree.SubElement(currdiv, "div")
        for child in body.getchildren():
            if child.tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
                # remove that <span> element that Semantics._h_el adds for us
                assert child[
                    0].tag == "span", "Header subelement was %s not span" % child[
                        0].tag
                child.text = child[0].text
                child.remove(child[0])
                if child.text:
                    if isinstance(child.text, bytes):
                        txt = child.text.decode("utf-8")
                    else:
                        txt = child.text
                    # Parse the header text as a citation; the first parsed
                    # node's uri becomes the subject of the following div.
                    nodes = self.p.parse(txt, curruri)
                    curruri = nodes[0].uri
                # body.remove(child)
                newbody.append(child)
                # Open a fresh description div for the content that follows
                # this header.
                currdiv = etree.SubElement(newbody, "div")
                currdiv.set("about", curruri)
                currdiv.set("property", "dcterms:description")
                currdiv.set("datatype", "rdf:XMLLiteral")
                # create a containerdiv under currdiv for reasons
                # (NOTE(review): purpose of the inner wrapper div is not
                # evident from this code — presumably required by a later
                # serialization step; confirm before removing)
                containerdiv = etree.SubElement(currdiv, "div")
            else:
                # body.remove(child)
                # Non-header content goes into the inner container of the
                # current description div.
                currdiv[0].append(child)
        xhtmltree.remove(body)
        xhtmltree.append(newbody)
def parametric_test(self, datafile):
    """Run the shared parser test harness on *datafile* with a LAGRUM
    (statute reference) parser."""
    return self._test_parser(datafile, LegalRef(LegalRef.LAGRUM))
def parametric_test(self, datafile):
    """Run the shared parser test harness on *datafile* with a
    MYNDIGHETSBESLUT (agency decision) parser."""
    legalref = LegalRef(LegalRef.MYNDIGHETSBESLUT)
    # legalref.verbose = True  # uncomment for parse tracing
    return self._test_parser(datafile, legalref)
def parametric_test(self, datafile):
    """Run the shared parser test harness on *datafile* with an
    EGRATTSFALL (EU case law) parser."""
    return self._test_parser(datafile, LegalRef(LegalRef.EGRATTSFALL))
def parametric_test(self, datafile):
    """Run the shared parser test harness on *datafile* with an
    EULAGSTIFTNING (EU legislation) parser."""
    return self._test_parser(datafile, LegalRef(LegalRef.EULAGSTIFTNING))
def parametric_test(self, datafile):
    """Run the shared parser test harness on *datafile* with a
    FORARBETEN (preparatory works) parser."""
    return self._test_parser(datafile, LegalRef(LegalRef.FORARBETEN))