def _setURImaps(): self._uri_sl_bv = mtutils.sdict() # baselabel:basevalue map self._uri_pv_sl = mtutils.sdict() # preferredvalue:systemlabel map for c,slp in enumerate(self._sURIpreds): sln = 'native_'+slp[4:] if base is not None: # map between base uri parts and system labels try: v = reuri.match(self._getURIbase()).group(c+1) if v is not None: v = v.strip() if v: self._uri_sl_bv[sln] = v # map uri base label:value except: pass for sl,pv in mtalonto.items(): self._uri_pv_sl[pv] = sl # also map preferred to system ontology term for sl,pv in mtalonto.items(): # also be sure to index the system ontology terms in the preferred map if sl not in self._uri_pv_sl: self._uri_pv_sl[sl] = sl # if not already indexed: include system terms in the preferred map
def __init__(self, usecase=None, splitmatch=None, splitmodes=None):
    """Initialise the instance: create the empty implementation index,
    then apply the API-updatable settings through the shared _update()
    mechanism (so they behave as "sticky" updates)."""
    self._idx = mtutils.sdict()  # implementation value: the index
    self._update(usecase=usecase,
                 splitmatch=splitmatch,
                 splitmodes=splitmodes)  # API updatable values
def _getStoi(self):
    """Return the stored triple ordering index of self.

    Served from the cached attribute when available, otherwise derived
    from self._getTripleOrder() and cached for subsequent calls.

    Returns:
        toi - dict of triple ordering keyed by element id, e.g.
              {elem0_id:0, elem1_id:1, elem2_id:2}
    """
    try:
        return self._stoi  # cache hit: reuse previously derived index
    except AttributeError:
        pass  # first call: fall through and build the index
    index = mtutils.sdict()
    for position, elem_id in enumerate(self._getTripleOrder()):
        index[elem_id] = position
    self._stoi = index  # cache for next time
    return index
def _exists(self ,store ,request=None ,bound=None ):
    # usage:
    #   checks if solutions exist to semantic web queries
    # inputs:
    #   store - triple store to query
    #   request - generate|export|instantiate
    #   bound - variables constrained from start - input from import or a parent query process
    #           (None means start unbound; a fresh dict is created per call)
    # returns:
    #   exists - True|False
    # Fixes:
    #   - 'bound' previously defaulted to a shared mutable mtutils.sdict(),
    #     so any mutation by the solver leaked between calls; a fresh dict
    #     is now created per call (interface-compatible for all callers).
    #   - debug label said method='_generate' (copy/paste slip); now '_exists'.
    if bound is None: bound = mtutils.sdict()
    self._update(request=request)
    self._showDebug(clas='Result',method='_exists',note='inputs',line=449,level=0,vars=[['store',store],['request',request],['bound',bound]])
    for t1 in self._getRequest()._solve(store,bound=bound,mode='results'):
        return True # at least one solution found: no need to enumerate the rest
    return False # solver exhausted without yielding a solution
def _parseURI(self,target,uri,mode=['query']):
    # NOTE(review): the mutable default mode=['query'] is shared between
    # calls; the body only re-binds the local name (never mutates the
    # list in place), so this is safe as written — but confirm.
    # usage:
    #
    # for storage: defining modelled URI data
    # also now for query and delivery: replacing _expressURI()
    #
    # validates and extracts data from a URI
    # if URI matches current base+basesuffix then component labels are substituted for values
    # inputs:
    # self - the source store
    # target - the target store
    # uri - the uri
    # mode - how to interpret the URI any one or more of [query|parts] as string or list of strings:
    #   - null - set to 'off'
    #   - off - treat as if URIsupport is off (if combined with other modes these will be dropped as they are meaningless)
    #   - query - get URI bits needed for query processing or results delivery (automatically also sets parts & preferred modes)
    # returns: has_nativeuri, fulluri, base, fragment and if supplied has_urifragment, has_uribase
    # where:
    #   - has_nativeuri (runtime) - indexed uri value
    #   - has_rdfuri (runtime) - uri for export (system labels prefixed with system base)
    #   - has_uribase (modelled) - uribase (how the uribase is labelled)
    #   - has_properuribase (runtime) - literal uri base (what it is not how it is labelled)
    #   - has_urifragment (modelled) - stored uri fragment (either a value or system label)
    #   - parts - interpret modelled URI parts (otherwise just process the URI)
    #     (if URIsupport is off an empty dict is returned
    #   - has_uriprotocol (modelled) - base protocol (typically http)
    #   - has_uriauthority (modelled) - base domain (e.g. www.semanticle.com)
    #   - has_uripath (modelled) - base domain path suffix (file path in unix notation)
    #   - has_uriquery (modelled) - base query (http GET parameters for a dynamic page)
    # returns:
    # d - dict of available (not None) result values indexed by their identifying system predicate labels
    #     ie. {uri_bit_system_predicate1:uri_bit_value1,..,uri_bit_system_predicateN:uri_bit_valueN}
    # Note: Also ensures initialisation of the 2 uri mapping indexes.
    #   - self._uri_sl_bv (for result element conversion) indexes URI part base values by their corresponding system labels
    #   - self._uri_pv_sl (for query elememt conversion) indexes URI part system labels by their corresponding preferred values
    # Explanation:
    # uri maps shouldn't persist since underlying uri preferences may change
    # and if they do, updating the maps won't impact the integrity of existing data
    # since all persistent data is essentially independent of uri preference settings.
    def _setURImaps():
        # closure: (re)build the two uri mapping indexes; reads `base`
        # (assigned below, before the call site) from the enclosing scope
        self._uri_sl_bv = mtutils.sdict() # baselabel:basevalue map
        self._uri_pv_sl = mtutils.sdict() # preferredvalue:systemlabel map
        for c,slp in enumerate(self._sURIpreds):
            sln = 'native_'+slp[4:]
            if base is not None: # map between base uri parts and system labels
                try:
                    v = reuri.match(self._getURIbase()).group(c+1)
                    if v is not None:
                        v = v.strip()
                        if v: self._uri_sl_bv[sln] = v # map uri base label:value
                except: pass
        for sl,pv in mtalonto.items():
            self._uri_pv_sl[pv] = sl # also map preferred to system ontology term
        for sl,pv in mtalonto.items(): # also be sure to index the system ontology terms in the preferred map
            if sl not in self._uri_pv_sl:
                self._uri_pv_sl[sl] = sl # if not already indexed: include system terms in the preferred map
    # END _setURImaps()
    # validate the mode
    if not mode: mode = ['off'] # null mode means mode = ['off']
    elif isinstance(mode,basestring): mode = [mode] # ensure mode is a list
    if 'off' not in mode: # if uri support is not off:
        try: supp = self._getURIsupport() # try getting URIsupport
        except: supp = 'off' # except default to 'off'
        if supp == 'off':
            if 'parts' in mode: return {}
            else: mode = ['off']
    else: mode = ['off']
    # get uri bases(s)
    try: base = self._getURIbase() # try: getting the full uri base for the source store
    except AttributeError: base = None # except: its None
    if target == self: tbase = base # and use as target base if the same
    else: # else: (get target base seperately)
        try: tbase = target._getURIbase() # try: getting the full uri base for the target store
        except: tbase = None # except: its None
    setmaps = False # assume map indexes exist
    # check uri maps
    try: # try:
        if not self._uri_pv_sl: setmaps = True # if map index empty: flag to set it
    except AttributeError: setmaps = True # except no map index: also flag to set it
    if setmaps: _setURImaps() # if map indexes not already set: (proceed to do so)
    # proceed to split the uri
    d = mtutils.sdict() # initialise results dict
    parts_done = False # parts not parsed
    is_label = False # assume fragment isn't a system label
    ns = f = b = bl = None # default fragment, base and base label are None
    if 'off' not in mode: # but if URIsupport not off:
        r = self._splitURI(uri) # is it a URI which can be split into base and fragment?
        if r is not None: b = bl = r[0]; f = r[1] # if URI could be split: set base and fragment
        if f is None: # if no valid fragment:authority pair found: (check if pairing with ns)
            try: # try
                ns = rebse.match(uri).group(1) # parsing the namespace
                if ns: # parse hasn't failed so if namespace not null:
                    bs = self._getBSfromNS(ns) # get base from namespace (if avail)
                    if bs is not None: # if base was available for the namespace:
                        # NOTE(review): 'b1' is assigned nowhere else and the
                        # trailing comment says "base label" — this looks like
                        # a typo for 'bl'; confirm before changing behaviour.
                        b = b1 = bs # set it as base and base label
                        f = uri[len(ns)+1:] # also set the fragment
            except: pass # except: keep defaults
    if f is None: f = uri # still no fragment: default to the uri
    if f: # if fragment not null:
        if 'off' in mode or f[0] == '?': # if URI support is off or fragment is a query variable:
            d['has_nativeuri'] = f # set the native URI value to the fragment value
            d['has_rdfuri'] = f # set the export URI value to the fragment value
            d['has_displayuri'] = f # set the (native) display URI to the fragment value
            is_label = True # flag labelled
        else: # else fragment is a data value needing URI support:
            fp = mtalonto._getItem(f,f) # get the preferred value for the fragment (if any)
            if f in self._uri_pv_sl: # if fragment is a preferred label: (remember these also reverse indexed)
                d['has_displayuri'] = fp # set the (native) display URI to the preferred fragment value
                f = self._uri_pv_sl[f] # reset fragment to its corresponding system label
                d['has_nativeuri'] = f # set the native URI value to the reset fragment value
                is_label = True # flag labelled
            elif f[:2] == '##' and int(f[2:]) > 0: # elif fragment is a validated internal triple id:
                d['has_nativeuri'] = f # set the native URI value to the fragment value
                d['has_displayuri'] = f # set the (native) display URI to the fragment value
                is_label = True # flag labelled
            else: # else fragment is neither a preferred label nor an internal triple id:
                if b is None: bl = b = base # so if uri base ommitted assume its the source base
                if b is not None: # if base exists:
                    if b == tbase or b == self._URIsystembase: # and its that of the target or the software:
                        d['has_nativeuri'] = f # set the native URI value to the fragment value
                        d['has_displayuri'] = fp # set the (native) display URI to the preferred fragment value
                        bl = 'uribase' # set URI base value to its system label
                        parts_done = True # flag component extraction done
                        if 'parts' in mode: # if remaining URI parts required:
                            for c in range(4): # for each URI bit (except uri and base):
                                pred = self._sURIpreds[c] # get the predicate for this bit
                                noun = 'native_'+pred[4:] # derive the noun linked to the predicate
                                if noun in self._uri_sl_bv: # if corresponding fullbase bit value is not None:
                                    d[pred] = noun # set uri system noun indexed by uri system pred
                    else: # else base not of the target:
                        d['has_nativeuri'] = b+'#'+f # so include it in has_nativeuri
                        if ns is None: # if ns is None:
                            try: ns = target._getNSfromBS(b) # try: to get it from URI base (if avail)
                            except: pass # except: leave as None
                        if ns is None: ns = b # if ns is still None: use the base
                        d['has_displayuri'] = ns+'#'+fp # form the displayuri from the namespace, seperator and pref fragment
                d['has_urifragment'] = f # set the URI fragment value
    if 'has_nativeuri' not in d: d['has_nativeuri'] = uri # catch_all default has_nativeuri instantiation
    if 'has_displayuri' not in d: d['has_displayuri'] = uri # catch_all default has_displayuri instantiation
    if bl is not None: d['has_uribase'] = bl # set the URI base label value
    # parse query values (if needed)
    if 'query' in mode: # if parsing for query:
        if f in self._uri_sl_bv: f = self._uri_sl_bv[f] # if fragment a system label for base value: switch it to base value
        if is_label: # if fragment is a system label:
            if 'has_rdfuri' not in d: # if rdfuri not set:
                d['has_rdfuri'] = self._URIsystembase+'#'+f # set rdfuri to the URI systembase + seperator + fragment value
        else: # else its a value not a label: (so unification & rfd uri are equal)
            quri = uri # use supplied uri as a default queryable uri
            if b is not None: # if base exists:
                d['has_uribase'] = b # set it as the queryable base
                if f: quri = b+'#'+f # and prefix to the fragment (if this exists) as the queryable uri
            d['has_rdfuri'] = quri # set has_rdfuri to the queryable uri
    # parse part value (if needed and not already done)
    if 'parts' in mode: # if parts mode:
        if 'has_uribase' in d and not parts_done: # if uribase exists and parts not done: (proceed)
            for c in range(4): # for parts except has_uribase and has_urifragment:
                p = reuri.match(uri).group(c+1) # get the bit value
                if p is not None:
                    p = p.strip()
                    if p: d[self._sURIpreds[c]] = p # only if bit value exists instantiate it in results dict
        # NOTE(review): deleting keys while iterating 'd' raises RuntimeError
        # on builtin dicts — confirm sdict iteration tolerates deletion, or
        # iterate a snapshot (list of keys) instead.
        for p in d: # check each parts mode predicate in results:
            if p not in self._URIpredsModelled: del d[p] # if its not required for the model: delete it
    return d # return results dict
def _initDicts(): self._uri_ns_bs = mtutils.sdict() # initialise namespace to URI base dict self._uri_bs_ns = mtutils.sdict() # initialise URI base to namespace dict
def _setItems(self, items):
    """Install *items* as the item index and return the current index.

    A falsy value installs a fresh empty sdict; a truthy sdict is adopted
    as-is; any other truthy value is silently ignored and the existing
    index is kept (matching the established behaviour of this API).
    """
    if not items:
        replacement = sdict()    # falsy -> start over with an empty index
    elif isinstance(items, sdict):
        replacement = items      # adopt the caller's index directly
    else:
        replacement = None       # unsupported type: leave the index untouched
    if replacement is not None:
        self._items = replacement
    return self._items
def _reset(self):
    # Discard any previously installed items by re-initialising the item
    # index to a fresh, empty sdict.
    self._items = sdict()
def _setItems(self,items):
    # NOTE(review): this def header has no body here — it appears to be a
    # truncated duplicate of the _setItems defined earlier in the file;
    # confirm and either remove it or restore the missing body.
def _generate(self ,store=None ,results=None ,header=None ,footer=None ,author=None ,title=None ,bound=None ,infer=None ,order=None ,update=True ):
    # generate outputs from semantic web queries
    # supports variety of OutputPatterns or renderings (triples|relations) and OutputProtocols (text|CSV|Relational)
    # so its the basic driver for plug and play query driven export
    # usage:
    # inputs:
    # store - triple store to query
    # outputs - outputs to generate
    # header - result output heading string or [None|False]
    #   None - not specified, try default, or try overiding with result or output specific headers
    #   False - turns termination off overiding result or output specific headers
    # footer - result output footing string or [None|False]
    #   None - not specified, try default, or try overiding with result or output specific footers
    #   False - turns termination off overiding result or output specific footers
    # author - run author variable for value substitution in headers and footers
    # title - run title variable for value substitution in headers and footers
    # bound - bound values to input (None means start unbound)
    # infer - inference setting [True|False|None|'ffi]
    # order - Triple Ordering for IO expressed as a sequence of 's','p' and 'o' characters.
    # update - update default store, results, header, footer, author, title and order object settings [True|False]
    # Fix: 'bound' previously defaulted to a shared mutable mtutils.sdict(),
    # which leaked solver-side mutations between calls; a fresh dict is now
    # created per call (interface-compatible for all existing callers).
    if bound is None: bound = mtutils.sdict()
    if update: self._update(store=store # instantiate ad-hoc Content instance variables
                           ,results=results # as "sticky" updates
                           ,header=header
                           ,footer=footer
                           ,author=author
                           ,title=title
                           ,order=order
                           )
    self._showDebug(clas='Facade',method='_generate',note='inputs',line=264,level=0,vars=[['store',store],['results',results],['bound',bound]])
    if self._default_header is False: header = False # only use Facade specific header if default not False
    elif header is None: header = self._getHeader()
    if self._default_footer is False: footer = False # only use Facade specific footer if default not False
    elif footer is None: footer = self._getFooter()
    if author is None: author = self._getAuthor()
    if title is None: title = self._getTitle()
    store = self._getStore()
    order = self._getOrder() # get Facade triple order setting
    if not order: order = store._getIoTripleOrder() # otherwise get store ioTripleOrder setting (if any)
    if order: order = mtstores.TripleStore(tripleOrder=order) # if order setting found: express as a TripleStore
    for r in self._getResults(): # for each result request:
        if order: r = r._rtClone(order,store) # if re-ordering: do from (io)order to (store)order
        r._generate(store # generate the results for re-ordered request
                   ,bound=bound
                   ,header=header
                   ,footer=footer
                   ,author=author
                   ,title=title
                   ,facade=self
                   ,infer=infer
                   )
def _generate(self ,store ,request=None ,outputs=None ,header=None ,footer=None ,author=None ,title=None ,bound=mtutils.sdict() ,facade=None ,infer=None ): # usage: # generate outputs from semantic web queries # supports variety of OutputPatterns or renderings (triples|relations) and OutputProtocols (text|CSV|Relational) # so its its the basic driver for plug and play query driven export # inputs: # store - triple store to query # request - generate|export|instantiate # outputs - targets (content objects to generate or stores/files to instantiate) # header - updated Result header string or False (if any) # footer - updated Result footer string or False (if any) # author - updated Result author string (if any) # title - updated Result title string (if any) # bound - variables constrained from start - input from import or a parent query process # facade - facde invoking object if any # infer - use inference to solve [True|False|None|'ffi'] - overrides local or preference settings def defaultheadfoot(role,hfstring,hfvars): if hfstring is not False: if not outputs: hfstring = hfvars._doSubs(hfstring) print hfstring else: for output in outputs: output._defaultheadfooter(role ,hfstring ,hfvars ) self._update(request=request # instantiate ad-hoc Result instance variables ,outputs=outputs # as "sticky" updates ,header=header ,footer=footer ,author=author ,title=title ) self._showDebug(clas='Result',method='_generate',note='inputs',line=385,level=0,vars=[['store',store],['request',request],['outputs',outputs],['bound',bound]]) display = False # default no display outputs = self._getOutputs() # check outputs or default to display for output in outputs: # for each output (if any): if 'display' in output._getTargets(): # if display is a target: display = True # set display on break # quit check for output in outputs: # for each output (if any): if not (display or output._getTargets()): # if no supplied target or no display output._setTargets('display') # set target to display 
if facade is not None: # if facade not None: default to its variables for header = facade._getHeader() # header footer = facade._getFooter() # footer author = facade._getAuthor() # author title = facade._getTitle() # title if header is not False: # attempt overriding Facade header if not False header1 = self._getHeader() # with Result header if header1 is not None: header = header1 # if not None if footer is not False: # attempt overriding Facade footer if not False footer1 = self._getFooter() # with Result footer if footer1 is not None: footer = footer1 # if not None author1 = self._getAuthor() # attempt overriding Facade author if author1 is not None: author = author1 # with Result author if not None title1 = self._getTitle() # attempt overriding Facade title if title1 is not None: title = title1 # with Result title if not None hfvars = mtutils.sdict() # header/footer variables dict hfvars['author'] = author # set author variable hfvars['title'] = title # set title variable start = time.time() # get start time hfvars['start'] = time.ctime() # set formatted start time defaultheadfoot('header',header,hfvars) # output header for i,t1 in enumerate(self._getRequest()._solve(store # for each enumerated solution: ,bound=bound ,mode='results' ,infer=infer )): self._showDebug(clas='Result',method='_generate',line=421,level=2,vars=[['t1',t1],['outputs',outputs]]) if not outputs: # if no outputs: v = [store._expandTriple(t) for t in t1['!triples']] # default to display of matching triples print mtprotocol.CsvProtocol()._translate(v,endline='no') else: # generate specified outputs for output in outputs: # for each output: output._generate(bound=t1 # generate the output ,instance=i ,store=store ) duration = time.time()-start # get duration hfvars['end'] = time.ctime() # set formatted end time hfvars['duration'] = str(round(duration*100)/100)+' seconds' # set rounded duration string defaultheadfoot('footer',footer,hfvars) # output footer
def __init__(self, select=None, filepath=None, filename=None):
    """Set up the parser's empty rule and cache structures, then apply
    the supplied settings through the shared _update() mechanism."""
    self._rules = []                   # rule definition strings
    self._rulesUsed = mtutils.slist()  # lhs symbols flagged as used during parsing
    self._cache = mtutils.sdict()      # cache (consumed elsewhere)
    self._i_rules = mtutils.sdict()    # pre-processed rules indexed by lhs
    self._update(select=select,
                 filepath=filepath,
                 filename=filename)
def parse1(s, l_rhs, level, lhs, c):  # recursive parser of rhs clause list
    # Generator: yields (remaining_sentence, i_match, i_bound) for each way
    # the clause list l_rhs can match the start of sentence s.
    # NOTE(review): references `parse` and `self` from an enclosing scope not
    # visible in this chunk — it is a closure nested inside a larger method.
    def parse2(s, rhs, level, lhs, c):  # parser of single rhs clause
        # Generator: yields (s1, match, bound, bound1) for each way the single
        # clause rhs matches the start of s; rhs may be a nested "<...>"
        # clause, a plain string, an mtpexits.Test, or a compiled regex.
        self._showDebug(
            clas="Parser", method="_parse", note=".parse.parse1.parse2 inputs", line=243, level=0,
            vars=[["s", s], ["rhs", rhs], ["level", level], ["lhs", lhs], ["c", c]],
        )
        smatch = bmatch = ""  # prepare to get match and sub_binding
        bound1 = {}
        s1 = s
        if isinstance(rhs, basestring):  # if string for matching
            rhs = rhs.strip()  # strip it
            if rhs.startswith("<") and rhs.endswith(">"):  # if a nested clause
                for s1, match, bound1 in parse(s, rhs, level=level + 1):  # for each parsed rhs:
                    bound = bound1  # temp sub_binding store
                    if lhs in bound1:  # if rhs clause is recursive
                        del bound1[lhs]  # remove it from temp binding
                        self._i_rules[lhs][c][1] -= 1  # remove it from count of BNF clauses used
                    self._showDebug(
                        clas="Parser", method="_parse", note=".parse.parse1.parse2 yields", line=255, level=0,
                        vars=[["s1", s1], ["match", match], ["bound", bound], ["bound1", bound1]],
                    )
                    yield s1, match, bound, bound1  # yield match data
            else:  # if a simple text matching
                if s1.startswith(rhs):  # if match
                    smatch = bmatch = rhs  # record solving and binding matches
        elif isinstance(rhs, mtpexits.Test):
            # user-exit test: evaluate against the sentence wrapped as an Element
            tmp = rhs._eval(mtpexits.Element(s))
            if tmp:
                smatch = tmp[0]
                bmatch = tmp[1]
        elif rhs.match(s):  # else if a regex match
            ep = self._i_rules[lhs][c][2]  # get regex parts
            smatch, bmatch = mtutils._evalRegex(
                s, rhs, ep=ep
            )  # get solving & binding matches from regex
        self._showDebug(
            clas="Parser", method="_parse", note=".parse.parse1.parse2 yields", line=268, level=1,
            vars=[["smatch", smatch], ["bmatch", bmatch]],
        )
        if bmatch:  # if match recorded
            s1 = s[len(smatch):]  # remove solving match from the remaining sentence
            bound = match = bmatch  # temp bound and match store
            self._showDebug(
                clas="Parser", method="_parse", note=".parse.parse1.parse2 yields", line=272, level=0,
                vars=[["s1", s1], ["match", match], ["bound", bound], ["bound1", bound1]],
            )
            yield s1, match, bound, bound1  # yield match data
    self._showDebug(
        clas="Parser", method="_parse", note=".parse.parse1 inputs", line=274, level=0,
        vars=[["s", s], ["l_rhs", l_rhs], ["level", level], ["lhs", lhs], ["c", c]],
    )
    for s, match, bound, bound1 in parse2(
        s, l_rhs[0], level, lhs, c
    ):  # for each match of first rhs clause
        if match:  # if match:
            self._showDebug(
                clas="Parser", method="_parse", note=".parse.parse1", line=277, level=2,
                vars=[["s", s], ["lhs", lhs], ["match", match], ["bound", bound], ["bound1", bound1]],
            )
            i_match = mtutils.sdict()  # set default match index
            i_bound = mtutils.sdict()  # set default binding index
            i_match = i_match._deepUnion({lhs: [match]})  # record this clause match in i_match
            self._i_rules[lhs][c][1] += 1  # increment BNF clause usage count
            if lhs not in self._rulesUsed:
                self._rulesUsed += [lhs]  # flag usage of this clause
            self._showDebug(
                clas="Parser", method="_parse", note=".parse.parse1", line=283, level=2,
                vars=[["lhs", lhs], ["bound1", bound1], ["self._getSelect()", self._getSelect()]],
            )
            i_bound = i_bound._deepUnion(bound1)  # index sub_binding in i_bound
            if not self._getSelect() or lhs in self._getSelect():  # if this binding required
                i_bound = i_bound._deepUnion({lhs: [bound]})  # add it to bound index
            if len(l_rhs) == 1:  # if only 1 rhs clause
                self._showDebug(
                    clas="Parser", method="_parse", note=".parse.parse1 yields", line=288, level=0,
                    vars=[["s", s], ["i_match", i_match], ["i_bound", i_bound]],
                )
                yield s, i_match, i_bound  # yield it as a match
            else:  # else (more clauses):
                for s, i_match1, i_bound1 in parse1(
                    s, l_rhs[1:], level, lhs, c
                ):  # parse the remaining rhs clauses
                    i_match = i_match._deepUnion(i_match1)  # index match in i_match
                    i_bound = i_bound._deepUnion(i_bound1)  # index sub_binding in i_bound
                    self._showDebug(
                        clas="Parser", method="_parse", note=".parse.parse1 yields", line=294, level=0,
                        vars=[["s", s], ["i_match", i_match], ["i_bound", i_bound]],
                    )
                    yield s, i_match, i_bound  # and yield match data
def _setI_Rules(self): # pre-process rules into strings or copiled regex # splitting regex parts and counting match groups # in i_rules indexed by lhs # with rhs sorted by inverse (clause count, rhs clause string length) i_rules = mtutils.sdict() for rule in self._rules: equation = rule.split("::-") if len(equation) == 2: # only if lhs and rhs ok = 1 # default ok uc = 0 # dummy usage count for later sorting by most used first ep = mtutils.slist() for c in range(2): equation[c] = equation[c].strip() # strip lhs and rhs if equation[1].startswith("/") and equation[1].endswith("/"): # rhs is a regex ep = mtutils._splitRegex(equation[1]) try: # try processing regex: ec, pc = mtutils._pdepth(ep[0]) # count and validate inner parenthesis if ec == 1: raise mterrors.UnbalancedParenthesisError(ep[0]) # test error code to raise errors elif ec == 2: raise mterrors.UnspecifiedParenthesisError() self._showDebug( clas="Parser", method="_setI_Rules", line=177, level=1, vars=[["equation[1]", equation[1]], ["ep", ep], ["ok", ok], ["pc", pc]], ) if not ep[0].startswith("^"): ep[0] = "^" + ep[0] # ensure it matches from beginning try: equation[1] = re.compile(ep[0]) # try compiling the pattern except: raise mterrors.ParseInvalidRegexError(ep[0]) # escalate failure except mterrors.ParseError, X: X._notify(c="Parser", m="_setI_Rules()") # notify ParseError elif equation[1].startswith("{") and equation[1].endswith("}"): # rhs is a userexit Element method equation[1] = mtpexits.Test(equation[1][1:-1].strip()) # instantiate rhs a userexit Test else: # rhs is a string or clause(s) equation[1] = safesplit.sub(">~#!@!#~<", equation[1]) # sub special clause seperator self._showDebug( clas="Parser", method="_setI_Rules", line=188, level=1, vars=[["equation[0]", equation[0]], ["equation[1]", equation[1]]], ) if ok: # add to i_rules if OK if equation[0] not in i_rules: # if rhs not in i_rules[equation[0]] = [[equation[1], uc, ep]] # add it self._showDebug( clas="Parser", method="_setI_Rules", 
line=192, level=1, vars=[["equation[0]", equation[0]]] ) else: # insert new lhs into list sorted by size l1 = len(i_rules[equation[0]]) # set up manual break and list counter c = -1 while c + 1 < l1: c += 1 self._showDebug( clas="Parser", method="_setI_Rules", line=198, level=2, vars=[ ["l1", l1], ["c", c], ["i_rules[equation[0]][c]", i_rules[equation[0]][c]], ["equation[1]", equation[1]], ], ) if i_rules[equation[0]][c][1] < uc: # if new rhs has more clauses i_rules[equation[0]][c:c] = [ [equation[1], uc, ep] ] # insert it here into the rhs list c = l1 # and break elif i_rules[equation[0]][c][0] == equation[1]: c = l1 # elif rhs is not new break elif c + 1 == l1: # elif at end of list i_rules[equation[0]] += [[equation[1], uc, ep]] # append new rhs to list self._showDebug( clas="Parser", method="_setI_Rules", line=205, level=1, vars=[["i_rules[equation[0]]", i_rules[equation[0]]]], )