def get_pred_objs(self, star):
    """
    Map every constant predicate of a BGP/star-shaped subquery to its object.

    Keys are the values returned by ``utils.getUri`` for the predicate with
    the first and last character stripped. The value is the
    ``utils.getUri`` form of the object when the object is constant,
    otherwise the object's ``name``. Triple patterns whose predicate is not
    constant are skipped. When the same predicate occurs more than once,
    the last triple pattern in ``star`` wins.

    :param star: list of triple patterns
    :return: dict of predicate -> object
    """
    pred_to_obj = {}
    for triple in star:
        if not triple.predicate.constant:
            continue
        pred_key = utils.getUri(triple.predicate, self.prefixes)[1:-1]
        if triple.theobject.constant:
            pred_to_obj[pred_key] = utils.getUri(triple.theobject, self.prefixes)
        else:
            pred_to_obj[pred_key] = triple.theobject.name
    return pred_to_obj
def decompose_bgp(self, stars, bgp_preds):
    """
    Decompose a BGP, given as star-shaped subqueries, into per-star metadata.

    For every star (processed in sorted name order) an entry is built with:

    * ``'triples'``: the sorted triple patterns of the star,
    * ``'predicates'``: the result of ``get_pred_objs`` for the star,
    * ``'rdfmts'``: candidate molecule names, taken from
      ``checkRDFTypeStatemnt`` when it yields anything, otherwise from
      ``self.config.find_rdfmt_by_preds``.

    The candidates are then narrowed by ``prune`` and, per star, grouped
    under ``'datasources'`` as ``{datasource: {molecule: [predicates]}}``.
    For each datasource the predicates shared with the star are stored; if
    none are shared, all predicates of that datasource are stored instead.

    :param stars: dict of star name -> list of triple patterns
    :param bgp_preds: predicates of the BGP, forwarded to ``getMTsConnection``
    :return: ``(bgpstars, star_conn, mt_conn)``; ``([], [], [])`` as soon as
        one star ends up without any datasource
    """
    decomposition = {}
    candidates_by_star = {}
    relevant_mts = {}

    for star_name in sorted(stars.keys()):
        pred_objs = self.get_pred_objs(stars[star_name])
        entry = {
            'triples': sorted(stars[star_name]),
            'predicates': pred_objs,
        }
        decomposition[star_name] = entry

        # Explicit type statements take precedence over predicate-based lookup.
        typed_mts = self.checkRDFTypeStatemnt(stars[star_name])
        rdfmts = typed_mts if len(typed_mts) > 0 else self.config.find_rdfmt_by_preds(pred_objs)

        entry['rdfmts'] = list(rdfmts.keys())
        # The very same list object is handed on to the connection/prune steps.
        candidates_by_star[star_name] = entry['rdfmts']
        relevant_mts.update(rdfmts)

    star_conn = self.getStarsConnections(stars)
    mt_conn = self.getMTsConnection(candidates_by_star, bgp_preds, relevant_mts)
    pruned = self.prune(star_conn, mt_conn, candidates_by_star, stars, relevant_mts)

    for star_name in pruned:
        decomposition[star_name]['rdfmts'] = pruned[star_name]

    for star_name in pruned:
        entry = decomposition[star_name]
        datasources = {}
        for molecule in pruned[star_name]:
            for source in self.config.metadata[molecule].datasources:
                source_preds = self.config.metadata[molecule].datasources[source]
                shared = list(set(entry['predicates']).intersection(source_preds))
                # Fall back to everything the source offers when nothing overlaps.
                chosen = shared if len(shared) > 0 else source_preds
                datasources.setdefault(source, {}).setdefault(molecule, []).extend(chosen)

        if len(entry['predicates']) == 0:
            # No constant predicate in this star: key the mapping by the
            # predicate's name instead.
            entry['predicates'] = {
                tr.predicate.name: (
                    utils.getUri(tr.theobject, self.prefixes)
                    if tr.theobject.constant
                    else tr.theobject.name
                )
                for tr in stars[star_name]
            }

        if len(datasources) == 0:
            return [], [], []
        entry['datasources'] = datasources

    return decomposition, star_conn, mt_conn
def get_preds(self, star):
    """
    Collect the constant predicates of a BGP/star-shaped subquery.

    Each entry is the value returned by ``utils.getUri`` for the predicate
    with the first and last character stripped. Triple patterns whose
    predicate is not constant are skipped. Order follows ``star`` and
    duplicates are kept.

    :param star: list of triple patterns
    :return: list of predicates
    """
    collected = []
    for triple in star:
        if not triple.predicate.constant:
            continue
        collected.append(utils.getUri(triple.predicate, self.prefixes)[1:-1])
    return collected
def checkRDFTypeStatemnt(self, ltr):
    """
    Look up the metadata entries named by the type statements of a star.

    The statements come from ``self.getRDFTypeStatement(ltr)``. The object
    of each one is converted with ``utils.getUri`` (first and last
    character stripped) and kept only when it is a key of
    ``self.config.metadata``.

    :param ltr: list of triple patterns
    :return: dict of type name -> metadata entry; empty when there is no
        type statement or none of the types is known
    """
    type_statements = self.getRDFTypeStatement(ltr)
    matched = {}
    for statement in type_statements:
        type_name = utils.getUri(statement.theobject, self.prefixes)[1:-1]
        if type_name not in self.config.metadata:
            continue
        matched[type_name] = self.config.metadata[type_name]

    # Type statements were present but none of them is known to the config.
    if len(type_statements) > 0 and not matched:
        return {}
    return matched
def prune(self, star_conn, res_conn, selectedmolecules, stars, relevant_mts):
    """
    Narrow down the candidate molecules of each star using the connections
    between stars and between molecules.

    :param star_conn: mapping keyed by star name; each value is indexed
        with ``'SO'`` and tested for membership of another star name
    :param res_conn: mapping of molecule -> collection of molecules it is
        connected to (presumably produced by ``getMTsConnection``; verify
        against the caller)
    :param selectedmolecules: dict of star name -> list of candidate molecules
    :param stars: dict of star name -> list of triple patterns
    :param relevant_mts: dict of molecule -> metadata object exposing
        ``predicates``, whose entries expose ``predicate`` and ``ranges``
    :return: dict of star name -> list of remaining candidate molecules;
        a star whose candidates would all be removed keeps its original
        candidates
    """
    newselected = {}  # filled below but not read again within this method
    res = {}
    counter = 0
    # Stars with exactly one candidate are already decided.
    for s in selectedmolecules:
        if len(selectedmolecules[s]) == 1:
            newselected[s] = list(selectedmolecules[s])
            res[s] = list(selectedmolecules[s])
            counter += 1
        else:
            newselected[s] = []
            res[s] = []
    # Every star has a single candidate: nothing left to prune.
    if counter == len(selectedmolecules):
        return res

    # check predicate level connections
    # Phase 1: for each star s and each star c it is linked to, collect the
    # candidates of c that appear as a range of the connecting predicates.
    newfilteredonly = {}
    for s in res:
        # Stars c whose 'SO' entry in star_conn contains s.
        sc = [c for c in star_conn if s in star_conn[c]['SO']]
        for c in sc:
            # Predicates of the triples in s whose object is named c.
            connectingtp = [utils.getUri(tp.predicate, self.prefixes)[1:-1]
                            for tp in stars[s] if tp.theobject.name == c]
            connectingtp = list(set(connectingtp))
            sm = selectedmolecules[s]
            for m in sm:
                # Ranges of m's predicates that are among the connecting ones.
                srange = [p for r in relevant_mts[m].predicates
                          for p in relevant_mts[m].predicates[r].ranges
                          if relevant_mts[m].predicates[r].predicate in connectingtp]
                # Keep only ranges that are also candidates of c.
                srange = list(set(srange).intersection(selectedmolecules[c]))
                # Disabled: dropping m from s when it has no matching range.
                # if len(srange) == 0:
                #     selectedmolecules[s].remove(m)
                if c in newfilteredonly:
                    newfilteredonly[c].extend(srange)
                else:
                    newfilteredonly[c] = srange
                # De-duplicate what has been accumulated for c so far.
                newfilteredonly[c] = list(set(newfilteredonly[c]))

    # Phase 2: drop candidates of s that have connections, none of which
    # leads to a remaining candidate of the linked star c.
    already_checked = []
    for s in res:
        sc = [c for c in star_conn if s in star_conn[c]['SO']]
        for c in sc:
            # Each pair is handled once; the pair key is the concatenation
            # of the two star names, stored in both orders.
            if s + c in already_checked or c + s in already_checked:
                continue
            already_checked.extend([s + c, c + s])
            # Work on copies so that removals below do not disturb iteration;
            # stars without a filtered list start from their full candidates.
            if c in newfilteredonly:
                c_newfilter = newfilteredonly[c].copy()
            else:
                c_newfilter = selectedmolecules[c].copy()
                newfilteredonly[c] = selectedmolecules[c].copy()
            if s in newfilteredonly:
                s_newfilter = newfilteredonly[s].copy()
            else:
                s_newfilter = selectedmolecules[s].copy()
                newfilteredonly[s] = selectedmolecules[s].copy()
            for m in s_newfilter:
                con = res_conn[m]
                # Molecules without any connection are left untouched.
                if len(con) == 0:
                    continue
                new_res = list(set(con).intersection(c_newfilter))
                if len(new_res) == 0:
                    newfilteredonly[s].remove(m)
            # Disabled: the symmetric check from c towards s.
            # for m in c_newfilter:
            #     con = res_conn[m]
            #     if len(con) == 0:
            #         continue
            #     new_res = list(set(con).intersection(s_newfilter))
            #     if len(new_res) == 0:
            #         newfilteredonly[c].remove(m)

    # Filtered candidates replace the preliminary result.
    for s in newfilteredonly:
        res[s] = list(set(newfilteredonly[s]))
    # A star left without candidates falls back to its original candidates.
    for s in res:
        if len(res[s]) == 0:
            res[s] = selectedmolecules[s]
        res[s] = list(set(res[s]))
    return res