def translateAggregates(q, M):
    """
    Rewrite the aggregates used by query ``q`` and wrap ``M`` in an
    ``AggregateJoin`` node.

    ``q`` is modified in place: ``q.expr`` is replaced by the rewritten
    select expressions, and ``q.having`` / ``q.orderby`` are replaced when
    they contain an aggregate.

    Returns a pair ``(AggregateJoin node, E)`` where ``E`` is a list of
    ``(result variable, select variable)`` tuples, one per plain variable
    in ``q.var``.
    """
    aggregates = []
    extensions = []

    # "SELECT (expr AS ?var)": wrap unaggregated variables in a sample
    # (leaving ?var itself alone), then hand the expression to _aggs.
    # NOTE(review): _aggs is defined elsewhere; presumably it appends each
    # aggregate it meets to ``A`` -- confirm against its definition.
    if q.evar:
        rewritten = []
        for expr, var in zip(q.expr, q.evar):
            expr = traverse(expr, functools.partial(_sample, v=var))
            expr = traverse(expr, functools.partial(_aggs, A=aggregates))
            rewritten.append(expr)
        q.expr = rewritten

    # HAVING first, then ORDER BY: same treatment, but only when the
    # clause actually contains an aggregate.
    for clause in ('having', 'orderby'):
        if traverse(getattr(q, clause), _hasAggregate, complete=False):
            setattr(q, clause, traverse(getattr(q, clause), _sample))
            traverse(getattr(q, clause),
                     functools.partial(_aggs, A=aggregates))

    # sample all other select vars
    # TODO: only allowed for vars in group-by?
    for var in q.var or ():
        result_var = Variable('__agg_%d__' % (len(aggregates) + 1))
        aggregates.append(
            CompValue('Aggregate_Sample', vars=var, res=result_var))
        extensions.append((result_var, var))

    return CompValue('AggregateJoin', A=aggregates, p=M), extensions
def translateQuery(q, base=None, initNs=None):
    """
    Translate a query-parsetree to a SPARQL Algebra Expression

    ``q`` is a ``(prologue, query)`` pair; ``q[1]`` is overwritten with the
    prefix-resolved query tree.

    Return a rdflib_sparql.sparql.Query object
    """
    prologue = translatePrologue(q[0], base, initNs)

    # absolutize/resolve prefixes, storing the result back into q
    resolve = functools.partial(translatePName, prologue=prologue)
    q[1] = traverse(q[1], visitPost=resolve)
    tree = q[1]

    P, PV = translate(tree)
    datasetClause = tree.datasetClause

    if tree.name == 'ConstructQuery':
        # CONSTRUCT carries a template instead of projected variables
        template = None
        if tree.template:
            template = triples(tree.template)
        res = CompValue(tree.name, p=P,
                        template=template,
                        datasetClause=datasetClause)
    else:
        res = CompValue(tree.name, p=P,
                        datasetClause=datasetClause,
                        PV=PV)

    return Query(prologue, traverse(res, visitPost=simplify))
def _sample(e, v=None):
    """
    Visitor replacing each unaggregated variable V with Sample(V).

    Nodes whose name starts with ``Aggregate_`` are returned unchanged, so
    variables inside an existing aggregate are not wrapped again.  A
    variable equal to ``v`` is left alone.  Returns None for everything
    that is not replaced.
    """
    already_aggregated = (
        isinstance(e, CompValue) and e.name.startswith("Aggregate_"))
    if already_aggregated:
        # do not replace vars in aggregates
        return e

    if isinstance(e, Variable) and v != e:
        return CompValue('Aggregate_Sample', vars=e)

    return None
def translateValues(v):
    """
    Turn a VALUES clause into a ``values`` node holding one
    ``{variable: value}`` dict per row.

    When the first entry of ``v.value`` is not a list, every entry is
    treated as a value for the single variable ``v.var[0]``; otherwise each
    entry is a row zipped against ``v.var``.
    """
    # NOTE(review): with no variables or no values this returns a bare
    # empty list rather than a 'values' CompValue -- confirm callers cope.
    if not v.var or not v.value:
        return []

    # The length check between v.var and each row is intentionally not
    # enforced here; zip() silently truncates to the shorter of the two.
    if isinstance(v.value[0], list):
        rows = [dict(zip(v.var, row)) for row in v.value]
    else:
        rows = [{v.var[0]: value} for value in v.value]

    return CompValue('values', res=rows)
def Extend(p, expr, var):
    """Build an ``Extend`` algebra node over ``p`` with ``expr`` and ``var``."""
    node = CompValue('Extend', p=p, expr=expr, var=var)
    return node
def Filter(expr, p):
    """Build a ``Filter`` algebra node applying ``expr`` to ``p``."""
    node = CompValue('Filter', expr=expr, p=p)
    return node
def LeftJoin(p1, p2, expr):
    """Build a ``LeftJoin`` algebra node of ``p1`` and ``p2`` with ``expr``."""
    node = CompValue('LeftJoin', p1=p1, p2=p2, expr=expr)
    return node
def BGP(triples=None):
    """
    Build a ``BGP`` algebra node.

    A falsy ``triples`` argument (including the default None) is replaced
    by a fresh empty list.
    """
    if not triples:
        triples = []
    return CompValue('BGP', triples=triples)
def Graph(term, graph):
    """Build a ``Graph`` algebra node; ``graph`` is stored under key ``p``."""
    node = CompValue('Graph', term=term, p=graph)
    return node
def ToMultiSet(p):
    """Build a ``ToMultiSet`` algebra node wrapping ``p``."""
    node = CompValue('ToMultiSet', p=p)
    return node
def Minus(p1, p2):
    """Build a ``Minus`` algebra node with operands ``p1`` and ``p2``."""
    node = CompValue('Minus', p1=p1, p2=p2)
    return node
def Join(p1, p2):
    """Build a ``Join`` algebra node with operands ``p1`` and ``p2``."""
    node = CompValue('Join', p1=p1, p2=p2)
    return node
def Union(p1, p2):
    """Build a ``Union`` algebra node with operands ``p1`` and ``p2``."""
    node = CompValue('Union', p1=p1, p2=p2)
    return node
def Project(p, PV):
    """Build a ``Project`` algebra node over ``p`` with variables ``PV``."""
    node = CompValue('Project', p=p, PV=PV)
    return node
def translate(q):
    """
    Translate a parsed query, including its solution modifiers, into an
    algebra expression.

    http://www.w3.org/TR/sparql11-query/#convertSolMod

    ``q`` is modified in place: ``q.where`` is replaced by its
    path-translated form, and when aggregation applies
    ``translateAggregates`` may rewrite ``q.expr``, ``q.having`` and
    ``q.orderby``.

    Returns a pair ``(M, PV)``: the algebra expression and the list of
    projected variables.  For ``SELECT *`` the variables come from a set,
    so their order in ``PV`` is not defined.
    """
    _traverse(q, _simplifyFilters)

    q.where = traverse(q.where, visitPost=translatePath)

    # TODO: Var scope test
    VS = set()
    traverse(q.where, functools.partial(_findVars, res=VS))

    # all query types have a where part
    M = translateGroupGraphPattern(q.where)

    aggregate = False
    if q.groupby:
        conditions = []
        # convert "GROUP BY (?expr as ?var)" to an Extend
        for c in q.groupby.condition:
            if isinstance(c, CompValue) and c.name == 'GroupAs':
                M = Extend(M, c.expr, c.var)
                c = c.var
            conditions.append(c)

        M = Group(p=M, expr=conditions)
        aggregate = True
    elif (traverse(q.having, _hasAggregate, complete=False)
          or traverse(q.orderby, _hasAggregate, complete=False)
          or any(traverse(x, _hasAggregate, complete=False)
                 for x in q.expr or [])):
        # if any aggregate is used, implicit group by
        M = Group(p=M)
        aggregate = True

    if aggregate:
        M, E = translateAggregates(q, M)
    else:
        E = []

    # HAVING
    if q.having:
        M = Filter(expr=and_(*q.having.condition), p=M)

    # VALUES
    if q.valuesClause:
        M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause)))

    if not q.var and not q.expr:
        # select *
        PV = list(VS)
    else:
        # Plain variables first, then "(expr AS ?var)" variables, each
        # listed once in order of first appearance.
        PV = []
        for v in q.var or ():
            if v not in PV:
                PV.append(v)
        if q.evar:
            for v in q.evar:
                if v not in PV:
                    PV.append(v)

            E.extend(zip(q.expr, q.evar))

    for e, v in E:
        M = Extend(M, e, v)

    # ORDER BY
    if q.orderby:
        M = OrderBy(M, [CompValue('OrderCondition', expr=c.expr,
                                  order=c.order)
                        for c in q.orderby.condition])

    # PROJECT
    M = Project(M, PV)

    if q.modifier:
        if q.modifier == 'DISTINCT':
            M = CompValue('Distinct', p=M)
        elif q.modifier == 'REDUCED':
            M = CompValue('Reduced', p=M)

    if q.limitoffset:
        # Identity checks against None: limit/offset are term objects with
        # their own comparison operators, and a value of 0 must still count
        # as "present".
        offset = 0
        if q.limitoffset.offset is not None:
            offset = q.limitoffset.offset.toPython()

        if q.limitoffset.limit is not None:
            M = CompValue('Slice', p=M, start=offset,
                          length=q.limitoffset.limit.toPython())
        else:
            M = CompValue('Slice', p=M, start=offset)

    return M, PV
def Group(p, expr=None):
    """
    Build a ``Group`` algebra node over ``p``.

    ``expr`` holds the grouping conditions; it is stored as given, so the
    default leaves it as None.
    """
    node = CompValue('Group', p=p, expr=expr)
    return node
def OrderBy(p, expr):
    """Build an ``OrderBy`` algebra node over ``p`` with conditions ``expr``."""
    node = CompValue('OrderBy', p=p, expr=expr)
    return node