def default(self, line):
    """Parse ``line`` as a query, then plan, print and optionally evaluate it.

    Steps, in order:

    1. ``parse(line)`` produces the DNF query, which is printed.
    2. ``getPlan`` is tried.  On success the generated SQL is printed, the
       plan is drawn when ``self.graphQueryPlan`` is set, and
       ``printProbability`` is called when ``self.execSQL`` is set.
    3. If planning raises ``algorithm.UnsafeException`` and ``self.sample``
       is set, a safe residual query is searched for and, when
       ``self.execSQL`` is set, estimated with ``safe.SafeSample``.
    4. The naive and Karp-Luby samplers run when ``self.naive`` /
       ``self.karpluby`` are set.

    A ``ParseError`` raised anywhere in the above is reported as
    "Failed to parse".  Nothing is returned; all results are printed.

    Every ``print`` below receives exactly one pre-formatted string.  A
    parenthesised print with several comma-separated values is a tuple
    under the Python 2 print statement and would be shown as
    ``('a', 'b')``; a single argument prints identically as a Python 2
    statement and as a Python 3 function call.
    """

    def meanOf(times):
        # A sampler that recorded no sample times must not crash the
        # caller with ZeroDivisionError; report 0.0 instead.
        if not times:
            return 0.0
        return sum(times) / float(len(times))

    try:
        queryDNF = parse(line)
        print("Query:  %s" % (queryDNF,))
        try:
            plan = getPlan(queryDNF)
            querySQL = plan.generateSQL_DNF()
            print("%s \n" % (algorithm.getPrettySQL(querySQL),))
            if self.graphQueryPlan:
                drawTree(plan, self.graphQueryPlanFile)
                print("\nQuery plan saved to %s" % self.graphQueryPlanFile)
            if self.execSQL:
                printProbability(querySQL)
        except algorithm.UnsafeException:
            if self.sample:
                try:
                    startTime = time.time()
                    print("Query unsafe, trying to find safe residual query")
                    (relationsToSample, residualDNF, querySQL,
                     relsObjects) = algorithm.findSafeResidualQuery(queryDNF)
                    print("Relations to sample: %s"
                          % ', '.join(relationsToSample))
                    print("Residual Query:  %s" % (residualDNF,))
                    print("%s \n" % (algorithm.getPrettySQL(querySQL),))
                    if self.execSQL:
                        ssExecutor = safe.SafeSample(conn)
                        estimate = ssExecutor.safeSample(
                            relationsToSample, relsObjects, querySQL,
                            self.numSamples, self.epsilon, self.delta)
                        print("SafeSample Estimate: %s" % (estimate,))
                        safeSampleTimes = ssExecutor.sampleTimes
                        print("SafeSample Total Time: %f seconds" % (
                            time.time() - startTime))
                        print("SafeSample Mean Sample Time: %f seconds" % (
                            meanOf(safeSampleTimes)))
                        safeSampleEstimates = ssExecutor.getEstimates()
                        print("SafeSample Number of Samples: "
                              "%d (%d + %d to estimate variance)" % (
                                  len(safeSampleEstimates) +
                                  ssExecutor.step2NumSamples,
                                  len(safeSampleEstimates),
                                  ssExecutor.step2NumSamples))
                except algorithm.UnsafeException:
                    print("Error: No safe residual query found")
            else:
                print("Query is unsafe")

        # NOTE(review): the original indentation was lost; the two sampler
        # sections are placed at the outer ``try`` level so they run whether
        # or not an exact plan was found -- confirm against the intended
        # layout.
        if self.naive:
            print("")
            startTime = time.time()
            naiveExecutor = naive.NaiveSampler(conn)
            estimate = naiveExecutor.naiveSample(queryDNF, self.numSamples)
            print("Naive Sampler Estimate: %f (%d samples)" % (
                estimate, self.numSamples))
            naiveTimes = naiveExecutor.sampleTimes
            print("Naive Sampler Total Time: %f seconds" % (
                time.time() - startTime))
            print("Naive Sampler Mean Sample Time: %f seconds" % (
                meanOf(naiveTimes)))

        if self.karpluby:
            print("")
            startTime = time.time()
            klExecutor = karp_luby.KarpLuby(conn)
            estimate = klExecutor.karpLuby(
                queryDNF, self.numSamples, self.epsilon, self.delta)
            print("Karp-Luby Estimate: %s" % (estimate,))
            karpLubyTimes = klExecutor.sampleTimes
            print("Karp-Luby Total Time: %f seconds" % (
                time.time() - startTime))
            print("Karp-Luby Mean Sample Time: %f seconds" % (
                meanOf(karpLubyTimes)))
            karpLubyEstimates = klExecutor.getEstimates()
            print("Karp-Luby Number of Samples: "
                  "%d (%d + %d to estimate variance)" % (
                      len(karpLubyEstimates) + klExecutor.step2NumSamples,
                      len(karpLubyEstimates),
                      klExecutor.step2NumSamples))
            print("Karp-Luby Stopping Rule Samples "
                  "(included in total above): %d" % (
                      klExecutor.step1NumSamples))
    except ParseError:
        print("Failed to parse")
def prepareQuery(self, query):
    """Build, print and return the lineage SQL for ``query``.

    One ``SELECT`` is generated per conjunct of ``query`` and the results
    are joined with ``UNION``.  For every relation of a conjunct the
    SELECT list carries three columns: the quoted relation name, the
    alias's ``id`` (prefixed with ``-`` when the relation is negated) and
    the alias's ``p``.  Conjuncts with fewer relations than the largest
    one are padded with ``'', 0, 0`` triples so every branch selects the
    same number of columns.

    The WHERE clause collects, in this order, the constant constraints of
    each relation and then the column equalities linking neighbouring
    relations that share a variable.

    The final SQL is printed via ``algorithm.getPrettySQL`` and returned
    unformatted.
    """
    widest = max([len(c.getRelations()) for c in query.getConjuncts()])
    branches = []

    for conjunct in query.getConjuncts():
        relations = conjunct.getRelations()

        # Alias each relation occurrence as <name><k>, k counting from 1
        # per relation name within this conjunct.
        seenCount = {}
        aliasOf = {}
        for rel in relations:
            name = rel.getName()
            seenCount[name] = seenCount.get(name, 0) + 1
            aliasOf[rel] = name + str(seenCount[name])

        fromSQL = ', '.join(
            ["%s as %s" % (rel.getName(), aliasOf[rel])
             for rel in relations])

        columns = []
        conditions = []
        for rel in relations:
            alias = aliasOf[rel]
            if rel.isNegated():
                triple = "'%s', -%s.id, %s.p"
            else:
                triple = "'%s', %s.id, %s.p"
            columns.append(triple % (rel.getName(), alias, alias))

            for col, constant in enumerate(rel.getConstraints()):
                if not constant:
                    # Falsy entries carry no constraint for this column.
                    continue
                if constant > 0:
                    conditions.append(
                        "%s.v%d = %d" % (alias, col, constant))
                else:
                    # Negative constants encode an inequality.
                    conditions.append(
                        "%s.v%d != %d" % (alias, col, -1 * constant))

        # Pad short conjuncts up to the widest one (no-op when the
        # difference is zero).
        columns.extend(["'', 0, 0"] * (widest - len(relations)))
        selectSQL = ', '.join(columns)

        allVarPositions = [component.getVarPositions()
                           for component in conjunct.getComponents()]
        for positionsByVar in allVarPositions:
            for var in positionsByVar:
                perRelation = positionsByVar[var]
                relsForVar = list(perRelation.keys())
                # Chain consecutive relations holding the same variable.
                for left, right in zip(relsForVar, relsForVar[1:]):
                    leftPositions = perRelation[left]
                    rightPositions = perRelation[right]
                    for leftPos in leftPositions:
                        for rightPos in rightPositions:
                            conditions.append(
                                "%s.v%d = %s.v%d" % (
                                    aliasOf[left],
                                    left.getTableColumn(leftPos),
                                    aliasOf[right],
                                    right.getTableColumn(rightPos)))

        if conditions:
            whereSQL = "WHERE %s" % ' AND '.join(conditions)
        else:
            whereSQL = ""
        branches.append(
            "SELECT %s FROM %s %s" % (selectSQL, fromSQL, whereSQL))

    lineageQuery = ' UNION '.join(branches)
    print(algorithm.getPrettySQL(lineageQuery))
    return lineageQuery
def prepareQuery(self, query):
    """Build, print and return the lineage SQL for ``query``.

    One ``SELECT`` is generated per conjunct of ``query`` and the results
    are joined with ``UNION``.  For every relation of a conjunct the
    SELECT list carries three columns: the quoted relation name, the
    alias's ``id`` (prefixed with ``-`` when the relation is negated) and
    the alias's ``p``.  Conjuncts with fewer relations than the largest
    one are padded with ``'', 0, 0`` triples so every branch selects the
    same number of columns.

    The WHERE clause collects, in this order, the constant constraints of
    each relation and then the column equalities linking neighbouring
    relations that share a variable.

    The final SQL is printed via ``algorithm.getPrettySQL`` and returned
    unformatted.
    """
    widest = max([len(c.getRelations()) for c in query.getConjuncts()])
    branches = []

    for conjunct in query.getConjuncts():
        relations = conjunct.getRelations()

        # Alias each relation occurrence as <name><k>, k counting from 1
        # per relation name within this conjunct.
        seenCount = {}
        aliasOf = {}
        for rel in relations:
            name = rel.getName()
            seenCount[name] = seenCount.get(name, 0) + 1
            aliasOf[rel] = name + str(seenCount[name])

        fromSQL = ', '.join(
            ["%s as %s" % (rel.getName(), aliasOf[rel])
             for rel in relations])

        columns = []
        conditions = []
        for rel in relations:
            alias = aliasOf[rel]
            if rel.isNegated():
                triple = "'%s', -%s.id, %s.p"
            else:
                triple = "'%s', %s.id, %s.p"
            columns.append(triple % (rel.getName(), alias, alias))

            for col, constant in enumerate(rel.getConstraints()):
                if not constant:
                    # Falsy entries carry no constraint for this column.
                    continue
                if constant > 0:
                    conditions.append(
                        "%s.v%d = %d" % (alias, col, constant))
                else:
                    # Negative constants encode an inequality.
                    conditions.append(
                        "%s.v%d != %d" % (alias, col, -1 * constant))

        # Pad short conjuncts up to the widest one (no-op when the
        # difference is zero).
        columns.extend(["'', 0, 0"] * (widest - len(relations)))
        selectSQL = ', '.join(columns)

        allVarPositions = [component.getVarPositions()
                           for component in conjunct.getComponents()]
        for positionsByVar in allVarPositions:
            for var in positionsByVar:
                perRelation = positionsByVar[var]
                relsForVar = list(perRelation.keys())
                # Chain consecutive relations holding the same variable.
                for left, right in zip(relsForVar, relsForVar[1:]):
                    leftPositions = perRelation[left]
                    rightPositions = perRelation[right]
                    for leftPos in leftPositions:
                        for rightPos in rightPositions:
                            conditions.append(
                                "%s.v%d = %s.v%d" % (
                                    aliasOf[left],
                                    left.getTableColumn(leftPos),
                                    aliasOf[right],
                                    right.getTableColumn(rightPos)))

        if conditions:
            whereSQL = "WHERE %s" % ' AND '.join(conditions)
        else:
            whereSQL = ""
        branches.append(
            "SELECT %s FROM %s %s" % (selectSQL, fromSQL, whereSQL))

    lineageQuery = ' UNION '.join(branches)
    print(algorithm.getPrettySQL(lineageQuery))
    return lineageQuery
def default(self, line):
    """Parse ``line`` as a query, then plan, print and optionally evaluate it.

    Steps, in order:

    1. ``parse(line)`` produces the DNF query, which is printed.
    2. ``getPlan`` is tried.  On success the generated SQL is printed, the
       plan is drawn when ``self.graphQueryPlan`` is set, and
       ``printProbability`` is called when ``self.execSQL`` is set.
    3. If planning raises ``algorithm.UnsafeException`` and ``self.sample``
       is set, a safe residual query is searched for and, when
       ``self.execSQL`` is set, estimated with ``safe.SafeSample``.
    4. The naive and Karp-Luby samplers run when ``self.naive`` /
       ``self.karpluby`` are set.

    A ``ParseError`` raised anywhere in the above is reported as
    "Failed to parse".  Nothing is returned; all results are printed.

    Every ``print`` below receives exactly one pre-formatted string.  A
    parenthesised print with several comma-separated values is a tuple
    under the Python 2 print statement and would be shown as
    ``('a', 'b')``; a single argument prints identically as a Python 2
    statement and as a Python 3 function call.
    """

    def meanOf(times):
        # A sampler that recorded no sample times must not crash the
        # caller with ZeroDivisionError; report 0.0 instead.
        if not times:
            return 0.0
        return sum(times) / float(len(times))

    try:
        queryDNF = parse(line)
        print("Query:  %s" % (queryDNF,))
        try:
            plan = getPlan(queryDNF)
            querySQL = plan.generateSQL_DNF()
            print("%s \n" % (algorithm.getPrettySQL(querySQL),))
            if self.graphQueryPlan:
                drawTree(plan, self.graphQueryPlanFile)
                print("\nQuery plan saved to %s" % self.graphQueryPlanFile)
            if self.execSQL:
                printProbability(querySQL)
        except algorithm.UnsafeException:
            if self.sample:
                try:
                    startTime = time.time()
                    print("Query unsafe, trying to find safe residual query")
                    (relationsToSample, residualDNF, querySQL,
                     relsObjects) = algorithm.findSafeResidualQuery(queryDNF)
                    print("Relations to sample: %s"
                          % ', '.join(relationsToSample))
                    print("Residual Query:  %s" % (residualDNF,))
                    print("%s \n" % (algorithm.getPrettySQL(querySQL),))
                    if self.execSQL:
                        ssExecutor = safe.SafeSample(conn)
                        estimate = ssExecutor.safeSample(
                            relationsToSample, relsObjects, querySQL,
                            self.numSamples, self.epsilon, self.delta)
                        print("SafeSample Estimate: %s" % (estimate,))
                        safeSampleTimes = ssExecutor.sampleTimes
                        print("SafeSample Total Time: %f seconds" % (
                            time.time() - startTime))
                        print("SafeSample Mean Sample Time: %f seconds" % (
                            meanOf(safeSampleTimes)))
                        safeSampleEstimates = ssExecutor.getEstimates()
                        print("SafeSample Number of Samples: "
                              "%d (%d + %d to estimate variance)" % (
                                  len(safeSampleEstimates) +
                                  ssExecutor.step2NumSamples,
                                  len(safeSampleEstimates),
                                  ssExecutor.step2NumSamples))
                except algorithm.UnsafeException:
                    print("Error: No safe residual query found")
            else:
                print("Query is unsafe")

        # NOTE(review): the original indentation was lost; the two sampler
        # sections are placed at the outer ``try`` level so they run whether
        # or not an exact plan was found -- confirm against the intended
        # layout.
        if self.naive:
            print("")
            startTime = time.time()
            naiveExecutor = naive.NaiveSampler(conn)
            estimate = naiveExecutor.naiveSample(queryDNF, self.numSamples)
            print("Naive Sampler Estimate: %f (%d samples)" % (
                estimate, self.numSamples))
            naiveTimes = naiveExecutor.sampleTimes
            print("Naive Sampler Total Time: %f seconds" % (
                time.time() - startTime))
            print("Naive Sampler Mean Sample Time: %f seconds" % (
                meanOf(naiveTimes)))

        if self.karpluby:
            print("")
            startTime = time.time()
            klExecutor = karp_luby.KarpLuby(conn)
            estimate = klExecutor.karpLuby(
                queryDNF, self.numSamples, self.epsilon, self.delta)
            print("Karp-Luby Estimate: %s" % (estimate,))
            karpLubyTimes = klExecutor.sampleTimes
            print("Karp-Luby Total Time: %f seconds" % (
                time.time() - startTime))
            print("Karp-Luby Mean Sample Time: %f seconds" % (
                meanOf(karpLubyTimes)))
            karpLubyEstimates = klExecutor.getEstimates()
            print("Karp-Luby Number of Samples: "
                  "%d (%d + %d to estimate variance)" % (
                      len(karpLubyEstimates) + klExecutor.step2NumSamples,
                      len(karpLubyEstimates),
                      klExecutor.step2NumSamples))
            print("Karp-Luby Stopping Rule Samples "
                  "(included in total above): %d" % (
                      klExecutor.step1NumSamples))
    except ParseError:
        print("Failed to parse")
def prepareQuery(self, query):
    """Build, print and return the sampling SQL for ``query``.

    Every relation name used by any conjunct is wrapped in a CTE of the
    same name that adds an ``InSample`` column, set to 1 when
    ``random() < p`` and 0 otherwise.  Each conjunct becomes a
    ``SELECT EXISTS (...)`` over its aliased relations, requiring
    ``InSample = 1`` for plain relations and ``InSample = 0`` for negated
    ones, followed by the relation's constant constraints and then the
    column equalities linking neighbouring relations that share a
    variable.  The per-conjunct queries are combined with ``UNION`` and
    the outer query selects whether ``true`` is among their results.

    The final SQL is printed via ``algorithm.getPrettySQL`` and returned
    unformatted.
    """
    sampledNames = set()
    existsQueries = []

    for conjunct in query.getConjuncts():
        relations = conjunct.getRelations()

        # Alias each relation occurrence as <name><k>, k counting from 1
        # per relation name within this conjunct, and remember every name
        # that needs a sampled CTE.
        seenCount = {}
        aliasOf = {}
        for rel in relations:
            name = rel.getName()
            seenCount[name] = seenCount.get(name, 0) + 1
            aliasOf[rel] = name + str(seenCount[name])
            sampledNames.add(name)

        fromSQL = ', '.join(
            ["%s as %s" % (rel.getName(), aliasOf[rel])
             for rel in relations])

        conditions = []
        for rel in relations:
            alias = aliasOf[rel]
            if rel.isNegated():
                membership = "%s.InSample = 0"
            else:
                membership = "%s.InSample = 1"
            conditions.append(membership % (alias))

            for col, constant in enumerate(rel.getConstraints()):
                if not constant:
                    # Falsy entries carry no constraint for this column.
                    continue
                if constant > 0:
                    conditions.append(
                        "%s.v%d = %d" % (alias, col, constant))
                else:
                    # Negative constants encode an inequality.
                    conditions.append(
                        "%s.v%d != %d" % (alias, col, -1 * constant))

        allVarPositions = [component.getVarPositions()
                           for component in conjunct.getComponents()]
        for positionsByVar in allVarPositions:
            for var in positionsByVar:
                perRelation = positionsByVar[var]
                relsForVar = list(perRelation.keys())
                # Chain consecutive relations holding the same variable.
                for left, right in zip(relsForVar, relsForVar[1:]):
                    leftPositions = perRelation[left]
                    rightPositions = perRelation[right]
                    for leftPos in leftPositions:
                        for rightPos in rightPositions:
                            conditions.append(
                                "%s.v%d = %s.v%d" % (
                                    aliasOf[left],
                                    left.getTableColumn(leftPos),
                                    aliasOf[right],
                                    right.getTableColumn(rightPos)))

        if conditions:
            whereSQL = "WHERE %s" % ' AND '.join(conditions)
        else:
            whereSQL = ""
        existsQueries.append(
            "SELECT EXISTS (SELECT * FROM %s %s)" % (fromSQL, whereSQL))

    cteDefinitions = [
        "%s as (select *, CASE WHEN random() < p THEN 1 ELSE 0 END as InSample FROM %s)" % (name, name)
        for name in sampledNames]
    querySQL = 'WITH %s SELECT true IN (%s) Q' % (
        ', '.join(cteDefinitions), ' UNION '.join(existsQueries))
    print(algorithm.getPrettySQL(querySQL))
    return querySQL