def _GetUnencryptedValuesWithType(table, column_index, value_type): if (value_type is None or value_type.lower() not in ['string', 'integer', 'float']): raise ValueError('Not an known type.') value_type = value_type.lower() value_column = [] for i in range(len(table)): if table[i][column_index] is None: value = util.LiteralToken('null', None) else: value = table[i][column_index] if value_type == 'string': value = util.StringLiteralToken('"%s"' % str(value).strip()) elif value_type == 'integer': value = long(value) else: value = float(value) value_column.append(value) return value_column
def _CollapseFunctions(stack): """Collapses functions by evaluating them for actual values. Replaces a function's postfix expression with a single token. If the function can be evaluated (no fields included as arguments), the single token is the value of function's evaluation. Otherwise, the function is collapsed into a single token without evaluation. Arguments: stack: The stack whose functions are to be collapsed and resolved. Raises: bigquery_client.BigqueryInvalidQueryError: If a field exists inside the arguments of a function. Returns: True iff a function is found and collapsed. In other words, another potential function can still exist. """ for i in xrange(len(stack)): if isinstance(stack[i], util.BuiltInFunctionToken): start_idx, postfix_expr = interpreter.GetSingleValue(stack[:i + 1]) if util.IsEncryptedExpression(postfix_expr): raise bigquery_client.BigqueryInvalidQueryError( 'Invalid aggregation function argument: Cannot put an encrypted ' 'field as an argument to a built-in function.', None, None, None) # If the expression has no fields, we want to get the actual value. # But, if the field has a field, we have to get the infix string instead. try: result = interpreter.Evaluate(list(postfix_expr)) if isinstance(result, basestring): result = util.StringLiteralToken('"%s"' % result) elif result is None: result = util.LiteralToken('NULL', None) elif str(result).lower() in ['true', 'false']: result = util.LiteralToken(str(result).lower(), result) stack[start_idx:i + 1] = [result] except bigquery_client.BigqueryInvalidQueryError: result = interpreter.ToInfix(list(postfix_expr)) stack[start_idx:i + 1] = [util.FieldToken(result)] return True return False
def testComputeRows(self):
  # Checks encrypted_bigquery_client._ComputeRows for a query made only of
  # constants and for a query that mixes constants with queried columns.
  def compute(postfix_columns, queried_values):
    return encrypted_bigquery_client._ComputeRows(
        postfix_columns, queried_values)

  # Query is 'SELECT 1 + 1, 1 * 1'
  # Testing no queried values.
  constant_columns = [
      [1, 1, util.OperatorToken('+', 2)],
      [1, 1, util.OperatorToken('*', 2)],
  ]
  no_values = {}
  expected_rows = [['2', '1']]
  actual_rows = compute(constant_columns, no_values)
  self.assertEqual(actual_rows, expected_rows)

  # Query is 'SELECT 1 + a, 1 * b, "hello"'
  # There are two rows of values for a and b (shown in query).
  # Result becomes as below:
  # 1 + a | 1 * b | "hello"
  #   2       3     "hello"
  #   4       5     "hello"
  field_columns = [
      [1, util.FieldToken('a'), util.OperatorToken('+', 2)],
      [1, util.FieldToken('b'), util.OperatorToken('*', 2)],
      [util.StringLiteralToken('"hello"')],
  ]
  values_by_field = {'a': [1, 3], 'b': [3, 5]}
  expected_rows = [
      ['2', '3', 'hello'],
      ['4', '5', 'hello'],
  ]
  actual_rows = compute(field_columns, values_by_field)
  self.assertEqual(actual_rows, expected_rows)
def testEncryptedContains(self):
  # Checks how interpreter.RewriteSelectionCriteria handles CONTAINS for
  # plain fields, searchwords columns and the operand combinations that are
  # expected to be rejected.
  cars_schema = test_util.GetCarsSchema()
  master_key = test_util.GetMasterKey()
  prefix = util.SEARCHWORDS_PREFIX

  def rewrite(tokens, schema):
    return interpreter.RewriteSelectionCriteria(
        tokens, schema, master_key, _TABLE_ID)

  def assert_rejected(tokens, schema):
    self.assertRaises(bigquery_client.BigqueryInvalidQueryError,
                      interpreter.RewriteSelectionCriteria,
                      tokens, schema, master_key, _TABLE_ID)

  # Unencrypted field: the expression is only turned into infix form.
  tokens = [
      util.FieldToken('Year'),
      util.BuiltInFunctionToken('string'),
      util.StringLiteralToken('"1"'),
      util.OperatorToken('CONTAINS', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(string(Year) contains "1")')

  # Searchwords column against a string literal.
  model_contains = (
      '(' + prefix + 'Model contains '
      'to_base64(left(bytes(sha1(concat(left(' + prefix +
      'Model, 24), \'yB9HY2qv+DI=\'))), 8)))')
  tokens = [
      util.SearchwordsToken('Model'),
      util.StringLiteralToken('"A"'),
      util.OperatorToken('contains', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema), model_contains)

  # Operand combinations that must raise BigqueryInvalidQueryError.
  assert_rejected(
      [
          util.SearchwordsToken('Model'),
          util.FieldToken('Year'),
          util.OperatorToken('contains', 2),
      ],
      cars_schema)
  assert_rejected(
      [
          util.PseudonymToken('Make'),
          'A',
          util.OperatorToken('contains', 2),
      ],
      cars_schema)
  assert_rejected(
      [
          util.SearchwordsToken('Model'),
          util.SearchwordsToken('Model'),
          util.OperatorToken('contains', 2),
      ],
      cars_schema)
  assert_rejected(
      [
          'Hello',
          util.SearchwordsToken('Model'),
          util.OperatorToken('contains', 2),
      ],
      cars_schema)

  # Negation wraps the same rewritten CONTAINS expression.
  tokens = [
      util.SearchwordsToken('Model'),
      util.StringLiteralToken('"A"'),
      util.OperatorToken('contains', 2),
      util.OperatorToken('not', 1),
  ]
  self.assertEqual(rewrite(tokens, cars_schema), 'not ' + model_contains)

  # Nested (dotted) field name: the prefix is placed on the last component.
  places_schema = test_util.GetPlacesSchema()
  tokens = [
      util.SearchwordsToken('citiesLived.place'),
      util.StringLiteralToken('"A"'),
      util.OperatorToken('contains', 2),
  ]
  place_contains = (
      '(citiesLived.' + prefix + 'place contains '
      'to_base64(left(bytes(sha1(concat(left(citiesLived.' + prefix +
      'place, 24), \'cBKPKGiY2cg=\'))), 8)))')
  self.assertEqual(rewrite(tokens, places_schema), place_contains)
def testEncryptedEquality(self):
  # Checks how interpreter.RewriteSelectionCriteria handles =, == and != when
  # the operands are plain fields, pseudonym columns, string literals, or
  # encrypted columns that cannot be compared.
  # NOTE(review): a second testEncryptedEquality is defined later in this
  # file; if both live in the same class the later one replaces this one, so
  # confirm which definition is meant to run.
  cars_schema = test_util.GetCarsSchema()
  master_key = test_util.GetMasterKey()
  prefix = util.PSEUDONYM_PREFIX

  def rewrite(tokens, schema):
    return interpreter.RewriteSelectionCriteria(
        tokens, schema, master_key, _TABLE_ID)

  def assert_rejected(tokens, schema):
    self.assertRaises(bigquery_client.BigqueryInvalidQueryError,
                      interpreter.RewriteSelectionCriteria,
                      tokens, schema, master_key, _TABLE_ID)

  # Unencrypted arithmetic is only turned into infix form.
  tokens = [
      util.FieldToken('Year'),
      1,
      util.OperatorToken('+', 2),
      2000,
      util.OperatorToken('=', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema), '((Year + 1) = 2000)')

  # Plain field compared with a pseudonym column.
  tokens = [
      util.FieldToken('Year'),
      util.PseudonymToken('Make'),
      util.OperatorToken('=', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(Year = ' + prefix + 'Make)')

  # Pseudonym column compared with a literal: the literal is replaced.
  tokens = [
      util.PseudonymToken('Make'),
      util.StringLiteralToken('"Hello"'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(' + prefix + 'Make == "HS57DHbh2KlkqNJREmu1wQ==")')

  # Same comparison with the literal on the left-hand side.
  tokens = [
      util.StringLiteralToken('"Hello"'),
      util.PseudonymToken('Make'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '("HS57DHbh2KlkqNJREmu1wQ==" == ' + prefix + 'Make)')

  tokens = [
      util.StringLiteralToken('"Hello"'),
      util.PseudonymToken('Make'),
      util.OperatorToken('!=', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '("HS57DHbh2KlkqNJREmu1wQ==" != ' + prefix + 'Make)')

  # Two pseudonym columns compared with each other.
  tokens = [
      util.PseudonymToken('Make'),
      util.PseudonymToken('Make2'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(' + prefix + 'Make == ' + prefix + 'Make2)')

  # Comparisons that must raise BigqueryInvalidQueryError.
  assert_rejected(
      [
          util.HomomorphicIntToken('Invoice_Price'),
          2,
          util.OperatorToken('==', 2),
      ],
      cars_schema)
  assert_rejected(
      [
          util.PseudonymToken('Make'),
          util.ProbabilisticToken('Price'),
          util.OperatorToken('=', 2),
      ],
      cars_schema)

  # Nested (dotted) field name: the prefix is placed on the last component.
  places_schema = test_util.GetPlacesSchema()
  tokens = [
      util.PseudonymToken('spouse.spouseName'),
      util.StringLiteralToken('"Hello"'),
      util.OperatorToken('=', 2),
  ]
  self.assertEqual(
      rewrite(tokens, places_schema),
      '(spouse.' + prefix + 'spouseName = "HS57DHbh2KlkqNJREmu1wQ==")')
def testEncryptedEquality(self):
  # Checks how interpreter.RewriteSelectionCriteria handles =, == and != when
  # the operands are plain fields, pseudonym columns (with and without the
  # 'related' schema option), string literals, or encrypted columns that
  # cannot be compared.
  # NOTE(review): an earlier testEncryptedEquality is also defined in this
  # file; if both live in the same class this definition replaces it.
  cars_schema = test_util.GetCarsSchema()
  master_key = test_util.GetMasterKey()
  prefix = util.PSEUDONYM_PREFIX

  def rewrite(tokens, schema):
    return interpreter.RewriteSelectionCriteria(
        tokens, schema, master_key, _TABLE_ID)

  def assert_rejected(tokens, schema):
    self.assertRaises(bigquery_client.BigqueryInvalidQueryError,
                      interpreter.RewriteSelectionCriteria,
                      tokens, schema, master_key, _TABLE_ID)

  # Unencrypted arithmetic is only turned into infix form.
  tokens = [
      util.FieldToken('Year'),
      1,
      util.OperatorToken('+', 2),
      2000,
      util.OperatorToken('=', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema), '((Year + 1) = 2000)')

  # Plain field compared with a pseudonym column.
  tokens = [
      util.FieldToken('Year'),
      util.PseudonymToken('Make'),
      util.OperatorToken('=', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(Year = ' + prefix + 'Make)')

  # Pseudonym column compared with a literal: the literal is replaced.
  tokens = [
      util.PseudonymToken('Make'),
      util.StringLiteralToken('"Hello"'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(' + prefix + 'Make == "HS57DHbh2KlkqNJREmu1wQ==")')

  # begin: tests about 'related' schema option
  # NOTE(review): the schema is tagged with the literal 'cars_name' while the
  # tokens use _RELATED; presumably both hold the same value -- confirm.
  related_schema = test_util.GetCarsSchema()
  for column in related_schema:
    if column['name'] == 'Make':
      column['related'] = 'cars_name'

  # value is deterministic calc with related instead of _TABLE_ID
  tokens = [
      util.PseudonymToken('Make', related=_RELATED),
      util.StringLiteralToken('"Hello"'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, related_schema),
                   '(' + prefix + 'Make == "sspWKAH/NKuUyX8ji1mmSw==")')

  # token with related attribute makes no sense if schema doesn't have it
  assert_rejected(
      [
          util.StringLiteralToken('"Hello"'),
          util.PseudonymToken('Make', related=_RELATED),
          util.OperatorToken('==', 2),
      ],
      cars_schema)
  # end: tests about 'related' schema option

  # Literal on the left-hand side of the comparison.
  tokens = [
      util.StringLiteralToken('"Hello"'),
      util.PseudonymToken('Make'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '("HS57DHbh2KlkqNJREmu1wQ==" == ' + prefix + 'Make)')

  tokens = [
      util.StringLiteralToken('"Hello"'),
      util.PseudonymToken('Make'),
      util.OperatorToken('!=', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '("HS57DHbh2KlkqNJREmu1wQ==" != ' + prefix + 'Make)')

  # Two pseudonym columns compared with each other.
  tokens = [
      util.PseudonymToken('Make'),
      util.PseudonymToken('Make2'),
      util.OperatorToken('==', 2),
  ]
  self.assertEqual(rewrite(tokens, cars_schema),
                   '(' + prefix + 'Make == ' + prefix + 'Make2)')

  # Comparisons that must raise BigqueryInvalidQueryError.
  assert_rejected(
      [
          util.HomomorphicIntToken('Invoice_Price'),
          2,
          util.OperatorToken('==', 2),
      ],
      cars_schema)
  assert_rejected(
      [
          util.PseudonymToken('Make'),
          util.ProbabilisticToken('Price'),
          util.OperatorToken('=', 2),
      ],
      cars_schema)

  # Nested (dotted) field name: the prefix is placed on the last component.
  places_schema = test_util.GetPlacesSchema()
  tokens = [
      util.PseudonymToken('spouse.spouseName'),
      util.StringLiteralToken('"Hello"'),
      util.OperatorToken('=', 2),
  ]
  self.assertEqual(
      rewrite(tokens, places_schema),
      '(spouse.' + prefix + 'spouseName = "HS57DHbh2KlkqNJREmu1wQ==")')
def testRewriteAggregations(self):
  # Checks query_lib._RewriteAggregations on a series of single postfix
  # stacks: each case either compares the rewritten stack with an expected
  # list or expects bigquery_client.BigqueryInvalidQueryError.
  int_prefix = util.HOMOMORPHIC_INT_PREFIX
  float_prefix = util.HOMOMORPHIC_FLOAT_PREFIX
  pseudonym_prefix = util.PSEUDONYM_PREFIX
  probabilistic_prefix = util.PROBABILISTIC_PREFIX

  # Sub-expressions that recur in the expected homomorphic rewrites.
  count_invoice = 'COUNT(' + int_prefix + 'Invoice_Price)'
  paillier_invoice = (
      'TO_BASE64(BYTES(PAILLIER_SUM(FROM_BASE64(' + int_prefix +
      'Invoice_Price), \'0\')))')
  count_holdback = 'COUNT(' + float_prefix + 'Holdback_Percentage)'
  paillier_holdback = (
      'TO_BASE64(BYTES(PAILLIER_SUM(FROM_BASE64(' + float_prefix +
      'Holdback_Percentage), \'0\')))')

  def expect(tokens, rewritten):
    self.assertEqual(
        query_lib._RewriteAggregations([tokens], _TEST_NSQUARE),
        [rewritten])

  def expect_invalid(tokens):
    self.assertRaises(bigquery_client.BigqueryInvalidQueryError,
                      query_lib._RewriteAggregations,
                      [tokens], _TEST_NSQUARE)

  # COUNT over *, over a probabilistic column, and with a second argument.
  expect(
      [util.CountStarToken(), util.AggregationFunctionToken('COUNT', 1)],
      ['COUNT(*)'])
  expect(
      [
          util.ProbabilisticToken('Price'),
          util.AggregationFunctionToken('COUNT', 1),
      ],
      ['COUNT(' + probabilistic_prefix + 'Price)'])
  expect(
      [
          util.ProbabilisticToken('Price'),
          4,
          util.AggregationFunctionToken('COUNT', 2),
      ],
      ['COUNT(' + probabilistic_prefix + 'Price, 4)'])

  # Two aggregations combined by an operator stay separate stack entries.
  expect(
      [
          util.FieldToken('Year'),
          5,
          util.AggregationFunctionToken('DISTINCTCOUNT', 2),
          util.FieldToken('Year'),
          util.AggregationFunctionToken('COUNT', 1),
          util.OperatorToken('+', 2),
      ],
      ['COUNT(DISTINCT Year, 5)', 'COUNT(Year)', '+'])

  # Built-in functions over constants are evaluated before aggregating.
  expect(
      [
          0,
          util.BuiltInFunctionToken('cos'),
          util.AggregationFunctionToken('COUNT', 1),
      ],
      ['COUNT(1.0)'])
  expect(
      [
          util.StringLiteralToken('"Hello"'),
          2,
          util.BuiltInFunctionToken('left'),
          util.StringLiteralToken('"y"'),
          util.BuiltInFunctionToken('concat'),
          util.AggregationFunctionToken('GROUP_CONCAT', 1),
      ],
      ['GROUP_CONCAT("Hey")'])

  # Aggregation over an unencrypted arithmetic expression.
  expect(
      [
          util.FieldToken('Year'),
          util.FieldToken('Year'),
          util.OperatorToken('*', 2),
          util.AggregationFunctionToken('SUM', 1),
      ],
      ['SUM((Year * Year))'])

  # SUM / AVG over homomorphic columns.
  expect(
      [
          util.HomomorphicIntToken('Invoice_Price'),
          util.AggregationFunctionToken('SUM', 1),
      ],
      [0.0, count_invoice, '*', 1.0, paillier_invoice, '*', '+'])
  expect(
      [
          util.HomomorphicFloatToken('Holdback_Percentage'),
          util.AggregationFunctionToken('AVG', 1),
      ],
      [
          0.0, count_holdback, '*', 1.0, paillier_holdback, '*', '+',
          count_holdback, '/',
      ])
  expect(
      [
          util.HomomorphicIntToken('Invoice_Price'),
          2,
          util.OperatorToken('+', 2),
          5,
          util.OperatorToken('*', 2),
          util.AggregationFunctionToken('SUM', 1),
      ],
      [
          0.0, count_invoice, '*', 5.0, paillier_invoice, '*', '+',
          0.0, count_invoice, '*', 1.0, 'SUM((2 * 5))', '*', '+',
          '+',
      ])

  # DISTINCTCOUNT and TOP over unencrypted and pseudonym columns.
  expect(
      [
          util.PseudonymToken('Make'),
          2,
          util.AggregationFunctionToken('DISTINCTCOUNT', 2),
      ],
      ['COUNT(DISTINCT ' + pseudonym_prefix + 'Make, 2)'])
  expect(
      [util.FieldToken('Year'), util.AggregationFunctionToken('TOP', 1)],
      ['TOP(Year)'])
  expect(
      [
          util.PseudonymToken('Make'),
          5,
          1,
          util.AggregationFunctionToken('TOP', 3),
      ],
      ['TOP(' + pseudonym_prefix + 'Make, 5, 1)'])

  # SUM over an unencrypted term plus a homomorphic column.
  expect(
      [
          util.FieldToken('Year'),
          util.BuiltInFunctionToken('cos'),
          util.HomomorphicIntToken('Invoice_Price'),
          util.OperatorToken('+', 2),
          util.AggregationFunctionToken('SUM', 1),
      ],
      [
          0.0, count_invoice, '*', 1.0, 'SUM(cos(Year))', '*', '+',
          0.0, count_invoice, '*', 1.0, paillier_invoice, '*', '+',
          '+',
      ])

  # Aggregations that must raise BigqueryInvalidQueryError.
  expect_invalid([
      util.ProbabilisticToken('Model'),
      util.AggregationFunctionToken('DISTINCTCOUNT', 1),
  ])
  expect_invalid([
      util.ProbabilisticToken('Price'),
      util.AggregationFunctionToken('SUM', 1),
  ])
  expect_invalid([
      util.HomomorphicIntToken('Invoice_Price'),
      util.HomomorphicFloatToken('Holdback_Percentage'),
      util.OperatorToken('*', 2),
      util.AggregationFunctionToken('SUM', 1),
  ])
  expect_invalid([
      util.HomomorphicFloatToken('Holdback_Percentage'),
      util.BuiltInFunctionToken('cos'),
      util.AggregationFunctionToken('SUM', 1),
  ])
  expect_invalid([
      util.HomomorphicIntToken('Invoice_Price'),
      util.AggregationFunctionToken('TOP', 1),
  ])

  # Nested aggregations: accepted for plain fields, rejected for encrypted
  # columns.
  expect(
      [
          util.FieldToken('Year'),
          util.AggregationFunctionToken('SUM', 1),
          util.AggregationFunctionToken('SUM', 1),
      ],
      ['SUM(SUM(Year))'])
  expect_invalid([
      util.HomomorphicIntToken('Invoice_Price'),
      util.AggregationFunctionToken('SUM', 1),
      util.AggregationFunctionToken('SUM', 1),
  ])
  expect(
      [
          util.FieldToken('Year'),
          util.AggregationFunctionToken('GROUP_CONCAT', 1),
          util.AggregationFunctionToken('GROUP_CONCAT', 1),
      ],
      ['GROUP_CONCAT(GROUP_CONCAT(Year))'])
  expect_invalid([
      util.PseudonymToken('Make'),
      util.AggregationFunctionToken('GROUP_CONCAT', 1),
      util.AggregationFunctionToken('GROUP_CONCAT', 1),
  ])