def testTypes(self):
    """Check that mixing value types is rejected by both constructors."""
    # An int and a string cannot share one repeated value, whichever
    # constructor is used to build it.
    for constructor in (repeated.repeated, repeated.meld):
        with self.assertRaises(TypeError):
            constructor(1, "foo")
def testRepeat(self):
    # A parenthesised list of literals solves to a repeated value.
    flat = solve.solve(q.Query("(1, 2, 3, 4)"), {}).value
    self.assertEqual(flat, repeated.meld(1, 2, 3, 4))

    # A nested repeated value is flattened into its parent.
    nested = solve.solve(q.Query("(1, (2, 3), 4)"), {}).value
    self.assertEqual(nested, repeated.meld(1, 2, 3, 4))

    # Members may be arbitrary expressions.
    computed = solve.solve(q.Query("(1, (2 + 2), 3, 4)"), {}).value
    self.assertEqual(computed, repeated.meld(1, 4, 3, 4))

    # All members of a repeated value must share one type.
    with self.assertRaises(errors.EfilterTypeError):
        solve.solve(q.Query("(1, 'foo', 3, 4)"), {})

    # None members are dropped from the result.
    literals = [ast.Literal(value) for value in (None, 2, None, 4)]
    without_nulls = solve.solve(q.Query(ast.Repeat(*literals)), {}).value
    self.assertEqual(without_nulls, repeated.meld(2, 4))
def testRepeat(self):
    def _value_of(source):
        # Solve 'source' (query text or AST) against an empty scope.
        return solve.solve(q.Query(source), {}).value

    # Literals in parentheses make a repeated value.
    self.assertEqual(_value_of("(1, 2, 3, 4)"), repeated.meld(1, 2, 3, 4))

    # Nesting does not survive: inner repeated values are flattened.
    self.assertEqual(_value_of("(1, (2, 3), 4)"), repeated.meld(1, 2, 3, 4))

    # Expressions are evaluated before being melded.
    self.assertEqual(_value_of("(1, (2 + 2), 3, 4)"),
                     repeated.meld(1, 4, 3, 4))

    # Mixing ints and strings is a type error.
    with self.assertRaises(errors.EfilterTypeError):
        _value_of("(1, 'foo', 3, 4)")

    # None literals are skipped.
    repeat_with_nulls = ast.Repeat(ast.Literal(None),
                                   ast.Literal(2),
                                   ast.Literal(None),
                                   ast.Literal(4))
    self.assertEqual(_value_of(repeat_with_nulls), repeated.meld(2, 4))
def testMembership(self):
    def holds(query, scope=None):
        # Solve 'query' (text or AST) against 'scope'; an omitted scope means
        # an empty one.
        bindings = {} if scope is None else scope
        return solve.solve(q.Query(query), bindings).value

    # Lists (tuples) are supported on the right-hand side.
    self.assertTrue(holds("x in [1, 2, 3, 4]", {"x": 2}))
    self.assertFalse(holds("5 in [1, 2, 3, 4]"))

    # So are lists of strings.
    self.assertTrue(holds("'foo' in ['bar', 'foo']"))
    self.assertTrue(holds("'baz' not in ['bar', 'foo']"))

    # And repeated values.
    self.assertTrue(holds("'foo' in ('bar', 'foo')"))

    # Substring checks: a string can be a member of a string.
    self.assertTrue(holds("'foo' in 'foobar'"))
    self.assertTrue(holds("'foo' in ('foobar')"))
    self.assertTrue(holds("'baz' not in 'foobar'"))

    # A one-element list keeps its string distinct from the string itself,
    # whereas a one-element repeated value does not.
    self.assertTrue(holds("'foo' not in ['foobar']"))

    # The same rules apply when the operand comes from a variable.
    self.assertTrue(holds("'foo' not in [x]", {"x": "foobar"}))
    self.assertTrue(holds("'foo' in x", {"x": "foobar"}))
    self.assertTrue(holds("'foo' in (x)", {"x": "foobar"}))

    # Unicode strings behave the same way.
    self.assertTrue(holds("'foo' in (x)", {"x": u"foobar"}))
    self.assertTrue(
        holds(ast.Membership(ast.Literal(u"foo"), ast.Literal(u"foobar"))))

    # Repeated values supplied through the scope.
    self.assertTrue(holds("'foo' in x", {"x": repeated.meld("foo", "bar")}))
    self.assertTrue(
        holds("'foo' not in x", {"x": repeated.meld("foobar", "bar")}))

    # The tricky part: a repeated value holding a single value is equal to
    # that value, so the one-element case below is a substring check while
    # the two-element case is an element check.
    self.assertTrue(holds("'foo' not in ('foobar', 'bar')"))
    self.assertTrue(holds("'foo' in ('foobar')"))

    # Single characters are handled too.
    self.assertTrue(holds("'f' in 'foo'"))
def testAny(self):
    # ANY ... WHERE over a repeated member: one of the parents has pid 1.
    parents = repeated.meld(mocks.Process(1, None, None),
                            mocks.Process(2, None, None))
    scope = {"Process": {"parent": parents}}
    any_with_pid_one = q.Query("any Process.parent where (pid == 1)")
    self.assertTrue(solve.solve(any_with_pid_one, scope).value)

    # Unary ANY: false for None, true for a scalar and for a repeated value.
    unary_any = q.Query(ast.Any(ast.Var("x")))
    self.assertFalse(solve.solve(unary_any, {"x": None}).value)
    self.assertTrue(solve.solve(unary_any, {"x": 1}).value)
    self.assertTrue(
        solve.solve(unary_any, {"x": repeated.meld(1, 2, 3)}).value)
def testReverseLists(self):
    """Check that reverse handles both lists and repeated values."""
    # EFILTER "lists" come back as Python tuples.
    reversed_list = api.apply("reverse([1, 2, 3])")
    self.assertEqual(reversed_list, (3, 2, 1))

    reversed_repeated = api.apply("reverse((1, 2, 3))")
    self.assertEqual(reversed_repeated, repeated.meld(3, 2, 1))
def testSubselects(self):
    # Query fragments shared by the cases below.
    tom_and_jerry = (
        " (bind('age': 10, 'name': 'Tom'), bind('age': 8, 'name': 'Jerry'))")
    only_jerry = " WHERE name == 'Jerry'"
    let_users = ("let users = ("
                 " bind('age': 10, 'name': 'Tom'),"
                 " bind('age': 8, 'name': 'Jerry')"
                 "),")

    # A subselect yielding a single value can be used as a scalar.
    scalar = q.Query("5 + SELECT age FROM" + tom_and_jerry + only_jerry)
    self.assertEqual(solve.solve(scalar, {}).value, 13)

    # Selecting two columns cannot be added to a number.
    two_columns = q.Query(
        "5 + SELECT age, name FROM" + tom_and_jerry + only_jerry)
    with self.assertRaises(errors.EfilterTypeError):
        solve.solve(two_columns, {})

    # A multi-row subselect works with set operations such as IN.
    membership = q.Query(let_users +
                         "names = SELECT name FROM users"
                         " SELECT * FROM users WHERE name IN names")
    everyone = repeated.meld({"age": 10, "name": "Tom"},
                             {"age": 8, "name": "Jerry"})
    self.assertValuesEqual(solve.solve(membership, {}).value, everyone)

    # Equivalence against a multi-row subselect is a type error.
    equivalence = q.Query(let_users +
                          "names = SELECT name FROM users"
                          " SELECT * FROM users WHERE name == names")
    with self.assertRaises(errors.EfilterTypeError):
        # solve is lazy, so the results have to be realized with list().
        list(solve.solve(equivalence, {}).value)

    # IN against a subselect that returns multiple columns is a type error
    # as well.
    all_columns = q.Query(let_users +
                          "names = SELECT * FROM users"
                          " SELECT * FROM users WHERE name IN names")
    with self.assertRaises(errors.EfilterTypeError):
        # solve is lazy, so the results have to be realized with list().
        list(solve.solve(all_columns, {}).value)
def testSubselects(self):
    # Selecting two columns cannot be added to a number.
    two_columns = q.Query(
        "5 + SELECT age, name FROM"
        " (bind('age': 10, 'name': 'Tom'), bind('age': 8, 'name': 'Jerry'))"
        " WHERE name == 'Jerry'")
    with self.assertRaises(errors.EfilterTypeError):
        solve.solve(two_columns, {})

    # A multi-row subselect works with set operations such as IN.
    membership = q.Query(
        "let users = ("
        " bind('age': 10, 'name': 'Tom'),"
        " bind('age': 8, 'name': 'Jerry')"
        "),"
        "names = (SELECT name FROM users) "
        " SELECT * FROM users WHERE name IN names")
    selected = solve.solve(membership, {}).value
    everyone = repeated.meld({"age": 10, "name": "Tom"},
                             {"age": 8, "name": "Jerry"})
    self.assertValuesEqual(selected, everyone)
def solve_resolve(expr, vars):
    """Use IStructured.resolve to get member (rhs) from the object (lhs).

    Both scalars and repeated values are accepted on the LHS. The member is
    resolved from every value on the LHS and the results are melded into a
    single (possibly repeated) value.

    Arguments:
        expr: The resolve expression; lhs, rhs and source are read.
        vars: The scope.

    Returns:
        Result wrapping the melded members.

    Raises:
        errors.EfilterKeyError: resolving raised KeyError or AttributeError.
        errors.EfilterNoneError: resolving raised TypeError or ValueError and
            vars.locals is None.
        errors.EfilterError: resolving raised NotImplementedError.
    """
    # __solve_for_repeated returns a (value, was_repeating) pair; only the
    # value is needed here. Passing the pair itself to getvalues would try to
    # resolve the member from the pair's elements.
    objs, _ = __solve_for_repeated(expr.lhs, vars)
    member = solve(expr.rhs, vars).value

    try:
        results = [structured.resolve(o, member)
                   for o in repeated.getvalues(objs)]
    except (KeyError, AttributeError):
        # Raise a better exception for the non-existent member.
        raise errors.EfilterKeyError(root=expr.rhs, key=member,
                                     query=expr.source)
    except (TypeError, ValueError):
        # Is this a null object error?
        if vars.locals is None:
            raise errors.EfilterNoneError(
                root=expr, query=expr.source,
                message="Cannot resolve member %r from a null." % member)
        else:
            raise
    except NotImplementedError:
        raise errors.EfilterError(
            root=expr, query=expr.source,
            message="Cannot resolve members from a non-structured value.")

    return Result(repeated.meld(*results), ())
def solve_select(expr, vars):
    """Look up the key (rhs) in the data (lhs) using IAssociative.select.

    The LHS may be a scalar or a repeated value; the key is selected from
    each of its values and the selections are melded together.

    Arguments:
        expr: The select expression; lhs, rhs and source are read.
        vars: The scope.

    Returns:
        Result wrapping the melded selections.
    """
    data = solve(expr.lhs, vars).value
    key = solve(expr.rhs, vars).value

    selections = []
    try:
        for datum in repeated.getvalues(data):
            selections.append(associative.select(datum, key))
    except (KeyError, AttributeError):
        # Report a missing key with a friendlier error.
        raise errors.EfilterKeyError(root=expr, key=key, query=expr.source)
    except (TypeError, ValueError):
        # Most likely a null dereference; anything else is re-raised as is.
        if vars.locals is not None:
            raise

        raise errors.EfilterNoneError(
            root=expr, query=expr.source,
            message="Cannot select key %r from a null." % key)
    except NotImplementedError:
        raise errors.EfilterError(
            root=expr, query=expr.source,
            message="Cannot select keys from a non-associative value.")

    return Result(repeated.meld(*selections), ())
def testSubselects(self):
    def _let_users_query(names_clause, select_clause):
        # Build a query that binds 'users' to two rows and 'names' to a
        # subselect, followed by the final SELECT.
        return q.Query(
            "let users = ("
            " bind('age': 10, 'name': 'Tom'),"
            " bind('age': 8, 'name': 'Jerry')"
            ")," + names_clause + select_clause)

    # A subselect yielding a single value can be used as a scalar.
    scalar = q.Query(
        "5 + SELECT age FROM"
        " (bind('age': 10, 'name': 'Tom'), bind('age': 8, 'name': 'Jerry'))"
        " WHERE name == 'Jerry'")
    self.assertEqual(solve.solve(scalar, {}).value, 13)

    # Selecting two columns cannot be added to a number.
    two_columns = q.Query(
        "5 + SELECT age, name FROM"
        " (bind('age': 10, 'name': 'Tom'), bind('age': 8, 'name': 'Jerry'))"
        " WHERE name == 'Jerry'")
    with self.assertRaises(errors.EfilterTypeError):
        solve.solve(two_columns, {})

    # A multi-row subselect works with set operations such as IN.
    membership = _let_users_query(
        "names = SELECT name FROM users",
        " SELECT * FROM users WHERE name IN names")
    self.assertValuesEqual(
        solve.solve(membership, {}).value,
        repeated.meld({"age": 10, "name": "Tom"},
                      {"age": 8, "name": "Jerry"}))

    # Equivalence against a multi-row subselect is a type error.
    equivalence = _let_users_query(
        "names = SELECT name FROM users",
        " SELECT * FROM users WHERE name == names")
    with self.assertRaises(errors.EfilterTypeError):
        # solve is lazy, so the results have to be realized with list().
        list(solve.solve(equivalence, {}).value)

    # IN against a subselect that returns multiple columns is a type error
    # as well.
    all_columns = _let_users_query(
        "names = SELECT * FROM users",
        " SELECT * FROM users WHERE name IN names")
    with self.assertRaises(errors.EfilterTypeError):
        # solve is lazy, so the results have to be realized with list().
        list(solve.solve(all_columns, {}).value)
def solve_select(expr, vars):
    """Use IAssociative.select to get key (rhs) from the data (lhs).

    Both scalars and repeated values are accepted on the LHS. The key is
    selected from every value on the LHS and the results are melded into a
    single (possibly repeated) value.

    Arguments:
        expr: The select expression; lhs, rhs and source are read.
        vars: The scope.

    Returns:
        Result wrapping the melded selections.

    Raises:
        errors.EfilterKeyError: selecting raised KeyError or AttributeError.
        errors.EfilterNoneError: selecting raised TypeError or ValueError and
            vars.locals is None.
        errors.EfilterError: selecting raised NotImplementedError.
    """
    # __solve_for_repeated returns a (value, was_repeating) pair; only the
    # value is needed here. Passing the pair itself to getvalues would try to
    # select the key from the pair's elements.
    data, _ = __solve_for_repeated(expr.lhs, vars)
    key = solve(expr.rhs, vars).value

    try:
        results = [associative.select(d, key)
                   for d in repeated.getvalues(data)]
    except (KeyError, AttributeError):
        # Raise a better exception for accessing a non-existent key.
        raise errors.EfilterKeyError(root=expr, key=key, query=expr.source)
    except (TypeError, ValueError):
        # Raise a better exception for what is probably a null pointer error.
        if vars.locals is None:
            raise errors.EfilterNoneError(
                root=expr, query=expr.source,
                message="Cannot select key %r from a null." % key)
        else:
            raise
    except NotImplementedError:
        raise errors.EfilterError(
            root=expr, query=expr.source,
            message="Cannot select keys from a non-associative value.")

    return Result(repeated.meld(*results), ())
def __solve_for_repeated(expr, vars):
    """Helper: solve 'expr' and report whether the result was repeating.

    If solving 'expr' yields a non-empty list or tuple whose first element
    implements IStructured, the sequence is melded into a repeated value,
    because that is what the caller meant to do. This is a convenience so
    users of the API don't have to build IRepeated objects themselves.

    Any other result is returned unchanged.

    Arguments:
        expr: Expression to solve.
        vars: The scope.

    Returns:
        A (value, was_repeating) pair. For a melded list or tuple the flag is
        False; otherwise it is repeated.isrepeating() of the solved value.
    """
    var = solve(expr, vars).value

    # Only the first element is inspected to decide whether this is a
    # sequence of structured objects.
    is_structured_sequence = (
        var
        and isinstance(var, (tuple, list))
        and protocol.implements(var[0], structured.IStructured))
    if is_structured_sequence:
        return repeated.meld(*var), False

    return var, repeated.isrepeating(var)
def testGroup(self):
    # The rows to group: the CSV file passed in as parameter 0.
    csv_rows = ("apply", ("var", "csv"), ("param", 0), True)
    # Rows are grouped by their country column.
    grouper = ("var", "country")
    # One output column per reducer: the country, mean age and summed age.
    reducers = (
        ("reducer", ("var", "singleton"), ("var", "country")),
        ("reducer", ("var", "mean"),
         ("cast", ("var", "age"), ("var", "int"))),
        ("reducer", ("var", "sum"),
         ("cast", ("var", "age"), ("var", "int"))),
    )
    group_query = q.Query(
        ("group", csv_rows, grouper) + reducers,
        params=[testlib.get_fixture_path("fake_users.csv")])

    result = api.apply(query=group_query, allow_io=True)

    # Truncate the means to ints so they can be compared exactly.
    actual = []
    for output_row in result:
        output_row[1] = int(output_row[1])
        actual.append(output_row)

    expected = repeated.meld(['El Salvador', 55, 1287],
                             ['Ethiopia', 55, 1210],
                             ['French Guiana', 47, 381],
                             ['Germany', 42, 299],
                             ['Haiti', 46, 610],
                             ['Mayotte', 50, 865],
                             ['Portugal', 48, 485])
    self.assertItemsEqual(expected, actual)
def solve_resolve(expr, vars):
    """Fetch the member (rhs) from the object (lhs) via IStructured.resolve.

    The LHS may be a scalar or a repeated value; the member is resolved from
    each of its values and the results are melded together.

    Arguments:
        expr: The resolve expression; lhs, rhs and source are read.
        vars: The scope.

    Returns:
        Result wrapping the melded members.
    """
    # The second element of the pair (whether the LHS was repeating) is not
    # needed here.
    objs = __solve_for_repeated(expr.lhs, vars)[0]
    member = solve(expr.rhs, vars).value

    resolved = []
    try:
        for obj in repeated.getvalues(objs):
            resolved.append(structured.resolve(obj, member))
    except (KeyError, AttributeError):
        # Report a missing member with a friendlier error.
        raise errors.EfilterKeyError(root=expr.rhs, key=member,
                                     query=expr.source)
    except (TypeError, ValueError):
        # Most likely a null dereference; anything else is re-raised as is.
        if vars.locals is not None:
            raise

        raise errors.EfilterNoneError(
            root=expr, query=expr.source,
            message="Cannot resolve member %r from a null." % member)
    except NotImplementedError:
        raise errors.EfilterError(
            root=expr, query=expr.source,
            message="Cannot resolve members from a non-structured value.")

    return Result(repeated.meld(*resolved), ())
def solve_resolve(expr, vars):
    """Fetch the member (rhs) from the object (lhs) via IStructured.resolve.

    Arguments:
        expr: The resolve expression; lhs, rhs and source are read.
        vars: The scope.

    Returns:
        Result wrapping the melded members resolved from every LHS value.
    """
    objs = __within_lhs_as_repeated(expr.lhs, vars)
    member = solve(expr.rhs, vars).value

    resolved = []
    try:
        for obj in repeated.getvalues(objs):
            resolved.append(structured.resolve(obj, member))
    except (KeyError, AttributeError):
        # Report a missing member with a friendlier error.
        raise errors.EfilterKeyError(root=expr.rhs, key=member,
                                     query=expr.source)
    except (TypeError, ValueError):
        # Only a missing scope is reported as a null error; anything else is
        # re-raised as is.
        if vars is not None:
            raise

        raise errors.EfilterNoneError(
            root=expr, query=expr.source,
            message="Cannot resolve member %r from a null." % member)
    except NotImplementedError:
        raise errors.EfilterError(
            root=expr, query=expr.source,
            message="Cannot resolve members from a non-structured value.")

    return Result(repeated.meld(*resolved), ())
def solve_group(expr, vars):
    """Group the rows on the LHS and reduce each group.

    Rows are consumed in chunks of reducer.DEFAULT_CHUNK_SIZE. Within a
    chunk, rows are bucketed by the value of expr.grouper, each bucket is
    folded with the composed reducers from expr.reducers, and the fold is
    merged with the intermediate kept for the same key from earlier chunks.

    Arguments:
        expr: The group expression; lhs, grouper and reducers are read.
        vars: The scope.

    Returns:
        Result wrapping the melded, finalized output of every group.
    """
    # __solve_for_repeated returns a (value, was_repeating) pair; only the
    # rows themselves are chunked.
    rows, _ = __solve_for_repeated(expr.lhs, vars)
    reducers = [solve(child, vars).value for child in expr.reducers]
    r = reducer.Compose(*reducers)
    intermediates = {}

    # To avoid loading too much data into memory we segment the input rows.
    for chunk in reducer.generate_chunks(rows, reducer.DEFAULT_CHUNK_SIZE):
        # Group rows based on the output of the grouper expression.
        groups = {}
        for value in chunk:
            key = solve(expr.grouper,
                        __nest_scope(expr.lhs, vars, value)).value
            groups.setdefault(key, []).append(value)

        # Fold each group in this chunk, merge with previous intermediate, if
        # any.
        for key, group in six.iteritems(groups):
            intermediate = reducer.fold(r, group)
            previous = intermediates.get(key)
            # Compare against None rather than testing truthiness: a falsy
            # intermediate from an earlier chunk still has to be merged, or
            # that chunk's contribution is silently lost.
            if previous is not None:
                intermediate = reducer.merge(r, intermediate, previous)

            intermediates[key] = intermediate

    # This could equally well return a lazy repeated value to avoid finalizing
    # right away. The assumption here is that finalize is cheap, at least
    # compared to fold and merge, which already have to run eagerly. Using a
    # lazy value here would keep the intermediates around in memory, and just
    # doesn't seem worth it.
    results = [reducer.finalize(r, intermediate)
               for intermediate in six.itervalues(intermediates)]

    return Result(repeated.meld(*results), ())
def solve_group(expr, vars):
    """Group the rows on the LHS and reduce each group.

    Rows are consumed in chunks of reducer.DEFAULT_CHUNK_SIZE. Within a
    chunk, rows are bucketed by the value of expr.grouper, each bucket is
    folded with the composed reducers from expr.reducers, and the fold is
    merged with the intermediate kept for the same key from earlier chunks.

    Arguments:
        expr: The group expression; lhs, grouper and reducers are read.
        vars: The scope.

    Returns:
        Result wrapping the melded, finalized output of every group.
    """
    rows = solve(expr.lhs, vars).value
    reducers = [solve(child, vars).value for child in expr.reducers]
    r = reducer.Compose(*reducers)
    intermediates = {}

    # To avoid loading too much data into memory we segment the input rows.
    for chunk in reducer.generate_chunks(rows, reducer.DEFAULT_CHUNK_SIZE):
        # Group rows based on the output of the grouper expression.
        groups = {}
        for value in chunk:
            key = solve(expr.grouper,
                        __nest_scope(expr.lhs, vars, value)).value
            groups.setdefault(key, []).append(value)

        # Fold each group in this chunk, merge with previous intermediate, if
        # any.
        for key, group in six.iteritems(groups):
            intermediate = reducer.fold(r, group)
            previous = intermediates.get(key)
            # Compare against None rather than testing truthiness: a falsy
            # intermediate from an earlier chunk still has to be merged, or
            # that chunk's contribution is silently lost.
            if previous is not None:
                intermediate = reducer.merge(r, intermediate, previous)

            intermediates[key] = intermediate

    # This could equally well return a lazy repeated value to avoid finalizing
    # right away. The assumption here is that finalize is cheap, at least
    # compared to fold and merge, which already have to run eagerly. Using a
    # lazy value here would keep the intermediates around in memory, and just
    # doesn't seem worth it.
    results = [reducer.finalize(r, intermediate)
               for intermediate in six.itervalues(intermediates)]

    return Result(repeated.meld(*results), ())
def testEq(self):
    """Check value equality between a LazyLineReader and a repeated value."""
    # Every line keeps its trailing newline except the last one.
    expected_lines = repeated.meld("Alice\n", "Bob\n", "Charlie\n",
                                   "Dave\n", "Eve\n", "Frank")
    fixture = testlib.get_fixture_path("names.txt")

    with open(fixture, "r") as fd:
        self.assertValuesEqual(expected_lines,
                               line_reader.LazyLineReader(fd))
def testEach(self):
    # Only one of the two parents has pid 1, so EACH must be false.
    parents = repeated.meld(mocks.Process(1, None, None),
                            mocks.Process(2, None, None))
    scope = {"Process": {"parent": parents}}
    each_has_pid_one = q.Query("each(Process.parent, (pid == 1))")

    self.assertFalse(solve.solve(each_has_pid_one, scope).value)
def testCompare(self):
    # A lazy repetition over a generator function compares equal to the
    # repeated value holding the same elements.
    def _count_to_three():
        for number in (1, 2, 3):
            yield number

    lazy = lazy_repetition.LazyRepetition(_count_to_three)
    self.assertEqual(lazy, repeated.meld(1, 2, 3))
def testFirst(self):
    # (argument, expected) pairs: a repeated value yields its first element,
    # while a scalar, a list and None come back unchanged.
    cases = (
        (repeated.meld(1, 2, 3, 4), 1),
        (1, 1),
        ([1, 2], [1, 2]),
        (None, None),
    )
    for argument, expected in cases:
        self.assertEqual(core.First()(argument), expected)
def testNulls(self):
    # None has zero values, and asking for them must not raise.
    self.assertEqual(repeated.getvalues(None), ())

    # None members are skipped when melding, leaving nothing.
    all_nulls = repeated.meld(None, None)
    self.assertEqual(repeated.getvalues(all_nulls), ())
def testFilter(self):
    # Filtering two processes on pid == 1 leaves just the matching one.
    processes = repeated.meld(mocks.Process(2, None, None),
                              mocks.Process(1, None, None))
    pid_one = q.Query("select * from Process where (pid == 1)")

    matches = solve.solve(pid_one, {"Process": processes}).value
    self.assertValuesEqual(matches, mocks.Process(1, None, None))
def testFilter(self):
    # The scope holds two processes; only pid 1 should survive the WHERE.
    scope = {
        "Process": repeated.meld(mocks.Process(2, None, None),
                                 mocks.Process(1, None, None)),
    }
    filtered = solve.solve(
        q.Query("select * from Process where (pid == 1)"), scope)

    self.assertValuesEqual(filtered.value, mocks.Process(1, None, None))
def testFirst(self):
    # (argument, expected) pairs: repeated values and lists yield their first
    # element, while a scalar and None come back unchanged.
    cases = (
        (repeated.meld(1, 2, 3, 4), 1),
        (1, 1),
        ([1, 2], 1),
        (None, None),
    )
    for argument, expected in cases:
        self.assertEqual(core.First()(argument), expected)
def testReducer(self):
    # Build a reducer computing the mean of the 'age' property. Tests let
    # plain dicts stand in for structured rows.
    mean_age = api.apply(("reducer", ("var", "mean"), ("var", "age")))
    self.assertIsInstance(mean_age, reducer.IReducer)

    mary = {"name": "Mary", "age": 10}
    bob = {"name": "Bob", "age": 20}
    users = repeated.meld(mary, bob)

    self.assertEqual(reducer.reduce(mean_age, users), 15)
def solve_repeat(expr, vars):
    """Meld the values of all subexpressions into one repeated value.

    Arguments:
        expr: The repeat expression; children and source are read.
        vars: The scope.

    Returns:
        Result wrapping the melded child values.

    Raises:
        errors.EfilterTypeError: a TypeError was raised while solving the
            children or melding their values.
    """
    try:
        values = [solve(child, vars).value for child in expr.children]
        melded = repeated.meld(*values)
        return Result(melded, ())
    except TypeError:
        raise errors.EfilterTypeError(
            root=expr, query=expr.source,
            message="All values in a repeated value must be of the same type.")
def testNulls(self):
    # unittest.TestCase has no assertFail(); fail() is the method that marks
    # the test as failed, so a regression reports a failure instead of an
    # AttributeError.
    r = None
    for _ in repeated.getvalues(r):
        # Should be zero elements but not raise.
        self.fail("getvalues(None) should not yield any values.")

    r = repeated.meld(None, None)
    # None should get skipped.
    for _ in repeated.getvalues(r):
        self.fail("meld(None, None) should not yield any values.")
def testDrop(self):
    # (count, source, expected) triples.
    cases = (
        # Dropping from a repeated value.
        (2, repeated.meld(1, 2, 3, 4), repeated.meld(3, 4)),
        # Tuples are supported as well.
        (2, (1, 2, 3, 4), repeated.meld(3, 4)),
        # Dropping more than there is leaves nothing.
        (10, (1, 2, 3), None),
        # Dropping zero keeps everything.
        (0, (1, 2, 3), repeated.meld(1, 2, 3)),
    )
    for count, source, expected in cases:
        self.assertValuesEqual(core.Drop()(count, source), expected)
def testAny(self):
    def _any_x(value):
        # Solve the unary ANY query with 'x' bound to 'value'.
        return solve.solve(unary_any, {"x": value}).value

    # ANY ... WHERE over a repeated member: one of the parents has pid 1.
    scope = {
        "Process": {
            "parent": repeated.meld(mocks.Process(1, None, None),
                                    mocks.Process(2, None, None)),
        },
    }
    self.assertTrue(
        solve.solve(q.Query("any Process.parent where (pid == 1)"),
                    scope).value)

    # Unary ANY: false for None, true for a scalar and for a repeated value.
    unary_any = q.Query(ast.Any(ast.Var("x")))
    self.assertFalse(_any_x(None))
    self.assertTrue(_any_x(1))
    self.assertTrue(_any_x(repeated.meld(1, 2, 3)))
def testEach(self):
    # EACH must be false because the second parent does not have pid 1.
    scope = {
        "Process": {
            "parent": repeated.meld(mocks.Process(1, None, None),
                                    mocks.Process(2, None, None)),
        },
    }
    outcome = solve.solve(q.Query("each(Process.parent, (pid == 1))"), scope)

    self.assertFalse(outcome.value)
def testReverse(self):
    # Apply 'reverse' to the repeated value (1, 2, 3), built directly as AST.
    one_two_three = ast.Repeat(ast.Literal(1), ast.Literal(2), ast.Literal(3))
    query = q.Query(ast.Apply(ast.Var("reverse"), one_two_three))

    reversed_value = solve.solve(query, {}).value
    self.assertEqual(reversed_value, repeated.meld(3, 2, 1))
def testDropAndTake(self):
    """Check drop and take, on their own, combined and as LIMIT/OFFSET."""
    self.assertValuesEqual(api.apply("drop(2, (1, 2, 3, 4))"),
                           repeated.meld(3, 4))

    # The remaining cases all reduce to a single value.
    scalar_cases = (
        ("drop(3, (1, 2, 3, 4))", 4),
        ("take(1, drop(2, (1, 2, 3, 4)))", 3),
        # The SQL-style spelling of the previous query.
        ("SELECT * FROM (1, 2, 3, 4) LIMIT 1 OFFSET 2", 3),
    )
    for query, expected in scalar_cases:
        self.assertValuesEqual(api.apply(query), expected)
def solve_sort(expr, vars):
    """Sort values on the LHS by the value they yield when passed to RHS.

    Arguments:
        expr: The sort expression; lhs and rhs are read.
        vars: The scope.

    Returns:
        Result wrapping the melded, sorted values.
    """
    # __solve_for_repeated returns a (value, was_repeating) pair; only the
    # value holds the rows to sort.
    lhs_value, _ = __solve_for_repeated(expr.lhs, vars)
    lhs_values = repeated.getvalues(lhs_value)

    sort_expression = expr.rhs

    def _key_func(x):
        # Evaluate the sort expression with 'x' nested into the scope.
        return solve(sort_expression, __nest_scope(expr.lhs, vars, x)).value

    results = ordered.ordered(lhs_values, key_func=_key_func)

    return Result(repeated.meld(*results), ())
def resolve(self, name):
    """Let the plugin be read like an IStructured instead of being called.

    This makes the plugin look like a structured datatype (like an object),
    so the user can get data out of it without calling it as a function.
    'apply' is run with no arguments before 'rows' is read, presumably to
    populate it.

    Arguments:
        name: The key to read from every row.

    Returns:
        The 'name' entry of every row in self.rows, melded together.
    """
    # Make sure we have data.
    self.apply((), {})

    column = [row[name] for row in self.rows]
    return repeated.meld(*column)
def solve_sort(expr, vars):
    """Order the LHS values by what the RHS evaluates to for each of them.

    Arguments:
        expr: The sort expression; lhs and rhs are read.
        vars: The scope.

    Returns:
        Result wrapping the melded, sorted values.
    """
    # The second element of the pair (whether the LHS was repeating) is not
    # needed here.
    solved_lhs, _ = __solve_for_repeated(expr.lhs, vars)
    lhs_values = repeated.getvalues(solved_lhs)

    sort_expression = expr.rhs

    def _sort_key(row):
        # Evaluate the sort expression with 'row' nested into the scope.
        row_scope = __nest_scope(expr.lhs, vars, row)
        return solve(sort_expression, row_scope).value

    sorted_values = ordered.ordered(lhs_values, key_func=_sort_key)

    return Result(repeated.meld(*sorted_values), ())
def resolve(self, name):
    """Let the plugin be read like an IStructured instead of being called.

    This makes the plugin look like a structured datatype (like an object),
    so the user can get data out of it without calling it as a function.
    'materialize' is run before 'rows' is read, presumably to populate it.

    Arguments:
        name: The key to read from every row.

    Returns:
        The 'name' entry of every row in self.rows, melded together.
    """
    # Make sure we have data.
    self.materialize()

    column = [row[name] for row in self.rows]
    return repeated.meld(*column)
def testSort(self):
    # Fresh fixtures are built for every assertion so that no objects are
    # shared between queries.
    def _unsorted_processes():
        return repeated.meld(mocks.Process(2, None, None),
                             mocks.Process(1, None, None))

    def _sorted_processes():
        return repeated.meld(mocks.Process(1, None, None),
                             mocks.Process(2, None, None))

    def _people():
        return [
            {"name": "John", "surname": "Smith"},
            {"name": "John", "surname": "Brown"},
            {"name": "John", "surname": "Lennon"},
            {"name": "Alice", "surname": "Brown"},
        ]

    def _people_by_surname_then_name():
        return repeated.meld(
            {"name": "Alice", "surname": "Brown"},
            {"name": "John", "surname": "Brown"},
            {"name": "John", "surname": "Lennon"},
            {"name": "John", "surname": "Smith"},
        )

    # Sorting a top-level repeated value.
    by_pid = solve.solve(q.Query("select * from Process order by pid"),
                         {"Process": _unsorted_processes()}).value
    self.assertEqual(by_pid, _sorted_processes())

    # How about nested repeated fields? This should sort the process
    # children and return those.
    children_by_pid = solve.solve(
        q.Query("select * from Process.children order by pid"),
        {"Process": {"children": _unsorted_processes()}}).value
    self.assertEqual(children_by_pid, _sorted_processes())

    # Sorting BY a repeated expression should be the same as sorting by a
    # tuple, so both spellings must give the same ordering.
    sort_keys = ("[lower(surname), lower(name)]",
                 "(lower(surname), lower(name))")
    for sort_key in sort_keys:
        query = q.Query(
            "select name, surname from people order by " + sort_key)
        ordered_people = solve.solve(query, {"people": _people()}).value
        self.assertEqual(ordered_people, _people_by_surname_then_name())
def testCreation(self):
    """Check what 'repeated' and 'meld' return for one and two values."""
    # Two values give a repeated var that really is repeating.
    pair = repeated.repeated("foo", "bar")
    self.assertIsInstance(pair, repeated.IRepeated)
    self.assertTrue(repeated.isrepeating(pair))

    # A single value is still wrapped in a repeated var, but it is not
    # repeating (it does not hold more than one value).
    single = repeated.repeated("foo")
    self.assertIsInstance(single, repeated.IRepeated)
    self.assertFalse(repeated.isrepeating(single))

    # meld, by contrast, hands a single value back as a plain scalar.
    melded_single = repeated.meld("foo")
    self.assertIsInstance(melded_single, six.string_types)

    # With two values meld behaves like 'repeated'.
    melded_pair = repeated.meld("foo", "foo")
    self.assertIsInstance(melded_pair, repeated.IRepeated)
def testRepeat(self):
    def _value_of(source):
        # Solve 'source' (query text or AST) against an empty scope.
        return solve.solve(q.Query(source), {}).value

    # Literals in parentheses make a repeated value.
    self.assertEqual(_value_of("(1, 2, 3, 4)"), repeated.meld(1, 2, 3, 4))

    # Nested repeated values are kept as a nested list, not flattened.
    self.assertEqual(_value_of("(1, (2, 3), 4)"),
                     repeated.meld(1, [2, 3], 4))

    # Expressions are evaluated; operators always return a list.
    self.assertEqual(_value_of("(1, (2 + 2), 3, 4)"),
                     repeated.meld(1, [4], 3, 4))

    # None literals are skipped.
    repeat_with_nulls = ast.Repeat(ast.Literal(None),
                                   ast.Literal(2),
                                   ast.Literal(None),
                                   ast.Literal(4))
    self.assertEqual(_value_of(repeat_with_nulls), repeated.meld(2, 4))
def __within_lhs_as_repeated(lhs_expr, vars):
    """Solve 'lhs_expr', melding lists of IAssociative objects.

    Map, filter and similar operations accept both lists and IRepeated on the
    LHS. If 'lhs_expr' solves to a non-empty list or tuple whose first
    element implements IAssociative, it is melded into a repeated value,
    because that is what the caller meant to do. This is a convenience so
    users don't have to build IRepeated objects themselves.

    Any other value is returned unchanged.
    """
    var = solve(lhs_expr, vars).value

    # Only the first element is inspected to decide whether this is a
    # sequence of associative objects.
    is_associative_sequence = (
        var
        and isinstance(var, (tuple, list))
        and protocol.implements(var[0], associative.IAssociative))
    if not is_associative_sequence:
        return var

    return repeated.meld(*var)
def testReducer(self):
    # Ask for a reducer that averages the 'age' property of its input. Tests
    # let plain dicts stand in for structured rows.
    reducer_query = ("reducer", ("var", "mean"), ("var", "age"))
    r = api.apply(reducer_query)
    self.assertIsInstance(r, reducer.IReducer)

    users = repeated.meld({"name": "Mary", "age": 10},
                          {"name": "Bob", "age": 20})
    average = reducer.reduce(r, users)
    self.assertEqual(average, 15)
def testTake(self):
    # (count, source, expected) triples.
    cases = (
        # Taking from a repeated value.
        (2, repeated.meld(1, 2, 3, 4), repeated.meld(1, 2)),
        # Tuples are supported as well.
        (2, (1, 2, 3, 4), repeated.meld(1, 2)),
        # Asking for more than there is returns everything.
        (10, (1, 2, 3), repeated.meld(1, 2, 3)),
        # Taking zero leaves nothing.
        (0, (1, 2, 3), None),
        # Taking from an empty source leaves nothing.
        (10, (), None),
    )
    for count, source, expected in cases:
        self.assertValuesEqual(core.Take()(count, source), expected)
def testDropAndTake(self):
    """Check drop and take, on their own, combined and as LIMIT/OFFSET."""
    dropped_two = api.apply("drop(2, (1, 2, 3, 4))")
    self.assertValuesEqual(dropped_two, repeated.meld(3, 4))

    dropped_three = api.apply("drop(3, (1, 2, 3, 4))")
    self.assertValuesEqual(dropped_three, 4)

    third_only = api.apply("take(1, drop(2, (1, 2, 3, 4)))")
    self.assertValuesEqual(third_only, 3)

    # The SQL-style spelling of the previous query.
    third_only_sql = api.apply("SELECT * FROM (1, 2, 3, 4) LIMIT 1 OFFSET 2")
    self.assertValuesEqual(third_only_sql, 3)
class MockRootType(object):
    """Mock root scope backed by a fixed dictionary of members."""

    # Members exposed by the mock: the Process type itself, one process
    # instance, a mock function and a repeated value of two processes.
    DATA = {
        "Process": Process,
        "proc": Process(10, "Finder", None),
        "MockFunction": MockFunction(),
        "pslist": repeated.meld(
            Process(1, "init", None),
            Process(10, "Finder", None),
        ),
    }

    def resolve(self, name):
        """Return the member stored under 'name'; KeyError if missing."""
        return self.DATA[name]

    @classmethod
    def getmembers(cls):
        """Return the names of all members as a list."""
        return list(cls.DATA)
def testGroup(self):
    # The query is given directly in its tuple form: the rows to group, the
    # grouper expression, then one reducer per output column.
    group_expression = (
        "group",
        # The input: the CSV file passed in as parameter 0.
        ("apply", ("var", "csv"), ("param", 0), True),
        # The grouper expression.
        ("var", "country"),
        # The output reducers: country, mean age and summed age.
        ("reducer", ("var", "singleton"), ("var", "country")),
        ("reducer", ("var", "mean"),
         ("cast", ("var", "age"), ("var", "int"))),
        ("reducer", ("var", "sum"),
         ("cast", ("var", "age"), ("var", "int"))),
    )
    fixture = testlib.get_fixture_path("fake_users.csv")
    result = api.apply(query=q.Query(group_expression, params=[fixture]),
                       allow_io=True)

    # Truncate the means to ints so they can be compared exactly.
    actual = []
    for output_row in result:
        output_row[1] = int(output_row[1])
        actual.append(output_row)

    expected = repeated.meld(['El Salvador', 55, 1287],
                             ['Ethiopia', 55, 1210],
                             ['French Guiana', 47, 381],
                             ['Germany', 42, 299],
                             ['Haiti', 46, 610],
                             ['Mayotte', 50, 865],
                             ['Portugal', 48, 485])
    self.assertItemsEqual(expected, actual)
def testIOReads(self):
    """Check that IO is wired up when the caller opts in."""
    def _small_csv():
        # Replacement list pointing at the small CSV fixture.
        return [testlib.get_fixture_path("small.csv")]

    all_rows = api.apply("SELECT * FROM csv(?)",
                         replacements=_small_csv(),
                         allow_io=True)
    self.assertValuesEqual(all_rows, repeated.meld(*small_csv.EXPECTED))

    # 'csv' should also accept keyword arguments.
    result = api.apply("SELECT * FROM csv(?, decode_header: true)",
                       replacements=_small_csv(),
                       allow_io=True)
    first_row = next(iter(result))
    self.assertEqual({"Name": "Alice", "Age": "25", "City": "Zurich"},
                     first_row)

    # Dropping the last reference to the result should close the file. Keep
    # hold of the descriptor first so it can still be inspected afterwards.
    fd = result.source.fd
    del result
    self.assertTrue(fd.closed)
class MockRootType(object):
    """Mock root scope backed by a fixed dictionary of members."""

    # Members exposed by the mock: the Process type itself, one process
    # instance, a mock function and a repeated value of two processes.
    DATA = {
        "Process": Process,
        "proc": Process(10, "Finder", None),
        "MockFunction": MockFunction(),
        "pslist": repeated.meld(
            Process(1, "init", None),
            Process(10, "Finder", None),
        ),
    }

    def resolve(self, name):
        """Return the member stored under 'name'; KeyError if missing."""
        return self.DATA[name]

    @classmethod
    def reflect_static_member(cls, name):
        """Return the value type of member 'name'.

        None is returned when the member is missing or falsy.
        """
        member = cls.DATA.get(name)
        if not member:
            return None

        return repeated.value_type(member)

    @classmethod
    def getmembers_static(cls):
        """Return the names of all members, as given by DATA.keys()."""
        return cls.DATA.keys()