def testString(self): data = self._gen_data(5) methods_to_fields = [ (lambda s: s.capitalize(), self.expr.name.capitalize()), (lambda s: data[0][0] in s, self.expr.name.contains(data[0][0], regex=False)), (lambda s: s.count(data[0][0]), self.expr.name.count(data[0][0])), (lambda s: s.endswith(data[0][0]), self.expr.name.endswith(data[0][0])), (lambda s: s.startswith(data[0][0]), self.expr.name.startswith(data[0][0])), (lambda s: s.find(data[0][0]), self.expr.name.find(data[0][0])), (lambda s: s.rfind(data[0][0]), self.expr.name.rfind(data[0][0])), (lambda s: s.replace(data[0][0], 'test'), self.expr.name.replace(data[0][0], 'test')), (lambda s: s[0], self.expr.name.get(0)), (lambda s: len(s), self.expr.name.len()), (lambda s: s.ljust(10), self.expr.name.ljust(10)), (lambda s: s.ljust(20, '*'), self.expr.name.ljust(20, fillchar='*')), (lambda s: s.rjust(10), self.expr.name.rjust(10)), (lambda s: s.rjust(20, '*'), self.expr.name.rjust(20, fillchar='*')), (lambda s: s * 4, self.expr.name.repeat(4)), (lambda s: s[2:10:2], self.expr.name.slice(2, 10, 2)), (lambda s: s[-5:-1], self.expr.name.slice(-5, -1)), (lambda s: s.title(), self.expr.name.title()), (lambda s: s.rjust(20, '0'), self.expr.name.zfill(20)), (lambda s: s.isalnum(), self.expr.name.isalnum()), (lambda s: s.isalpha(), self.expr.name.isalpha()), (lambda s: s.isdigit(), self.expr.name.isdigit()), (lambda s: s.isspace(), self.expr.name.isspace()), (lambda s: s.isupper(), self.expr.name.isupper()), (lambda s: s.istitle(), self.expr.name.istitle()), (lambda s: to_str(s).isnumeric(), self.expr.name.isnumeric()), (lambda s: to_str(s).isdecimal(), self.expr.name.isdecimal()), ] fields = [ it[1].rename('id' + str(i)) for i, it in enumerate(methods_to_fields) ] expr = self.expr[fields] res = self.engine.execute(expr) result = self._get_result(res) for i, it in enumerate(methods_to_fields): method = it[0] first = [method(it[0]) for it in data] second = [it[i] for it in result] self.assertEqual(first, second)
def testUnion(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.bigint, types.bigint]) table_name = 'pyodps_test_engine_table2' self.odps.delete_table(table_name, if_exists=True) table2 = self.odps.create_table(name=table_name, schema=schema2) expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2)) self._gen_data(data=data) data2 = [ ['name3', 5, -1], ['name4', 6, -2] ] self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2]) try: expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name']) res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5], ['name4', 6] ] result = sorted(result) expected = sorted(expected) self.assertEqual(len(result), len(expected)) for e, r in zip(result, expected): self.assertEqual([to_str(t) for t in e], [to_str(t) for t in r]) finally: table2.drop()
def testUnion(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] data2 = [ ['name3', 5, -1], ['name4', 6, -2] ] datatypes = lambda *types: [validate_data_type(t) for t in types] schema2 = Schema.from_lists(['name', 'id2', 'id3'], datatypes('string', 'int64', 'int64')) table_name = tn('pyodps_test_engine_table2') table2 = self._create_table_and_insert_data(table_name, schema2, data2) expr2 = CollectionExpr(_source_data=table2, _schema=schema2) self._gen_data(data=data) try: expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name']) res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5], ['name4', 6] ] result = sorted(result) expected = sorted(expected) self.assertEqual(len(result), len(expected)) for e, r in zip(result, expected): self.assertEqual([to_str(t) for t in e], [to_str(t) for t in r]) finally: [conn.close() for conn in _engine_to_connections.values()] table2.drop()
def testJoin(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.bigint, types.bigint]) table_name = 'pyodps_test_engine_table2' self.odps.delete_table(table_name, if_exists=True) table2 = self.odps.create_table(name=table_name, schema=schema2) expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2)) self._gen_data(data=data) data2 = [ ['name1', 4, -1], ['name2', 1, -2] ] self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2]) try: expr = self.expr.join(expr2)['name', 'id2'] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 5) expected = [ [to_str('name1'), 4], [to_str('name2'), 1] ] self.assertTrue(all(it in expected for it in result)) expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 2) expected = [to_str('name1'), 4] self.assertTrue(all(it == expected for it in result)) finally: table2.drop()
def testString(self): data = self._gen_data(5) methods_to_fields = [ (lambda s: s.capitalize(), self.expr.name.capitalize()), (lambda s: data[0][0] in s, self.expr.name.contains(data[0][0], regex=False)), (lambda s: s.count(data[0][0]), self.expr.name.count(data[0][0])), (lambda s: s.endswith(data[0][0]), self.expr.name.endswith(data[0][0])), (lambda s: s.startswith(data[0][0]), self.expr.name.startswith(data[0][0])), (lambda s: s.find(data[0][0]), self.expr.name.find(data[0][0])), (lambda s: s.rfind(data[0][0]), self.expr.name.rfind(data[0][0])), (lambda s: s.replace(data[0][0], 'test'), self.expr.name.replace(data[0][0], 'test')), (lambda s: s[0], self.expr.name.get(0)), (lambda s: len(s), self.expr.name.len()), (lambda s: s.ljust(10), self.expr.name.ljust(10)), (lambda s: s.ljust(20, '*'), self.expr.name.ljust(20, fillchar='*')), (lambda s: s.rjust(10), self.expr.name.rjust(10)), (lambda s: s.rjust(20, '*'), self.expr.name.rjust(20, fillchar='*')), (lambda s: s * 4, self.expr.name.repeat(4)), (lambda s: s[2: 10: 2], self.expr.name.slice(2, 10, 2)), (lambda s: s[-5: -1], self.expr.name.slice(-5, -1)), (lambda s: s.title(), self.expr.name.title()), (lambda s: s.rjust(20, '0'), self.expr.name.zfill(20)), (lambda s: s.isalnum(), self.expr.name.isalnum()), (lambda s: s.isalpha(), self.expr.name.isalpha()), (lambda s: s.isdigit(), self.expr.name.isdigit()), (lambda s: s.isspace(), self.expr.name.isspace()), (lambda s: s.isupper(), self.expr.name.isupper()), (lambda s: s.istitle(), self.expr.name.istitle()), (lambda s: to_str(s).isnumeric(), self.expr.name.isnumeric()), (lambda s: to_str(s).isdecimal(), self.expr.name.isdecimal()), ] fields = [it[1].rename('id'+str(i)) for i, it in enumerate(methods_to_fields)] expr = self.expr[fields] res = self.engine.execute(expr) result = self._get_result(res) for i, it in enumerate(methods_to_fields): method = it[0] first = [method(it[0]) for it in data] second = [it[i] for it in result] self.assertEqual(first, second)
def testJoin(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.bigint, types.bigint]) table_name = 'pyodps_test_engine_table2' self.odps.delete_table(table_name, if_exists=True) table2 = self.odps.create_table(name=table_name, schema=schema2) expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2)) self._gen_data(data=data) data2 = [['name1', 4, -1], ['name2', 1, -2]] self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2]) try: expr = self.expr.join(expr2)['name', 'id2'] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 5) expected = [[to_str('name1'), 4], [to_str('name2'), 1]] self.assertTrue(all(it in expected for it in result)) expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 2) expected = [to_str('name1'), 4] self.assertTrue(all(it == expected for it in result)) finally: table2.drop()
def testUnion(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.bigint, types.bigint]) table_name = 'pyodps_test_engine_table2' self.odps.delete_table(table_name, if_exists=True) table2 = self.odps.create_table(name=table_name, schema=schema2) expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2)) self._gen_data(data=data) data2 = [['name3', 5, -1], ['name4', 6, -2]] self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2]) try: expr = self.expr['name', 'id'].distinct().union( expr2[expr2.id2.rename('id'), 'name']) res = self.engine.execute(expr) result = self._get_result(res) expected = [['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5], ['name4', 6]] result = sorted(result) expected = sorted(expected) self.assertEqual(len(result), len(expected)) for e, r in zip(result, expected): self.assertEqual([to_str(t) for t in e], [to_str(t) for t in r]) finally: table2.drop()
def testUnion(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.int64, types.int64]) self._gen_data(data=data) data2 = [ ['name3', 5, -1], ['name4', 6, -2] ] import pandas as pd expr2 = CollectionExpr(_source_data=pd.DataFrame(data2, columns=schema2.names), _schema=schema2) expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name']) res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5], ['name4', 6] ] result = sorted(result) expected = sorted(expected) self.assertEqual(len(result), len(expected)) for e, r in zip(result, expected): self.assertEqual([to_str(t) for t in e], [to_str(t) for t in r])
def testJoin(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.int64, types.int64]) self._gen_data(data=data) data2 = [ ['name1', 4, -1], ['name2', 1, -2] ] import pandas as pd expr2 = CollectionExpr(_source_data=pd.DataFrame(data2, columns=schema2.names), _schema=schema2) expr = self.expr.join(expr2)['name', 'id2'] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 5) expected = [ [to_str('name1'), 4], [to_str('name2'), 1] ] self.assertTrue(all(it in expected for it in result)) expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 2) expected = [to_str('name1'), 4] self.assertTrue(all(it == expected for it in result))
def testJoin(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.int64, types.int64]) self._gen_data(data=data) data2 = [['name1', 4, -1], ['name2', 1, -2]] import pandas as pd expr2 = CollectionExpr(_source_data=pd.DataFrame( data2, columns=schema2.names), _schema=schema2) expr = self.expr.join(expr2)['name', 'id2'] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 5) expected = [[to_str('name1'), 4], [to_str('name2'), 1]] self.assertTrue(all(it in expected for it in result)) expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 2) expected = [to_str('name1'), 4] self.assertTrue(all(it == expected for it in result))
def testUnion(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.int64, types.int64]) self._gen_data(data=data) data2 = [['name3', 5, -1], ['name4', 6, -2]] import pandas as pd expr2 = CollectionExpr(_source_data=pd.DataFrame( data2, columns=schema2.names), _schema=schema2) expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name']) res = self.engine.execute(expr) result = self._get_result(res) expected = [['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5], ['name4', 6]] result = sorted(result) expected = sorted(expected) self.assertEqual(len(result), len(expected)) for e, r in zip(result, expected): self.assertEqual([to_str(t) for t in e], [to_str(t) for t in r])
def testElement(self): data = self._gen_data(5, nullable_field='name') fields = [ self.expr.name.isnull().rename('name1'), self.expr.name.notnull().rename('name2'), self.expr.name.fillna('test').rename('name3'), self.expr.id.isin([1, 2, 3]).rename('id1'), self.expr.id.isin(self.expr.fid.astype('int')).rename('id2'), self.expr.id.notin([1, 2, 3]).rename('id3'), self.expr.id.notin(self.expr.fid.astype('int')).rename('id4'), self.expr.id.between(self.expr.fid, 3).rename('id5'), self.expr.name.fillna('test').switch('test', 'test' + self.expr.name.fillna('test'), 'test2', 'test2' + self.expr.name.fillna('test'), default=self.expr.name).rename('name4'), self.expr.id.cut([100, 200, 300], labels=['xsmall', 'small', 'large', 'xlarge'], include_under=True, include_over=True).rename('id6') ] expr = self.expr[fields] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(data), len(result)) self.assertEqual(len([it for it in data if it[0] is None]), len([it[0] for it in result if it[0]])) self.assertEqual(len([it[0] for it in data if it[0] is not None]), len([it[1] for it in result if it[1]])) self.assertEqual([(it[0] if it[0] is not None else 'test') for it in data], [it[2] for it in result]) self.assertEqual([(it[1] in (1, 2, 3)) for it in data], [it[3] for it in result]) fids = [int(it[2]) for it in data] self.assertEqual([(it[1] in fids) for it in data], [it[4] for it in result]) self.assertEqual([(it[1] not in (1, 2, 3)) for it in data], [it[5] for it in result]) self.assertEqual([(it[1] not in fids) for it in data], [it[6] for it in result]) self.assertEqual([(it[2] <= it[1] <= 3) for it in data], [it[7] for it in result]) self.assertEqual([to_str('testtest' if it[0] is None else it[0]) for it in data], [to_str(it[8]) for it in result]) def get_val(val): if val <= 100: return 'xsmall' elif 100 < val <= 200: return 'small' elif 200 < val <= 300: return 'large' else: return 'xlarge' self.assertEqual([to_str(get_val(it[1])) for it in data], [to_str(it[9]) for it in result])
def testElement(self): data = self._gen_data(5, nullable_field='name') fields = [ self.expr.name.isnull().rename('name1'), self.expr.name.notnull().rename('name2'), self.expr.name.fillna('test').rename('name3'), self.expr.id.isin([1, 2, 3]).rename('id1'), self.expr.id.isin(self.expr.fid.astype('int')).rename('id2'), self.expr.id.notin([1, 2, 3]).rename('id3'), self.expr.id.notin(self.expr.fid.astype('int')).rename('id4'), self.expr.id.between(self.expr.fid, 3).rename('id5'), self.expr.name.fillna('test').switch( 'test', 'test' + self.expr.name.fillna('test'), 'test2', 'test2' + self.expr.name.fillna('test'), default=self.expr.name).rename('name4'), self.expr.id.cut([100, 200, 300], labels=['xsmall', 'small', 'large', 'xlarge'], include_under=True, include_over=True).rename('id6') ] expr = self.expr[fields] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(data), len(result)) self.assertEqual(len([it for it in data if it[0] is None]), len([it[0] for it in result if it[0]])) self.assertEqual(len([it[0] for it in data if it[0] is not None]), len([it[1] for it in result if it[1]])) self.assertEqual([(it[0] if it[0] is not None else 'test') for it in data], [it[2] for it in result]) self.assertEqual([(it[1] in (1, 2, 3)) for it in data], [it[3] for it in result]) fids = [int(it[2]) for it in data] self.assertEqual([(it[1] in fids) for it in data], [it[4] for it in result]) self.assertEqual([(it[1] not in (1, 2, 3)) for it in data], [it[5] for it in result]) self.assertEqual([(it[1] not in fids) for it in data], [it[6] for it in result]) self.assertEqual([(it[2] <= it[1] <= 3) for it in data], [it[7] for it in result]) self.assertEqual( [to_str('testtest' if it[0] is None else it[0]) for it in data], [to_str(it[8]) for it in result]) def get_val(val): if val <= 100: return 'xsmall' elif 100 < val <= 200: return 'small' elif 200 < val <= 300: return 'large' else: return 'xlarge' self.assertEqual([to_str(get_val(it[1])) for it in data], [to_str(it[9]) for it in result])
def testJoin(self): data = [ ['name1', 4, 5.3, None, None, None], ['name2', 2, 3.5, None, None, None], ['name1', 4, 4.2, None, None, None], ['name1', 3, 2.2, None, None, None], ['name1', 3, 4.1, None, None, None], ] data2 = [['name1', 4, -1], ['name2', 1, -2]] datatypes = lambda *types: [validate_data_type(t) for t in types] schema2 = Schema.from_lists(['name', 'id2', 'id3'], datatypes('string', 'int64', 'int64')) table_name = tn('pyodps_test_engine_table2') table2 = self._create_table_and_insert_data(table_name, schema2, data2) expr2 = CollectionExpr(_source_data=table2, _schema=schema2) self._gen_data(data=data) try: expr = self.expr.join(expr2)['name', 'id2'] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 5) expected = [[to_str('name1'), 4], [to_str('name2'), 1]] self.assertTrue(all(it in expected for it in result)) expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 2) expected = [to_str('name1'), 4] self.assertTrue(all(it == expected for it in result)) expr = self.expr.left_join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) expected = [['name1', 4], ['name2', None], ['name1', 4], ['name1', None], ['name1', None]] self.assertEqual(len(result), 5) self.assertTrue(all(it in expected for it in result)) expr = self.expr.right_join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 4], [None, 1], ] self.assertEqual(len(result), 3) self.assertTrue(all(it in expected for it in result)) if self.sql_engine.name != 'mysql': expr = self.expr.outer_join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 4], ['name2', None], ['name1', None], ['name1', None], [None, 1], ] self.assertEqual(len(result), 6) self.assertTrue(all(it in expected for it in result)) grouped = self.expr.groupby('name').agg( new_id=self.expr.id.sum()).cache() self.engine.execute(self.expr.join(grouped, on='name')) if self.sql_engine.name != 'mysql': expr = self.expr.join(expr2, on=[ 'name', ('id', 'id2') ])[lambda x: x.groupby(Scalar(1)).sort('name').row_number(), ] self.engine.execute(expr) finally: [conn.close() for conn in _engine_to_connections.values()] table2.drop()
def testJoin(self): data = [ ['name1', 4, 5.3, None, None], ['name2', 2, 3.5, None, None], ['name1', 4, 4.2, None, None], ['name1', 3, 2.2, None, None], ['name1', 3, 4.1, None, None], ] schema2 = Schema.from_lists(['name', 'id2', 'id3'], [types.string, types.bigint, types.bigint]) table_name = tn('pyodps_test_engine_table2') self.odps.delete_table(table_name, if_exists=True) table2 = self.odps.create_table(name=table_name, schema=schema2) expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2)) self._gen_data(data=data) data2 = [['name1', 4, -1], ['name2', 1, -2]] self.odps.write_table(table2, 0, data2) try: expr = self.expr.join(expr2)['name', 'id2'] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 5) expected = [[to_str('name1'), 4], [to_str('name2'), 1]] self.assertTrue(all(it in expected for it in result)) expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) self.assertEqual(len(result), 2) expected = [to_str('name1'), 4] self.assertTrue(all(it == expected for it in result)) expr = self.expr.left_join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) expected = [['name1', 4], ['name2', None], ['name1', 4], ['name1', None], ['name1', None]] self.assertEqual(len(result), 5) self.assertTrue(all(it in expected for it in result)) expr = self.expr.right_join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 4], [None, 1], ] self.assertEqual(len(result), 3) self.assertTrue(all(it in expected for it in result)) expr = self.expr.outer_join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2] res = self.engine.execute(expr) result = self._get_result(res) expected = [ ['name1', 4], ['name1', 4], ['name2', None], ['name1', None], ['name1', None], [None, 1], ] self.assertEqual(len(result), 6) self.assertTrue(all(it in expected for it in result)) grouped = self.expr.groupby('name').agg( new_id=self.expr.id.sum()).cache() self.engine.execute(self.expr.join(grouped, on='name')) expr = self.expr.join(expr2, on=[ 'name', ('id', 'id2') ])[lambda x: x.groupby(Scalar(1)).sort('name').row_number(), ] self.engine.execute(expr) finally: table2.drop()