def setup(self):
    """Create the collection expressions shared by the tests.

    ``self.expr``, ``self.expr1`` and ``self.expr2`` each wrap their own
    ``MockTable``; all three use one schema with the columns ``name``
    (string), ``id`` (int64) and ``fid`` (float64).
    """
    schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
    )

    # (attribute set on ``self``, name of the backing mock table),
    # listed in the order the fixtures are created.
    fixtures = (
        ('expr', 'pyodps_test_expr_table'),
        ('expr1', 'pyodps_test_expr_table1'),
        ('expr2', 'pyodps_test_expr_table2'),
    )
    for attr_name, table_name in fixtures:
        mock_table = MockTable(name=table_name, schema=schema)
        mock_table._client = self.config.odps.rest
        setattr(
            self,
            attr_name,
            CollectionExpr(_source_data=mock_table, _schema=schema),
        )
def setup(self):
    """Create the three collection expressions used by the tests.

    * ``self.expr``  -- columns ``name``/``id``/``fid``.
    * ``self.expr2`` -- the same columns plus the extra ``part1``/``part2``
      name and type lists passed to ``Schema.from_lists`` (presumably
      partition columns -- confirm against ``Schema.from_lists``).
    * ``self.expr3`` -- columns ``id``/``name``/``relatives``/``hobbies``
      with dict and list typed columns; its mock table is not given a
      ``_client``.
    """
    # Plain three-column table.
    plain_schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
    )
    plain_table = MockTable(name='pyodps_test_expr_table', schema=plain_schema)
    plain_table._client = self.config.odps.rest
    self.expr = CollectionExpr(_source_data=plain_table, _schema=plain_schema)

    # Same columns, with the additional ``part1``/``part2`` lists.
    extended_schema = Schema.from_lists(
        ['name', 'id', 'fid'],
        [types.string, types.int64, types.float64],
        ['part1', 'part2'],
        [types.string, types.int64],
    )
    extended_table = MockTable(
        name='pyodps_test_expr_table2', schema=extended_schema)
    extended_table._client = self.config.odps.rest
    self.expr2 = CollectionExpr(
        _source_data=extended_table, _schema=extended_schema)

    # Table with composite (dict / list) column types.
    composite_schema = Schema.from_lists(
        ['id', 'name', 'relatives', 'hobbies'],
        [
            types.int64,
            types.string,
            types.Dict(types.string, types.string),
            types.List(types.string),
        ],
    )
    composite_table = MockTable(
        name='pyodps_test_expr_table3', schema=composite_schema)
    self.expr3 = CollectionExpr(
        _source_data=composite_table, _schema=composite_schema)
def testSetitemField(self):
    """Check ``expr[name] = column`` on several kinds of collection.

    Covers a copied source collection, a group-by aggregation result, a
    column-merged left join and a whole-collection projection, asserting
    the resulting expression type and schema after each assignment.
    """
    from odps.df.expr.groupby import GroupByCollectionExpr
    from odps.df.expr.merge import JoinFieldMergedCollectionExpr

    expr = self.expr.copy()

    # A first assignment appends the new column after the existing ones.
    expr['new_id'] = expr.id + 1
    self.assertIn('new_id', expr.schema.names)
    self.assertIs(expr._fields[-1].lhs.input, expr.input)
    self.assertEqual(expr.schema.names, ['name', 'id', 'fid', 'new_id'])

    # A second assignment appends again; the new field's operand is still
    # expected to be bound to ``expr.input``.
    expr['new_id2'] = expr.id + 2
    self.assertIn('new_id2', expr.schema.names)
    self.assertIs(expr._fields[-1].lhs.input, expr.input)
    self.assertEqual(
        expr.schema.names, ['name', 'id', 'fid', 'new_id', 'new_id2'])
    self.assertIsNone(expr._input._proxy)

    # Overwrite an existing column, then build a column from the overwrite.
    expr['new_id2'] = expr.new_id
    expr['new_id3'] = expr.id + expr.new_id2
    self.assertIs(expr._fields[-1].lhs.input, expr.input)
    self.assertIs(expr._fields[-1].rhs.lhs.input, expr.input)
    # The original repeated this check through ``self.assert_``, a
    # deprecated unittest alias removed in Python 3.12; the duplicate is
    # dropped because ``assertIsInstance`` already covers it.
    self.assertIsInstance(expr, ProjectCollectionExpr)

    # Assigning on an aggregation result yields a projection.
    expr2 = expr.groupby('name').agg(expr.id.sum())
    expr2['new_id2'] = expr2.id_sum + 1
    self.assertIsInstance(expr2, ProjectCollectionExpr)
    self.assertNotIsInstance(expr2, GroupByCollectionExpr)
    self.assertNotIsInstance(expr2, FilterCollectionExpr)

    # Assigning on a column-merged join keeps the join expression type.
    schema = Schema.from_lists(
        ['name', 'id', 'fid2', 'fid3'],
        [types.string, types.int64, types.float64, types.float64])
    table = MockTable(name='pyodps_test_expr_table', schema=schema)
    table._client = self.config.odps.rest
    expr3 = CollectionExpr(_source_data=table, _schema=schema)
    expr4 = expr.left_join(
        expr3,
        on=[expr.name == expr3.name, expr.id == expr3.id],
        merge_columns=True)
    expr4['fid_1'] = expr4.groupby('id').sort('fid2').row_number()
    self.assertIsInstance(expr4, JoinFieldMergedCollectionExpr)
    self.assertIsNone(expr4._proxy)

    # Assigning a row-wise ``apply`` result on a whole-collection selection.
    expr5 = expr[expr]
    expr5['name_2'] = expr5.apply(lambda row: row.name, axis=1, reduce=True)
    self.assertIsInstance(expr5, ProjectCollectionExpr)
    self.assertIsNone(expr5._proxy)
def testConcat(self):
    """Check the expression types and schemas produced by ``concat``.

    Row-wise concat (default axis) must give a ``UnionCollectionExpr``.
    Column-wise concat (``axis=1``) of source-less collections must give a
    ``ConcatCollectionExpr`` when pandas is importable and an
    ``AlgoCollectionExpr`` otherwise; when the right side is backed by a
    table the result must be an ``AlgoCollectionExpr``.
    """
    from odps.ml.expr import AlgoCollectionExpr

    schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
    df = CollectionExpr(_source_data=None, _schema=schema)
    df1 = CollectionExpr(_source_data=None, _schema=schema)
    df2 = CollectionExpr(_source_data=None, _schema=schema)

    schema = Schema.from_lists(['fid', 'fid2'], [types.int64, types.float64])
    df3 = CollectionExpr(_source_data=None, _schema=schema)

    schema = Schema.from_lists(['fid', 'fid2'], [types.int64, types.float64])
    table = MockTable(name='pyodps_test_expr_table2', schema=schema)
    table._client = self.config.odps.rest
    df4 = CollectionExpr(_source_data=table, _schema=schema)

    merged_columns = ('name', 'id', 'fid', 'fid2')

    # Row-wise concatenation.
    expr = df.concat([df1, df2])
    self.assertIsInstance(expr, UnionCollectionExpr)
    self.assertIsInstance(expr._lhs, CollectionExpr)
    self.assertIsInstance(expr._rhs, CollectionExpr)

    # Column-wise concatenation of two source-less collections.
    expr = df.concat(df3, axis=1)
    try:
        # Availability probe only. Keeping the ``try`` this narrow means an
        # ImportError raised by the assertions below cannot be mistaken for
        # "pandas is not installed".
        import pandas  # noqa: F401
    except ImportError:
        self.assertIsInstance(expr, AlgoCollectionExpr)
    else:
        self.assertIsInstance(expr, ConcatCollectionExpr)
        self.assertIsInstance(expr._lhs, CollectionExpr)
        self.assertIsInstance(expr._rhs, CollectionExpr)
    for column in merged_columns:
        self.assertIn(column, expr.schema.names)

    # Column-wise concatenation with a table-backed right-hand side.
    expr = df.concat(df4, axis=1)
    self.assertIsInstance(expr, AlgoCollectionExpr)
    for column in merged_columns:
        self.assertIn(column, expr.schema.names)