Пример #1
0
 def setup(self):
     """Create three collection expressions over mock tables.

     All three share one schema (``name``, ``id``, ``fid``). The results
     are stored on ``self.expr``, ``self.expr1`` and ``self.expr2``.
     """
     column_names = ['name', 'id', 'fid']
     column_types = [types.string, types.int64, types.float64]
     schema = Schema.from_lists(column_names, column_types)

     # (attribute on self, backing mock table name)
     fixtures = (
         ('expr', 'pyodps_test_expr_table'),
         ('expr1', 'pyodps_test_expr_table1'),
         ('expr2', 'pyodps_test_expr_table2'),
     )
     for attr_name, table_name in fixtures:
         mock_table = MockTable(name=table_name, schema=schema)
         mock_table._client = self.config.odps.rest
         collection = CollectionExpr(_source_data=mock_table, _schema=schema)
         setattr(self, attr_name, collection)
Пример #2
0
 def setup(self):
     """Prepare ``self.expr``, ``self.expr1`` and ``self.expr2``.

     Each is a ``CollectionExpr`` over its own ``MockTable``; the tables
     share a single schema and use ``self.config.odps.rest`` as client.
     """
     schema = Schema.from_lists(
         ['name', 'id', 'fid'],
         [types.string, types.int64, types.float64]
     )

     def _expr_for(table_name):
         # Build one mock table with the shared schema and wrap it.
         mock_table = MockTable(name=table_name, schema=schema)
         mock_table._client = self.config.odps.rest
         return CollectionExpr(_source_data=mock_table, _schema=schema)

     self.expr = _expr_for('pyodps_test_expr_table')
     self.expr1 = _expr_for('pyodps_test_expr_table1')
     self.expr2 = _expr_for('pyodps_test_expr_table2')
    def setup(self):
        """Create the three expression fixtures used by the tests.

        * ``self.expr``  -- columns ``name``/``id``/``fid``.
        * ``self.expr2`` -- the same columns plus a second name/type list
          pair (``part1``/``part2``), presumably partition columns.
        * ``self.expr3`` -- columns including a ``Dict`` and a ``List``
          type; its mock table is not given a ``_client``.
        """
        base_names = ['name', 'id', 'fid']
        base_types = [types.string, types.int64, types.float64]
        base_schema = Schema.from_lists(base_names, base_types)
        base_table = MockTable(name='pyodps_test_expr_table',
                               schema=base_schema)
        base_table._client = self.config.odps.rest
        self.expr = CollectionExpr(_source_data=base_table,
                                   _schema=base_schema)

        extra_names = ['part1', 'part2']
        extra_types = [types.string, types.int64]
        extended_schema = Schema.from_lists(
            ['name', 'id', 'fid'],
            [types.string, types.int64, types.float64],
            extra_names,
            extra_types)
        extended_table = MockTable(name='pyodps_test_expr_table2',
                                   schema=extended_schema)
        extended_table._client = self.config.odps.rest
        self.expr2 = CollectionExpr(_source_data=extended_table,
                                    _schema=extended_schema)

        nested_names = ['id', 'name', 'relatives', 'hobbies']
        nested_types = [
            types.int64,
            types.string,
            types.Dict(types.string, types.string),
            types.List(types.string),
        ]
        nested_schema = Schema.from_lists(nested_names, nested_types)
        # No _client is attached to this table.
        nested_table = MockTable(name='pyodps_test_expr_table3',
                                 schema=nested_schema)
        self.expr3 = CollectionExpr(_source_data=nested_table,
                                    _schema=nested_schema)
Пример #4
0
    def testSetitemField(self):
        """Check ``expr[name] = column`` on several kinds of collections.

        Covers a copied source collection, a groupby aggregation result,
        a left join with ``merge_columns=True`` and a self-projection,
        asserting the resulting expression type, schema names and the
        inputs of the generated fields.
        """
        from odps.df.expr.groupby import GroupByCollectionExpr
        from odps.df.expr.merge import JoinFieldMergedCollectionExpr

        # Work on a copy so the shared fixture is left untouched.
        expr = self.expr.copy()

        expr['new_id'] = expr.id + 1

        self.assertIn('new_id', expr.schema.names)
        self.assertIs(expr._fields[-1].lhs.input, expr.input)

        self.assertEqual(expr.schema.names, ['name', 'id', 'fid', 'new_id'])

        expr['new_id2'] = expr.id + 2

        self.assertIn('new_id2', expr.schema.names)
        self.assertIs(expr._fields[-1].lhs.input, expr.input)

        self.assertEqual(expr.schema.names,
                         ['name', 'id', 'fid', 'new_id', 'new_id2'])
        self.assertIsNone(expr._input._proxy)

        # Overwrite an existing column with another assigned column.
        expr['new_id2'] = expr.new_id

        # A new column built from a source column and an assigned column.
        expr['new_id3'] = expr.id + expr.new_id2
        self.assertIs(expr._fields[-1].lhs.input, expr.input)
        self.assertIs(expr._fields[-1].rhs.lhs.input, expr.input)

        # The former ``self.assert_(isinstance(...))`` duplicate of this
        # check was dropped: ``assert_`` is a deprecated unittest alias
        # that no longer exists in Python 3.12.
        self.assertIsInstance(expr, ProjectCollectionExpr)

        # Assignment on an aggregation result.
        expr2 = expr.groupby('name').agg(expr.id.sum())
        expr2['new_id2'] = expr2.id_sum + 1
        self.assertIsInstance(expr2, ProjectCollectionExpr)
        self.assertNotIsInstance(expr2, GroupByCollectionExpr)
        self.assertNotIsInstance(expr2, FilterCollectionExpr)

        schema = Schema.from_lists(
            ['name', 'id', 'fid2', 'fid3'],
            [types.string, types.int64, types.float64, types.float64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        table._client = self.config.odps.rest
        expr3 = CollectionExpr(_source_data=table, _schema=schema)

        # Assignment on a join with merged columns.
        expr4 = expr.left_join(
            expr3,
            on=[expr.name == expr3.name, expr.id == expr3.id],
            merge_columns=True)
        expr4['fid_1'] = expr4.groupby('id').sort('fid2').row_number()
        self.assertIsInstance(expr4, JoinFieldMergedCollectionExpr)
        self.assertIsNone(expr4._proxy)

        # Assignment of a row-wise ``apply`` result on a self-projection.
        expr5 = expr[expr]
        expr5['name_2'] = expr5.apply(lambda row: row.name,
                                      axis=1,
                                      reduce=True)
        self.assertIsInstance(expr5, ProjectCollectionExpr)
        self.assertIsNone(expr5._proxy)
    def testConcat(self):
        """Check the expression types and schemas produced by ``concat``.

        * Concatenating a list of collections is expected to give a
          ``UnionCollectionExpr``.
        * ``axis=1`` with source-less collections is expected to give a
          ``ConcatCollectionExpr`` when pandas can be imported, and an
          ``AlgoCollectionExpr`` otherwise.
        * ``axis=1`` with a table-backed collection is expected to give an
          ``AlgoCollectionExpr``.
        """
        from odps.ml.expr import AlgoCollectionExpr

        merged_names = ('name', 'id', 'fid', 'fid2')

        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        df = CollectionExpr(_source_data=None, _schema=schema)
        df1 = CollectionExpr(_source_data=None, _schema=schema)
        df2 = CollectionExpr(_source_data=None, _schema=schema)

        schema = Schema.from_lists(['fid', 'fid2'],
                                   [types.int64, types.float64])
        df3 = CollectionExpr(_source_data=None, _schema=schema)

        schema = Schema.from_lists(['fid', 'fid2'],
                                   [types.int64, types.float64])
        table = MockTable(name='pyodps_test_expr_table2', schema=schema)
        table._client = self.config.odps.rest
        df4 = CollectionExpr(_source_data=table, _schema=schema)

        expr = df.concat([df1, df2])
        self.assertIsInstance(expr, UnionCollectionExpr)
        self.assertIsInstance(expr._lhs, CollectionExpr)
        self.assertIsInstance(expr._rhs, CollectionExpr)

        expr = df.concat(df3, axis=1)
        # Only the import is guarded, so an ImportError raised while
        # inspecting the expression cannot be mistaken for "pandas missing".
        try:
            import pandas  # noqa: F401 -- availability probe only
        except ImportError:
            self.assertIsInstance(expr, AlgoCollectionExpr)
        else:
            self.assertIsInstance(expr, ConcatCollectionExpr)
            self.assertIsInstance(expr._lhs, CollectionExpr)
            self.assertIsInstance(expr._rhs, CollectionExpr)
        for column in merged_names:
            self.assertIn(column, expr.schema.names)

        expr = df.concat(df4, axis=1)
        self.assertIsInstance(expr, AlgoCollectionExpr)
        for column in merged_names:
            self.assertIn(column, expr.schema.names)