Пример #1
0
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(
            ['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
            datatypes('string', 'int64', 'float64', 'boolean', 'decimal',
                      'datetime'), ['ds'], datatypes('string'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        table1 = MockTable(name='pyodps_test_expr_table1', schema=schema)
        self.expr1 = CollectionExpr(_source_data=table1, _schema=schema)

        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema)
        self.expr2 = CollectionExpr(_source_data=table2, _schema=schema)

        schema2 = Schema.from_lists(['name', 'id', 'fid'],
                                    datatypes('string', 'int64', 'float64'),
                                    ['part1', 'part2'],
                                    datatypes('string', 'int64'))
        table3 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        self.expr3 = CollectionExpr(_source_data=table3, _schema=schema2)

        schema3 = Schema.from_lists(['id', 'name', 'relatives', 'hobbies'],
                                    datatypes('int64', 'string',
                                              'dict<string, string>',
                                              'list<string>'))
        table4 = MockTable(name='pyodps_test_expr_table', schema=schema)
        self.expr4 = CollectionExpr(_source_data=table4, _schema=schema3)

        self.maxDiff = None
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(
            ['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
            datatypes('string', 'int64', 'float64', 'boolean', 'decimal',
                      'datetime'), ['ds'], datatypes('string'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        self.expr = CollectionExpr(_source_data=table,
                                   _schema=Schema(columns=schema.columns))

        table1 = MockTable(name='pyodps_test_expr_table1', schema=schema)
        self.expr1 = CollectionExpr(_source_data=table1,
                                    _schema=Schema(columns=schema.columns))

        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema)
        self.expr2 = CollectionExpr(_source_data=table2,
                                    _schema=Schema(columns=schema.columns))

        schema2 = Schema.from_lists(['name', 'id', 'fid'],
                                    datatypes('string', 'int64', 'float64'),
                                    ['part1', 'part2'],
                                    datatypes('string', 'int64'))
        table3 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        self.expr3 = CollectionExpr(_source_data=table3,
                                    _schema=Schema(columns=schema2.columns))
Пример #3
0
    def testSetitemField(self):
        from odps.df.expr.groupby import GroupByCollectionExpr
        from odps.df.expr.merge import JoinFieldMergedCollectionExpr

        expr = self.expr.copy()

        expr['new_id'] = expr.id + 1

        self.assertIn('new_id', expr.schema.names)
        self.assertIs(expr._fields[-1].lhs.input, expr.input)

        self.assertEqual(expr.schema.names, ['name', 'id', 'fid', 'new_id'])

        expr['new_id2'] = expr.id + 2

        self.assertIn('new_id2', expr.schema.names)
        self.assertIs(expr._fields[-1].lhs.input, expr.input)

        self.assertEqual(expr.schema.names,
                         ['name', 'id', 'fid', 'new_id', 'new_id2'])
        self.assertIsNone(expr._input._proxy)

        expr['new_id2'] = expr.new_id

        expr['new_id3'] = expr.id + expr.new_id2
        self.assertIs(expr._fields[-1].lhs.input, expr.input)
        self.assertIs(expr._fields[-1].rhs.lhs.input, expr.input)

        self.assertIsInstance(expr, ProjectCollectionExpr)
        self.assert_(isinstance(expr, ProjectCollectionExpr))

        expr2 = expr.groupby('name').agg(expr.id.sum())
        expr2['new_id2'] = expr2.id_sum + 1
        self.assertIsInstance(expr2, ProjectCollectionExpr)
        self.assertNotIsInstance(expr2, GroupByCollectionExpr)
        self.assertNotIsInstance(expr2, FilterCollectionExpr)

        schema = Schema.from_lists(
            ['name', 'id', 'fid2', 'fid3'],
            [types.string, types.int64, types.float64, types.float64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        table._client = self.config.odps.rest
        expr3 = CollectionExpr(_source_data=table, _schema=schema)

        expr4 = expr.left_join(
            expr3,
            on=[expr.name == expr3.name, expr.id == expr3.id],
            merge_columns=True)
        expr4['fid_1'] = expr4.groupby('id').sort('fid2').row_number()
        self.assertIsInstance(expr4, JoinFieldMergedCollectionExpr)
        self.assertIsNone(expr4._proxy)

        expr5 = expr[expr]
        expr5['name_2'] = expr5.apply(lambda row: row.name,
                                      axis=1,
                                      reduce=True)
        self.assertIsInstance(expr5, ProjectCollectionExpr)
        self.assertIsNone(expr5._proxy)
 def setup(self):
     schema = Schema.from_lists(['name', 'id', 'fid'],
                                [types.string, types.int64, types.float64])
     table = MockTable(name='pyodps_test_expr_table', schema=schema)
     self.expr = CollectionExpr(_source_data=table, _schema=schema)
     table1 = MockTable(name='pyodps_test_expr_table1', schema=schema)
     self.expr1 = CollectionExpr(_source_data=table1, _schema=schema)
     table2 = MockTable(name='pyodps_test_expr_table2', schema=schema)
     self.expr2 = CollectionExpr(_source_data=table2, _schema=schema)
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id'], datatypes('string', 'int64'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        schema2 = Schema.from_lists(['name2', 'id2'], datatypes('string', 'int64'))
        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        self.expr2 = CollectionExpr(_source_data=table2, _schema=schema2)
    def setup(self):
        schema = Schema.from_lists(['name', 'id', 'fid'], [types.string, types.int64, types.float64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        table._client = self.config.odps.rest
        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        schema2 = Schema.from_lists(['name', 'id', 'fid'], [types.string, types.int64, types.float64],
                                    ['part1', 'part2'], [types.string, types.int64])
        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        table2._client = self.config.odps.rest
        self.expr2 = CollectionExpr(_source_data=table2, _schema=schema2)
Пример #7
0
    def testSimpleJoin(self):
        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        schema1 = Schema.from_lists(['id', 'value'], [types.int64, types.string])
        table1 = MockTable(name='pyodps_test_expr_table1', schema=schema1)
        expr1 = CollectionExpr(_source_data=table1, _schema=schema1)

        schema2 = Schema.from_lists(['value', 'num'], [types.string, types.float64])
        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        df = expr.join(expr1).join(expr2)
        self.assertEqual(df.schema.names, ['name', 'id', 'value', 'num'])
Пример #8
0
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['id', 'name', 'relatives', 'hobbies'],
                                   datatypes('int64', 'string', 'dict<string, string>', 'list<string>'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
                                   datatypes('string', 'int64', 'float64', 'boolean', 'decimal', 'datetime'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)
    def testSVGFormatter(self):
        t = MockTable(name='pyodps_test_svg', schema=self.schema, _client=self.odps.rest)
        expr = CollectionExpr(_source_data=t, _schema=self.schema)

        expr1 = expr.groupby('name').agg(id=expr['id'].sum())
        expr2 = expr1['name', expr1.id + 3]

        engine = MixedEngine(self.odps)
        dag = engine.compile(expr2)
        nodes = dag.nodes()
        self.assertEqual(len(nodes), 1)
        expr3 = nodes[0].expr
        self.assertIsInstance(expr3, GroupByCollectionExpr)
        dot = ExprExecutionGraphFormatter(dag)._to_dot()
        self.assertNotIn('Projection', dot)

        expr1 = expr.groupby('name').agg(id=expr['id'].sum()).cache()
        expr2 = expr1['name', expr1.id + 3]

        engine = MixedEngine(self.odps)
        dag = engine.compile(expr2)
        nodes = dag.nodes()
        self.assertEqual(len(nodes), 2)
        dot = ExprExecutionGraphFormatter(dag)._to_dot()
        self.assertIn('Projection', dot)
Пример #11
0
 def setup(self):
     from odps.df.expr.tests.core import MockTable
     schema = Schema.from_lists(types._data_types.keys(),
                                types._data_types.values())
     self.expr = CollectionExpr(_source_data=None, _schema=schema)
     self.sourced_expr = CollectionExpr(
         _source_data=MockTable(client=self.odps.rest), _schema=schema)
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid'],
                                   datatypes('string', 'int64', 'float64'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)
        self.ctx = ExecuteContext()
Пример #13
0
    def testGetAttrs(self):
        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        expected = ('_lhs', '_rhs', '_data_type', '_source_data_type', '_name',
                    '_source_name', '_engine', '_cached_args')
        self.assertSequenceEqual(expected, get_attrs(expr.id + 1))
Пример #14
0
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = DynamicSchema.from_schema(
            Schema.from_lists(
                ['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
                datatypes('string', 'int64', 'float64', 'boolean', 'decimal',
                          'datetime')))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        schema2 = DynamicSchema.from_schema(Schema.from_lists(
            ['name2', 'id', 'fid2'], datatypes('string', 'int64', 'float64')),
                                            default_type=types.string)
        table2 = MockTable(name='pyodps_test_expr_tabl2', schema=schema2)

        self.expr = DynamicCollectionExpr(_source_data=table, _schema=schema)
        self.expr2 = DynamicCollectionExpr(_source_data=table2,
                                           _schema=schema2)
Пример #15
0
    def testSimpleJoin(self):
        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        schema1 = Schema.from_lists(['id', 'value'],
                                    [types.int64, types.string])
        table1 = MockTable(name='pyodps_test_expr_table1', schema=schema1)
        expr1 = CollectionExpr(_source_data=table1, _schema=schema1)

        schema2 = Schema.from_lists(['value', 'num'],
                                    [types.string, types.float64])
        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        df = expr.join(expr1).join(expr2)
        adapter = adapter_from_df(df)
        self.assertEqual(len(adapter._bind_node.inputs), 0)
        self.assertEqual(len(adapter._bind_node.outputs), 1)
    def testConcat(self):
        from odps.ml.expr import AlgoCollectionExpr

        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        df = CollectionExpr(_source_data=None, _schema=schema)
        df1 = CollectionExpr(_source_data=None, _schema=schema)
        df2 = CollectionExpr(_source_data=None, _schema=schema)

        schema = Schema.from_lists(['fid', 'fid2'],
                                   [types.int64, types.float64])
        df3 = CollectionExpr(_source_data=None, _schema=schema)

        schema = Schema.from_lists(['fid', 'fid2'],
                                   [types.int64, types.float64])
        table = MockTable(name='pyodps_test_expr_table2', schema=schema)
        table._client = self.config.odps.rest
        df4 = CollectionExpr(_source_data=table, _schema=schema)

        expr = df.concat([df1, df2])
        self.assertIsInstance(expr, UnionCollectionExpr)
        self.assertIsInstance(expr._lhs, CollectionExpr)
        self.assertIsInstance(expr._rhs, CollectionExpr)

        expr = df.concat(df3, axis=1)
        try:
            import pandas as pd
            self.assertIsInstance(expr, ConcatCollectionExpr)
            self.assertIsInstance(expr._lhs, CollectionExpr)
            self.assertIsInstance(expr._rhs, CollectionExpr)
        except ImportError:
            self.assertIsInstance(expr, AlgoCollectionExpr)
        self.assertIn('name', expr.schema.names)
        self.assertIn('id', expr.schema.names)
        self.assertIn('fid', expr.schema.names)
        self.assertIn('fid2', expr.schema.names)

        expr = df.concat(df4, axis=1)
        self.assertIsInstance(expr, AlgoCollectionExpr)
        self.assertIn('name', expr.schema.names)
        self.assertIn('id', expr.schema.names)
        self.assertIn('fid', expr.schema.names)
        self.assertIn('fid2', expr.schema.names)
Пример #17
0
    def testIsChanged(self):
        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)
        expr2 = CollectionExpr(_source_data=table, _schema=schema)

        self.assertFalse(is_changed(expr[expr.id < 3], expr.id))
        self.assertTrue(is_changed(expr[expr.id + 2, ], expr.id))
        self.assertIsNone(is_changed(expr[expr.id < 3], expr2.id))
        self.assertTrue(
            is_changed(expr.groupby('name').agg(id=expr.id.sum()), expr.id))
        self.assertFalse(
            is_changed(expr.groupby('name').agg(id=expr.id.sum()), expr.name))
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid', 'isMale', 'scale', 'birth'],
                                   datatypes('string', 'bigint', 'double', 'boolean', 'decimal', 'datetime'))
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        self.tb = DataFrame(table)

        import pandas as pd

        df = pd.DataFrame([['name1', 2, 3.14], ['name2', 100, 2.7]], columns=['name', 'id', 'fid'])
        self.pd = DataFrame(df)

        self.expr = self.tb.join(self.pd, on='name')

        self.engine = MixedEngine(self.odps)
Пример #19
0
    def testBizarreField(self):
        def my_func(row):
            return getattr(row, '012') * 2.0

        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid', '012'],
                                   datatypes('string', 'int64', 'float64', 'float64'))

        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine.compile(expr.apply(my_func, axis=1, names=['out_col'], types=['float64']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        udtf = get_function(udtf, UDF_CLASS_NAME)
        self.assertEqual([20, 40],
                         runners.simple_run(udtf, [('name1', 1, None, 10), ('name2', 2, None, 20)]))
    def setup(self):
        schema = Schema.from_lists(['name', 'id', 'fid'], [types.string, types.int64, types.float64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        table._client = self.config.odps.rest
        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        schema2 = Schema.from_lists(['name', 'id', 'fid'], [types.string, types.int64, types.float64],
                                    ['part1', 'part2'], [types.string, types.int64])
        table2 = MockTable(name='pyodps_test_expr_table2', schema=schema2)
        table2._client = self.config.odps.rest
        self.expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        schema3 = Schema.from_lists(['id', 'name', 'relatives', 'hobbies'],
                                    [types.int64, types.string, types.Dict(types.string, types.string),
                                     types.List(types.string)])
        table3 = MockTable(name='pyodps_test_expr_table3', schema=schema3)
        self.expr3 = CollectionExpr(_source_data=table3, _schema=schema3)
Пример #21
0
    def testGetAttrs(self):
        schema = Schema.from_lists(['name', 'id'], [types.string, types.int64])
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        expected = (
            '_lhs',
            '_rhs',
            '_data_type',
            '_source_data_type',
            '_ml_fields_cache',
            '_ml_uplink',
            '_ml_operations',
            '_name',
            '_source_name',
            '_deps',
            '_ban_optimize',
            '_engine',
            '_need_cache',
            '_mem_cache',
            '_id',
            '_args_indexes',
        )
        self.assertSequenceEqual(expected, get_attrs(expr.id + 1))
    def testFilterPushdownThroughMultipleProjection(self):
        schema = Schema.from_lists(list('abcde'), ['string']*5)
        table = MockTable(name='pyodps_test_expr_table3', schema=schema)
        tab = CollectionExpr(_source_data=table, _schema=odps_schema_to_df_schema(schema))

        labels2 = []
        bins2 = []
        for i in range(0, 30):
            a = str(7 * i) + '-' + str(7 * (i + 1))
            b = 7 * i
            bins2.append(b)
            labels2.append(a)

        p1 = tab.select(tab.a,
                        tab.c.astype('int').cut(bins2, labels=labels2, include_over=True).rename('c_cut'),
                        tab.e.astype('int').rename('e'),
                        tab.c.astype('int').rename('c'))
        p1['f'] = p1['e'] / p1['c']
        t = []
        l = []
        for i in range(0, 20):
            a = 1 * i
            b = str(a)
            t.append(a)
            l.append(b)
        p2 = p1.select(p1.a, p1.c_cut, p1.f.cut(bins=t, labels=l, include_over=True).rename('f_cut'))

        expected = "SELECT t1.`a`, CASE WHEN (0 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 7) THEN '0-7' " \
                   "WHEN (7 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 14) " \
                   "THEN '7-14' WHEN (14 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 21) THEN '14-21' " \
                   "WHEN (21 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 28) " \
                   "THEN '21-28' WHEN (28 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 35) THEN '28-35' " \
                   "WHEN (35 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 42) THEN '35-42' " \
                   "WHEN (42 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 49) THEN '42-49' " \
                   "WHEN (49 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 56) " \
                   "THEN '49-56' WHEN (56 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 63) THEN '56-63' " \
                   "WHEN (63 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 70) THEN '63-70' " \
                   "WHEN (70 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 77) " \
                   "THEN '70-77' WHEN (77 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 84) " \
                   "THEN '77-84' WHEN (84 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 91) THEN '84-91' " \
                   "WHEN (91 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 98) " \
                   "THEN '91-98' WHEN (98 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 105) THEN '98-105' " \
                   "WHEN (105 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 112) " \
                   "THEN '105-112' WHEN (112 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 119) THEN '112-119' " \
                   "WHEN (119 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 126) " \
                   "THEN '119-126' WHEN (126 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 133) THEN '126-133' " \
                   "WHEN (133 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 140) " \
                   "THEN '133-140' WHEN (140 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 147) THEN '140-147' " \
                   "WHEN (147 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 154) " \
                   "THEN '147-154' WHEN (154 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 161) THEN '154-161' " \
                   "WHEN (161 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 168) " \
                   "THEN '161-168' WHEN (168 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 175) THEN '168-175' " \
                   "WHEN (175 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 182) " \
                   "THEN '175-182' WHEN (182 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 189) THEN '182-189' " \
                   "WHEN (189 < CAST(t1.`c` AS BIGINT)) AND (CAST(t1.`c` AS BIGINT) <= 196) " \
                   "THEN '189-196' WHEN (196 < CAST(t1.`c` AS BIGINT)) " \
                   "AND (CAST(t1.`c` AS BIGINT) <= 203) THEN '196-203' " \
                   "WHEN 203 < CAST(t1.`c` AS BIGINT) THEN '203-210' END AS `c_cut`, " \
                   "CASE WHEN (0 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 1) THEN '0' " \
                   "WHEN (1 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 2) " \
                   "THEN '1' WHEN (2 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 3) THEN '2' " \
                   "WHEN (3 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 4) " \
                   "THEN '3' WHEN (4 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 5) THEN '4' " \
                   "WHEN (5 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 6) THEN '5' " \
                   "WHEN (6 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 7) " \
                   "THEN '6' WHEN (7 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 8) THEN '7' " \
                   "WHEN (8 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 9) THEN '8' " \
                   "WHEN (9 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 10) " \
                   "THEN '9' WHEN (10 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 11) THEN '10' " \
                   "WHEN (11 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 12) " \
                   "THEN '11' WHEN (12 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 13) THEN '12' " \
                   "WHEN (13 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 14) THEN '13' " \
                   "WHEN (14 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 15) THEN '14' " \
                   "WHEN (15 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 16) THEN '15' " \
                   "WHEN (16 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 17) THEN '16' " \
                   "WHEN (17 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 18) " \
                   "THEN '17' WHEN (18 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 19) THEN '18' " \
                   "WHEN 19 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) THEN '19' END AS `f_cut` \n" \
                   "FROM mocked_project.`pyodps_test_expr_table3` t1 \n" \
                   "WHERE (CASE WHEN (0 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 1) THEN '0' " \
                   "WHEN (1 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 2) " \
                   "THEN '1' WHEN (2 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 3) THEN '2' " \
                   "WHEN (3 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 4) THEN '3' " \
                   "WHEN (4 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 5) THEN '4' " \
                   "WHEN (5 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 6) THEN '5' " \
                   "WHEN (6 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 7) THEN '6' " \
                   "WHEN (7 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 8) THEN '7' " \
                   "WHEN (8 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 9) THEN '8' " \
                   "WHEN (9 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 10) THEN '9' " \
                   "WHEN (10 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 11) THEN '10' " \
                   "WHEN (11 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 12) THEN '11' " \
                   "WHEN (12 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 13) THEN '12' " \
                   "WHEN (13 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 14) THEN '13' " \
                   "WHEN (14 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 15) THEN '14' " \
                   "WHEN (15 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 16) THEN '15' " \
                   "WHEN (16 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 17) THEN '16' " \
                   "WHEN (17 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 18) THEN '17' " \
                   "WHEN (18 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT))) " \
                   "AND ((CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) <= 19) THEN '18' " \
                   "WHEN 19 < (CAST(t1.`e` AS BIGINT) / CAST(t1.`c` AS BIGINT)) THEN '19' END) == '9'"

        self.assertEqual(str(expected), str(ODPSEngine(self.odps).compile(p2[p2.f_cut == '9'], prettify=False)))
Пример #23
0
 def get_table2_df(self):
     schema = Schema.from_lists(['col21', 'col22'],
                                datatypes('string', 'string'))
     table = MockTable(name=TEMP_TABLE_2_NAME, schema=schema)
     return CollectionExpr(_source_data=table, _schema=schema)
Пример #24
0
 def setup(self):
     schema = Schema.from_lists(['name', 'id', 'fid'],
                                [types.string, types.int64, types.float64])
     table = MockTable(name='pyodps_test_query_table', schema=schema)
     table._client = self.config.odps.rest
     self.expr = CollectionExpr(_source_data=table, _schema=schema)