def testRecordSetAndGetByName(self):
    """Assign every column of an eight-column record by name and read it back.

    The schema covers bigint, double, string, datetime, boolean, decimal,
    array<string> and map<string,bigint> columns named ``col0`` .. ``col7``.
    """
    s = Schema.from_lists(
        ['col%s' % i for i in range(8)],
        [
            'bigint', 'double', 'string', 'datetime', 'boolean', 'decimal',
            'array<string>', 'map<string,bigint>',
        ])
    r = Record(schema=s)

    r['col0'] = 1
    r['col1'] = 1.2
    r['col2'] = 'abc'
    r['col3'] = datetime(2016, 1, 1)
    r['col4'] = True
    r['col5'] = _decimal.Decimal('1.111')
    r['col6'] = ['a', 'b']
    r['col7'] = OrderedDict({'a': 1})

    # Whole-row view first, then each column individually.
    self.assertSequenceEqual(r.values, [
        1, 1.2, 'abc', datetime(2016, 1, 1), True,
        _decimal.Decimal('1.111'), ['a', 'b'], OrderedDict({'a': 1}),
    ])

    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists on unittest.TestCase in Python 3.12+.
    self.assertEqual(1, r['col0'])
    self.assertEqual(1.2, r['col1'])
    self.assertEqual('abc', r['col2'])
    self.assertEqual(datetime(2016, 1, 1), r['col3'])
    self.assertEqual(True, r['col4'])
    self.assertEqual(_decimal.Decimal('1.111'), r['col5'])
    self.assertEqual(['a', 'b'], r['col6'])
    self.assertEqual(OrderedDict({'a': 1}), r['col7'])
def testRecordSetAndGetByIndex(self):
    """Assign every column of an eight-column record by position and read it back.

    Also checks the schema snapshot built by ``build_snapshot()``: it is
    expected to be ``None`` when ``options.force_py`` is set and present
    otherwise. Finally verifies slice access on the record.
    """
    s = Schema.from_lists(
        ['col%s' % i for i in range(8)],
        [
            'bigint', 'double', 'string', 'datetime', 'boolean', 'decimal',
            'array<string>', 'map<string,bigint>',
        ])
    s.build_snapshot()
    if options.force_py:
        self.assertIsNone(s._snapshot)
    else:
        self.assertIsNotNone(s._snapshot)

    r = Record(schema=s)
    r[0] = 1
    r[1] = 1.2
    r[2] = 'abc'
    r[3] = datetime(2016, 1, 1)
    r[4] = True
    r[5] = _decimal.Decimal('1.111')
    r[6] = ['a', 'b']
    r[7] = OrderedDict({'a': 1})

    # Whole-row view first, then each position individually.
    self.assertSequenceEqual(r.values, [
        1, 1.2, 'abc', datetime(2016, 1, 1), True,
        _decimal.Decimal('1.111'), ['a', 'b'], OrderedDict({'a': 1}),
    ])

    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists on unittest.TestCase in Python 3.12+.
    self.assertEqual(1, r[0])
    self.assertEqual(1.2, r[1])
    self.assertEqual('abc', r[2])
    self.assertEqual(datetime(2016, 1, 1), r[3])
    self.assertEqual(True, r[4])
    self.assertEqual(_decimal.Decimal('1.111'), r[5])
    self.assertEqual(['a', 'b'], r[6])
    self.assertEqual(OrderedDict({'a': 1}), r[7])
    # Slicing returns the leading values as a list.
    self.assertEqual([1, 1.2], r[:2])
def _gen_data(self):
    """Return three fixed sample rows used as test fixtures.

    Each row is a tuple of (string, integer, float, datetime, boolean,
    Decimal, list of strings, OrderedDict). The rows include the largest
    positive value 2**63-1, a 300-character string, and a list element that
    is ``None``.
    """
    greeting_row = (
        'hello world',
        2 ** 63 - 1,
        math.pi,
        datetime(2015, 9, 19, 2, 11, 25, 33000),
        True,
        Decimal('3.14'),
        ['simple', 'easy'],
        OrderedDict([('s', 1)]),
    )
    farewell_row = (
        'goodbye',
        222222,
        math.e,
        datetime(2020, 3, 10),
        False,
        Decimal('2.555555'),
        ['true', None],
        OrderedDict([('true', 1)]),
    )
    long_string_row = (
        'c' * 300,
        -2 ** 63 + 1,
        -2.222,
        datetime(1999, 5, 25, 3, 10),
        True,
        Decimal(22222),
        ['false'],
        OrderedDict([('false', 0)]),
    )
    return [greeting_row, farewell_row, long_string_row]
def testPandasPersistODPS2(self):
    """Persist a pandas-backed frame of narrow numeric dtypes and check column types.

    Builds one-element int8/int16/int32/int64/float32/float64 columns,
    persists them to a temporary table, and asserts that the table schema
    reports tinyint, smallint, int, bigint, float and double in that order.
    """
    import pandas as pd
    import numpy as np

    integer_columns = (
        ('data_int8', np.int8),
        ('data_int16', np.int16),
        ('data_int32', np.int32),
        ('data_int64', np.int64),
    )
    float_columns = (
        ('data_float32', np.float32),
        ('data_float64', np.float64),
    )

    # Random draws happen in the same order as the column order: the four
    # integer columns first, then the two float columns.
    columns = OrderedDict()
    for column_name, column_dtype in integer_columns:
        columns[column_name] = np.random.randint(0, 10, (1, ), dtype=column_dtype)
    for column_name, column_dtype in float_columns:
        columns[column_name] = np.random.random((1, )).astype(column_dtype)

    frame = DataFrame(pd.DataFrame(columns))

    table_name = tn('pyodps_test_mixed_persist_odps2_types')
    self.odps.delete_table(table_name, if_exists=True)
    frame.persist(table_name, lifecycle=1, drop_table=True, odps=self.odps)

    persisted = self.odps.get_table(table_name)
    self.assertEqual(
        [
            odps_types.tinyint,
            odps_types.smallint,
            odps_types.int_,
            odps_types.bigint,
            odps_types.float_,
            odps_types.double,
        ],
        persisted.schema.types,
    )
def _gen_random_map(self, random_map_type):
    """Build an OrderedDict of random keys and values for a map type.

    A size between 100 and 500 (inclusive) is drawn first; key and value
    arrays of that size are then generated through ``_gen_random_array``
    and zipped together. Repeated keys collapse, so the result may hold
    fewer entries than the drawn size.

    :param random_map_type: map type, passed through
        ``types.validate_data_type`` before its key/value types are read
    :return: OrderedDict pairing generated keys with generated values
    """
    entry_count = random.randint(100, 500)
    map_type = types.validate_data_type(random_map_type)
    generated_keys = self._gen_random_array(map_type.key_type, entry_count)
    generated_values = self._gen_random_array(map_type.value_type, entry_count)
    return OrderedDict(zip(generated_keys, generated_values))
def testCreateTableDDL(self):
    """Check DDL text produced for a real table and for an external-table spec.

    First creates a partitioned table and inspects ``table.get_ddl()``;
    then calls ``Table.gen_create_table_sql`` directly with external-table
    options and compares the output with an expected statement.
    """
    from odps.compat import OrderedDict
    from odps.models import Table

    test_table_name = tn('pyodps_t_tmp_table_ddl')
    # Columns id/name plus a single string partition column ds.
    schema = Schema.from_lists(['id', 'name'], ['bigint', 'string'], ['ds', ], ['string', ])
    self.odps.delete_table(test_table_name, if_exists=True)
    table = self.odps.create_table(test_table_name, schema, lifecycle=10)

    # Default DDL: not external, no IF NOT EXISTS clause, all column names present.
    ddl = table.get_ddl()
    self.assertNotIn('EXTERNAL', ddl)
    self.assertNotIn('NOT EXISTS', ddl)
    for col in table.schema.names:
        self.assertIn(col, ddl)

    # if_not_exists=True must add the NOT EXISTS clause.
    ddl = table.get_ddl(if_not_exists=True)
    self.assertIn('NOT EXISTS', ddl)

    # External-table DDL generated without touching the service object;
    # OrderedDict keeps the serde properties in a fixed order for comparison.
    ddl = Table.gen_create_table_sql(
        'test_external_table', schema, comment='TEST_COMMENT',
        storage_handler='com.aliyun.odps.CsvStorageHandler',
        serde_properties=OrderedDict([('name1', 'value1'), ('name2', 'value2')]),
        location='oss://mock_endpoint/mock_bucket/mock_path/',
    )
    # NOTE(review): the expected statement below appears whitespace-collapsed
    # in this view; it was presumably a multi-line literal (hence the
    # textwrap.dedent call). Confirm its original line breaks and indentation
    # against version control before relying on this comparison.
    self.assertEqual(
        ddl,
        textwrap.dedent(""" CREATE EXTERNAL TABLE `test_external_table` ( `id` BIGINT, `name` STRING ) COMMENT 'TEST_COMMENT' PARTITIONED BY ( `ds` STRING ) STORED BY 'com.aliyun.odps.CsvStorageHandler' WITH SERDEPROPERTIES ( 'name1' = 'value1', 'name2' = 'value2' ) LOCATION 'oss://mock_endpoint/mock_bucket/mock_path/' """).strip())
def testPivotTable(self):
    """Run pivot_table expressions against a real table and check the results.

    Covers four cases: the default aggregation, a list of aggregations,
    pivoting on a ``columns`` field with ``fill_value``, and a mapping of
    named aggregations that mixes a custom aggregator class with a built-in
    one. The table is dropped when the test finishes, pass or fail.
    """
    rows = [
        ['name1', 1, 1.0, True],
        ['name1', 1, 5.0, True],
        ['name1', 2, 2.0, True],
        ['name2', 1, 3.0, False],
        ['name2', 3, 4.0, False],
    ]

    table_name = tn('pyodps_test_mixed_engine_pivot_table')
    self.odps.delete_table(table_name, if_exists=True)
    table_schema = Schema.from_lists(
        ['name', 'id', 'fid', 'ismale'],
        ['string', 'bigint', 'double', 'boolean'])
    table = self.odps.create_table(name=table_name, schema=table_schema)
    expr = DataFrame(table)

    def run(pivot_expr):
        # Execute the expression and return both the raw result and its rows.
        executed = self.engine.execute(pivot_expr)
        return executed, self._get_result(executed)

    try:
        self.odps.write_table(table, 0, rows)

        # Case 1: default aggregation of fid per name.
        _, fetched = run(expr.pivot_table(rows='name', values='fid'))
        wanted = [
            ['name1', 8.0 / 3],
            ['name2', 3.5],
        ]
        self.assertEqual(sorted(fetched), sorted(wanted))

        # Case 2: two aggregations produce one output column each.
        executed, fetched = run(
            expr.pivot_table(rows='name', values='fid', aggfunc=['mean', 'sum']))
        wanted = [
            ['name1', 8.0 / 3, 8.0],
            ['name2', 3.5, 7.0],
        ]
        self.assertEqual(executed.schema.names, ['name', 'fid_mean', 'fid_sum'])
        self.assertEqual(sorted(fetched), sorted(wanted))

        # Case 3: pivot on the name column; missing cells are filled with 0.
        by_name = expr.pivot_table(
            rows='id', values='fid', columns='name', fill_value=0).distinct()
        executed, fetched = run(by_name)
        wanted = [[1, 3.0, 3.0], [2, 2.0, 0], [3, 0, 4.0]]
        self.assertEqual(executed.schema.names,
                         ['id', 'name1_fid_mean', 'name2_fid_mean'])
        self.assertEqual(fetched, wanted)

        class Agg(object):
            """Summing aggregator that keeps its total in a one-element buffer."""

            def buffer(self):
                return [0]

            def __call__(self, buffer, val):
                buffer[0] += val

            def merge(self, buffer, pbuffer):
                buffer[0] += pbuffer[0]

            def getvalue(self, buffer):
                return buffer[0]

        # Case 4: named aggregations; the mapping order fixes the column order.
        aggfuncs = OrderedDict([('my_sum', Agg), ('mean', 'mean')])
        executed, fetched = run(expr.pivot_table(
            rows='id', values='fid', columns='name', fill_value=0,
            aggfunc=aggfuncs))
        wanted = [
            [1, 6.0, 3.0, 3.0, 3.0],
            [2, 2.0, 0, 2.0, 0],
            [3, 0, 4.0, 0, 4.0],
        ]
        self.assertEqual(executed.schema.names, [
            'id',
            'name1_fid_my_sum', 'name2_fid_my_sum',
            'name1_fid_mean', 'name2_fid_mean',
        ])
        self.assertEqual(fetched, wanted)
    finally:
        table.drop()
def testPivotTable(self):
    """Check output schemas of pivot_table expressions without executing them.

    Verifies column names (and types where asserted) for single and multiple
    values/rows, for a list of aggregations, for a custom aggregator class,
    and for a mapping of named aggregators. Also checks that only the
    variant given ``columns`` is an instance of ``DynamicMixin``.
    """
    from odps.df.expr.dynamic import DynamicMixin

    # Single value column, single row key: static schema.
    single = self.expr.pivot_table(values='int8', rows='float32')
    self.assertNotIsInstance(single, DynamicMixin)
    self.assertEqual(single.schema.names, ['float32', 'int8_mean'])

    # Several value columns and row keys.
    multi = self.expr.pivot_table(values=('int16', 'int32'),
                                  rows=['float32', 'int8'])
    self.assertEqual(multi.schema.names,
                     ['float32', 'int8', 'int16_mean', 'int32_mean'])

    # Several aggregations: names are grouped per aggregation.
    multi_agg = self.expr.pivot_table(values=('int16', 'int32'),
                                      rows=['string', 'boolean'],
                                      aggfunc=['mean', 'sum'])
    self.assertEqual(multi_agg.schema.names, [
        'string', 'boolean',
        'int16_mean', 'int32_mean',
        'int16_sum', 'int32_sum',
    ])
    self.assertEqual(multi_agg.schema.types, [
        types.string, types.boolean,
        types.float64, types.float64,
        types.int16, types.int32,
    ])

    @output(['my_mean'], ['float'])
    class Aggregator(object):
        """Mean aggregator; the buffer holds [running sum, count]."""

        def buffer(self):
            return [0.0, 0]

        def __call__(self, buffer, val):
            buffer[0] += val
            buffer[1] += 1

        def merge(self, buffer, pbuffer):
            buffer[0] += pbuffer[0]
            buffer[1] += pbuffer[1]

        def getvalue(self, buffer):
            if buffer[1] == 0:
                return 0.0
            return buffer[0] / buffer[1]

    # Custom aggregator class: the suffix comes from its declared output name.
    custom = self.expr.pivot_table(values='int16', rows='string',
                                   aggfunc=Aggregator)
    self.assertEqual(custom.schema.names, ['string', 'int16_my_mean'])
    self.assertEqual(custom.schema.types, [types.string, types.float64])

    # Mapping of aggregators: the suffix comes from the mapping keys.
    named_aggs = OrderedDict([('my_agg', Aggregator), ('my_agg2', Aggregator)])
    named = self.expr.pivot_table(values='int16', rows='string',
                                  aggfunc=named_aggs)
    self.assertEqual(named.schema.names,
                     ['string', 'int16_my_agg', 'int16_my_agg2'])
    self.assertEqual(named.schema.types,
                     [types.string, types.float64, types.float64])

    # Passing columns= yields a DynamicMixin expression.
    dynamic = self.expr.pivot_table(values='int16', columns='boolean',
                                    rows='string')
    self.assertIsInstance(dynamic, DynamicMixin)
from odps.sqlalchemy_odps import update_test_setting _ONE_ROW_COMPLEX_CONTENTS = [ True, 127, 32767, 2147483647, 9223372036854775807, 0.5, 0.25, 'a string', pd.Timestamp(1970, 1, 1, 8), b'123', [1, 2], OrderedDict({ 1: 2, 3: 4 }), OrderedDict({ "a": 1, "b": 2 }), decimal.Decimal('0.1'), ] except ImportError: dependency_installed = False else: dependency_installed = True def create_one_row(o): table = 'one_row'