Пример #1
0
    def test_index_projection_with_schema(self):
        """Project the first two columns by index; check rows and schema."""
        # Reference schema holding only the two projected columns.
        reference = kudu.schema_builder()
        reference.add_column('key', kudu.int32, nullable=False)
        reference.add_column('int_val', kudu.int32)
        reference.set_primary_keys(['key'])
        expected_schema = reference.build()

        scanner = self.table.scanner()
        scanner.set_projected_column_indexes([0, 1])
        scanner.set_fault_tolerant()
        scanner.open()
        rows = scanner.read_all_tuples()

        # Rebuild a schema, column by column, from what the scanner reports.
        rebuilt = kudu.schema_builder()
        for column in scanner.get_projection_schema():
            rebuilt.copy_column(column)
        rebuilt.set_primary_keys(['key'])
        new_schema = rebuilt.build()

        self.assertEqual(rows, [row[:2] for row in self.tuples])
        self.assertTrue(expected_schema.equals(new_schema))
Пример #2
0
    def test_index_projection_with_schema(self):
        """An index projection returns the projected rows and schema."""
        projected_indexes = [0, 1]

        scanner = self.table.scanner()
        scanner.set_projected_column_indexes(projected_indexes)
        scanner.set_fault_tolerant()
        scanner.open()
        scanned = scanner.read_all_tuples()

        # Schema we expect the projection to describe.
        wanted = kudu.schema_builder()
        wanted.add_column('key', kudu.int32, nullable=False)
        wanted.add_column('int_val', kudu.int32)
        wanted.set_primary_keys(['key'])
        expected_schema = wanted.build()

        # Schema copied from the scanner's own projection.
        copied = kudu.schema_builder()
        for projected_column in scanner.get_projection_schema():
            copied.copy_column(projected_column)
        copied.set_primary_keys(['key'])
        new_schema = copied.build()

        self.assertEqual(scanned, [full_row[0:2] for full_row in self.tuples])
        self.assertTrue(expected_schema.equals(new_schema))
Пример #3
0
    def create_tables(self):
        """Recreate the ``measurements`` table and ensure its companions exist.

        ``measurements`` is dropped first when present and then rebuilt with
        one nullable double column per configured sensor
        (``self._config['sensors']``). ``tag_mappings`` and
        ``raw_measurements`` are never dropped here, so they are created only
        when missing: issuing ``create_table`` for a table that is already
        present is rejected by Kudu and would abort every run after the first.

        All tables are hash partitioned into 3 buckets and created with
        3 replicas.
        """
        client = self._kudu_client

        # Only the wide measurements table is rebuilt from scratch.
        if client.table_exists('measurements'):
            client.delete_table('measurements')

        # tag_mappings: tag_id -> sensor_name
        tm_builder = kudu.schema_builder()
        tm_builder.add_column('tag_id').type(
            kudu.int32).nullable(False).primary_key()
        tm_builder.add_column('sensor_name').type(kudu.string).nullable(False)
        tm_schema = tm_builder.build()
        tm_partitioning = Partitioning().add_hash_partitions(
            column_names=['tag_id'], num_buckets=3)

        # raw_measurements: one row per (record_time, tag_id)
        rm_builder = kudu.schema_builder()
        rm_builder.add_column('record_time').type(kudu.string).nullable(False)
        rm_builder.add_column('tag_id').type(kudu.int32).nullable(False)
        rm_builder.add_column('value').type(kudu.double).nullable(False)
        rm_builder.set_primary_keys(['record_time', 'tag_id'])
        rm_schema = rm_builder.build()
        rm_partitioning = Partitioning().add_hash_partitions(
            column_names=['record_time', 'tag_id'], num_buckets=3)

        # measurements: one row per record_time, one column per sensor
        m_builder = kudu.schema_builder()
        m_builder.add_column('record_time').type(kudu.string).nullable(False)
        for device_id in range(self._config['sensors']):
            m_builder.add_column('Sensor_%d' % device_id).type(
                kudu.double).nullable(True)
        m_builder.set_primary_keys(['record_time'])
        m_schema = m_builder.build()
        m_partitioning = Partitioning().add_hash_partitions(
            column_names=['record_time'], num_buckets=3)

        tables = (
            ('tag_mappings', tm_schema, tm_partitioning),
            ('raw_measurements', rm_schema, rm_partitioning),
            ('measurements', m_schema, m_partitioning),
        )
        for name, schema, partitioning in tables:
            # Skip tables that survived from an earlier run.
            if not client.table_exists(name):
                client.create_table(name,
                                    schema,
                                    partitioning,
                                    n_replicas=3)
Пример #4
0
    def test_set_column_spec_pk(self):
        """A primary key can be set via the spec method or via keyword."""
        # Variant 1: chain ``primary_key()`` onto the returned column spec.
        chained = kudu.schema_builder()
        spec = chained.add_column('key', 'int64', nullable=False)
        spec = spec.primary_key()
        assert spec is not None
        assert 'key' in chained.build().primary_keys()

        # Variant 2: pass ``primary_key=True`` straight to ``add_column``.
        by_keyword = kudu.schema_builder()
        by_keyword.add_column('key', 'int64', nullable=False,
                              primary_key=True)
        assert 'key' in by_keyword.build().primary_keys()
Пример #5
0
    def test_varchar_invalid_length(self):
        """Building a schema with a zero-length varchar key must raise."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key')
        key_spec.type('varchar').primary_key().length(0).nullable(False)

        with self.assertRaises(kudu.KuduInvalidArgument):
            builder.build()
Пример #6
0
    def test_length_on_non_varchar_column(self):
        """Setting a length on a decimal column makes build() raise."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key')
        key_spec.type('decimal').primary_key().nullable(False).length(10)

        with self.assertRaises(kudu.KuduInvalidArgument):
            builder.build()
Пример #7
0
    def test_kudu_schema_convert(self):
        """A Kudu schema converts to the equivalent ibis schema."""
        spec = [
            # name, ibis type, kudu type name, is_nullable, is_primary_key
            ('a', dt.Int8(False), 'int8', False, True),
            ('b', dt.Int16(False), 'int16', False, True),
            ('c', dt.Int32(False), 'int32', False, False),
            ('d', dt.Int64(True), 'int64', True, False),
            ('e', dt.String(True), 'string', True, False),
            ('f', dt.Boolean(False), 'bool', False, False),
            ('g', dt.Float(False), 'float', False, False),
            ('h', dt.Double(True), 'double', True, False),

            # TODO
            # ('i', 'binary', False, False),
            ('j', dt.Timestamp(True), 'timestamp', True, False)
        ]

        builder = kudu.schema_builder()
        for column_name, _, kudu_type, nullable, _ in spec:
            builder.add_column(column_name, kudu_type, nullable=nullable)
        builder.set_primary_keys([row[0] for row in spec if row[4]])
        kschema = builder.build()

        expected = ibis.schema([(row[0], row[1]) for row in spec])
        assert_equal(ksupport.schema_kudu_to_ibis(kschema), expected)
Пример #8
0
    def test_precision_on_non_decimal_column(self):
        """Precision/scale on an int32 column makes build() raise."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key').type('int32')
        key_spec.primary_key().nullable(False).precision(9).scale(2)

        with self.assertRaises(kudu.KuduInvalidArgument):
            builder.build()
Пример #9
0
    def example_schema(cls):
        """Return a three-column schema whose primary key is ``key``."""
        # (name, type, extra keyword arguments for add_column)
        columns = (
            ('key', kudu.int32, {'nullable': False}),
            ('int_val', kudu.int32, {}),
            ('string_val', kudu.string, {}),
        )

        builder = kudu.schema_builder()
        for name, column_type, options in columns:
            builder.add_column(name, column_type, **options)
        builder.set_primary_keys(['key'])
        return builder.build()
Пример #10
0
    def test_type(self):
        """An int32 key column reports the matching type name and constant."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key')
        key_spec.type('int32').primary_key().nullable(False)

        column_type = builder.build()[0].type
        assert column_type.name == 'int32'
        assert column_type.type == kudu.schema.INT32
Пример #11
0
    def test_schema_equals(self):
        """The fixture schema equals itself but not a one-column schema."""
        assert self.schema.equals(self.schema)

        single = kudu.schema_builder()
        single.add_column('key', 'int64', nullable=False, primary_key=True)

        assert not self.schema.equals(single.build())
Пример #12
0
    def example_schema(cls):
        """Build the example schema: ``key``, ``int_val``, ``string_val``."""
        builder = kudu.schema_builder()

        # Non-nullable key first, then the two value columns.
        builder.add_column('key', kudu.int32, nullable=False)
        for value_column, value_type in (('int_val', kudu.int32),
                                         ('string_val', kudu.string)):
            builder.add_column(value_column, value_type)

        builder.set_primary_keys(['key'])
        example = builder.build()
        return example
Пример #13
0
    def test_schema_equals(self):
        """``equals`` is true for the same schema, false for another one."""
        fixture_schema = self.schema
        assert fixture_schema.equals(fixture_schema)

        other_builder = kudu.schema_builder()
        other_builder.add_column('key', 'int64',
                                 nullable=False, primary_key=True)
        other_schema = other_builder.build()

        assert not fixture_schema.equals(other_schema)
Пример #14
0
    def test_unsupported_col_spec_methods_for_create_table(self):
        """``rename()`` and ``remove_default()`` specs must make build() raise.

        Each method is checked against its own fresh builder so that one
        offending column cannot be the reason the other check passes.
        """
        builder = kudu.schema_builder()
        builder.add_column('test', 'int64').rename('test')
        with self.assertRaises(kudu.KuduNotSupported):
            builder.build()

        # Start over: reusing the builder above would keep the renamed column
        # around, so build() would raise because of it regardless of whether
        # remove_default() is rejected.
        builder = kudu.schema_builder()
        builder.add_column('test', 'int64').remove_default()
        with self.assertRaises(kudu.KuduNotSupported):
            builder.build()
Пример #15
0
    def test_date(self):
        """A date key column reports the matching type name and constant."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key')
        key_spec.type('date').primary_key().nullable(False)

        key_column = builder.build()[0]
        column_type = key_column.type
        assert column_type.name == 'date'
        assert column_type.type == kudu.schema.DATE
Пример #16
0
    def example_schema(cls):
        """Return a four-column schema whose primary key is ``key``.

        ``string_val`` carries the default value ``'nothing'``.
        """
        # (name, type, extra keyword arguments for add_column)
        columns = (
            ('key', kudu.int32, {'nullable': False}),
            ('int_val', kudu.int32, {}),
            ('string_val', kudu.string, {'default': 'nothing'}),
            ('unixtime_micros_val', kudu.unixtime_micros, {}),
        )

        builder = kudu.schema_builder()
        for name, column_type, options in columns:
            builder.add_column(name, column_type, **options)
        builder.set_primary_keys(['key'])
        return builder.build()
Пример #17
0
    def example_schema(cls):
        """Build the example schema used by the tests.

        Columns: ``key`` (non-nullable primary key), ``int_val``,
        ``string_val`` (default ``'nothing'``) and ``unixtime_micros_val``.
        """
        builder = kudu.schema_builder()

        builder.add_column('key', kudu.int32, nullable=False)
        builder.add_column('int_val', kudu.int32)
        builder.add_column('string_val', kudu.string, default='nothing')
        builder.add_column('unixtime_micros_val', kudu.unixtime_micros)

        builder.set_primary_keys(['key'])
        example = builder.build()
        return example
Пример #18
0
    def test_decimal_without_precision(self):
        """A decimal column with no precision set makes build() raise."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key')
        key_spec.type('decimal').primary_key().nullable(False)

        with self.assertRaises(kudu.KuduInvalidArgument):
            builder.build()
Пример #19
0
    def test_type(self):
        """The built key column exposes its int32 type name and constant."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key').type('int32')
        key_spec.primary_key().nullable(False)
        built = builder.build()

        key_type = built[0].type
        assert key_type.name == 'int32'
        assert key_type.type == kudu.schema.INT32
Пример #20
0
    def test_varchar(self):
        """A varchar key keeps its type and length after build()."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key').type('varchar')
        key_spec.primary_key().nullable(False).length(10)

        key_column = builder.build()[0]
        column_type = key_column.type
        assert column_type.name == 'varchar'
        assert column_type.type == kudu.schema.VARCHAR
        attributes = key_column.type_attributes
        assert attributes.length == 10
Пример #21
0
    def setUp(self):
        """Build the four-column schema shared by the tests in this class."""
        # (name, type name, nullable)
        self.columns = [
            ('one', 'int32', False),
            ('two', 'int8', False),
            ('three', 'double', True),
            ('four', 'string', False),
        ]
        self.primary_keys = ['one', 'two']

        self.builder = builder = kudu.schema_builder()
        for column_name, type_name, is_nullable in self.columns:
            builder.add_column(column_name, type_name, nullable=is_nullable)
        builder.set_primary_keys(self.primary_keys)

        self.schema = builder.build()
Пример #22
0
    def test_decimal(self):
        """A decimal key keeps its type, precision and scale after build()."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key').type('decimal')
        key_spec.primary_key().nullable(False).precision(9).scale(2)

        key_column = builder.build()[0]
        column_type = key_column.type
        assert column_type.name == 'decimal'
        assert column_type.type == kudu.schema.DECIMAL
        attributes = key_column.type_attributes
        assert attributes.precision == 9
        assert attributes.scale == 2
Пример #23
0
    def setUp(self):
        """Create ``self.builder`` and ``self.schema`` from ``self.columns``."""
        # Each entry is (column name, type name, nullable).
        self.columns = [('one', 'int32', False), ('two', 'int8', False),
                        ('three', 'double', True), ('four', 'string', False)]
        self.primary_keys = ['one', 'two']

        self.builder = kudu.schema_builder()
        for column in self.columns:
            col_name, col_type, col_nullable = column
            self.builder.add_column(col_name, col_type, nullable=col_nullable)
        self.builder.set_primary_keys(self.primary_keys)

        self.schema = self.builder.build()
Пример #24
0
    def test_compression(self):
        """Compression is accepted by name or constant; unknown names raise."""
        builder = kudu.schema_builder()
        builder.add_column('key', 'int64', nullable=False)

        # By name, chained onto the freshly added column spec.
        lz4_spec = builder.add_column('foo', 'string').compression('lz4')
        assert lz4_spec is not None

        # By module constant, on a spec held in a variable.
        zlib_spec = builder.add_column('bar', 'string')
        zlib_spec.compression(kudu.COMPRESSION_ZLIB)

        # An unrecognised name passed as a keyword is rejected immediately.
        with self.assertRaises(ValueError):
            builder.add_column('qux', 'string', compression='unknown')

        builder.set_primary_keys(['key'])
        builder.build()
Пример #25
0
    def test_compression(self):
        """Set column compression three ways and build the schema."""
        schema_builder = kudu.schema_builder()
        schema_builder.add_column('key', 'int64', nullable=False)

        foo_spec = schema_builder.add_column('foo', 'string')
        assert foo_spec.compression('lz4') is not None

        bar_spec = schema_builder.add_column('bar', 'string')
        bar_spec.compression(kudu.COMPRESSION_ZLIB)

        # The keyword form must reject a compression name it does not know.
        with self.assertRaises(ValueError):
            schema_builder.add_column('qux', 'string',
                                      compression='unknown')

        schema_builder.set_primary_keys(['key'])
        schema_builder.build()
Пример #26
0
    def test_encoding(self):
        """Encoding is accepted by name or constant; unknown names raise."""
        builder = kudu.schema_builder()
        builder.add_column('key', 'int64', nullable=False)

        # By name, chained onto the freshly added column spec.
        rle_spec = builder.add_column('foo', 'string').encoding('rle')
        assert rle_spec is not None

        # By module constant, on a spec held in a variable.
        plain_spec = builder.add_column('bar', 'string')
        plain_spec.encoding(kudu.ENCODING_PLAIN)

        # An unrecognised name passed as a keyword is rejected immediately.
        with self.assertRaises(ValueError):
            builder.add_column('qux', 'string', encoding='unknown')

        builder.set_primary_keys(['key'])
        builder.build()
Пример #27
0
    def test_encoding(self):
        """Set column encoding three ways and build the schema."""
        schema_builder = kudu.schema_builder()
        schema_builder.add_column('key', 'int64', nullable=False)

        foo_spec = schema_builder.add_column('foo', 'string')
        assert foo_spec.encoding('rle') is not None

        bar_spec = schema_builder.add_column('bar', 'string')
        bar_spec.encoding(kudu.ENCODING_PLAIN)

        # The keyword form must reject an encoding name it does not know.
        with self.assertRaises(ValueError):
            schema_builder.add_column('qux', 'string', encoding='unknown')

        schema_builder.set_primary_keys(['key'])
        schema_builder.build()
Пример #28
0
    def test_nullable_not_null(self):
        """Nullability set by method or keyword is reflected in the schema."""
        builder = kudu.schema_builder()
        builder.add_column('key', 'int64', nullable=False).primary_key()

        # data1/data2 use the spec method, data3/data4 the keyword argument.
        builder.add_column('data1', 'double').nullable(True)
        builder.add_column('data2', 'double').nullable(False)
        builder.add_column('data3', 'double', nullable=True)
        builder.add_column('data4', 'double', nullable=False)

        schema = builder.build()

        # Expected nullability, in column order: key, data1 .. data4.
        expected_flags = (False, True, False, True, False)
        for position, expected in enumerate(expected_flags):
            assert bool(schema[position].nullable) == expected
Пример #29
0
    def test_decimal(self):
        """Precision 9 / scale 2 set on a decimal key survive build()."""
        builder = kudu.schema_builder()
        spec = builder.add_column('key')
        spec = spec.type('decimal').primary_key().nullable(False)
        spec.precision(9).scale(2)
        schema = builder.build()

        first = schema[0]
        first_type = first.type
        assert first_type.name == 'decimal'
        assert first_type.type == kudu.schema.DECIMAL
        type_attrs = first.type_attributes
        assert type_attrs.precision == 9
        assert type_attrs.scale == 2
Пример #30
0
    def test_nullable_not_null(self):
        """Check the ``nullable`` flag of every column after build()."""
        builder = kudu.schema_builder()
        key_spec = builder.add_column('key', 'int64', nullable=False)
        key_spec.primary_key()

        # Set through the column spec method ...
        builder.add_column('data1', 'double').nullable(True)
        builder.add_column('data2', 'double').nullable(False)
        # ... and through the add_column keyword.
        builder.add_column('data3', 'double', nullable=True)
        builder.add_column('data4', 'double', nullable=False)

        schema = builder.build()

        key, data1, data2, data3, data4 = (schema[i] for i in range(5))
        assert not key.nullable
        assert data1.nullable
        assert not data2.nullable
        assert data3.nullable
        assert not data4.nullable
Пример #31
0
    def test_encoding(self):
        """Every listed encoding is accepted; an unknown name raises."""
        builder = kudu.schema_builder()
        builder.add_column('key', 'int64', nullable=False)

        # Encoding names plus one module constant, one column per entry.
        available_encodings = [
            'auto', 'plain', 'prefix', 'bitshuffle', 'rle', 'dict',
            kudu.ENCODING_DICT,
        ]
        for encoding in available_encodings:
            column_name = 'foo_%s' % encoding
            spec = builder.add_column(column_name, 'string')
            assert spec.encoding(encoding) is not None
            del spec

        plain_spec = builder.add_column('bar', 'string')
        plain_spec.encoding(kudu.ENCODING_PLAIN)

        with self.assertRaises(ValueError):
            builder.add_column('qux', 'string', encoding='unknown')

        builder.set_primary_keys(['key'])
        builder.build()
Пример #32
0
    def test_encoding(self):
        """Add one column per supported encoding, then reject a bogus one."""
        schema_builder = kudu.schema_builder()
        schema_builder.add_column('key', 'int64', nullable=False)

        encodings = ['auto', 'plain', 'prefix', 'bitshuffle',
                     'rle', 'dict', kudu.ENCODING_DICT]
        for enc in encodings:
            encoded = schema_builder.add_column(
                'foo_%s' % enc, 'string').encoding(enc)
            assert encoded is not None
            # Drop the reference before the next column is added.
            del encoded

        bar_spec = schema_builder.add_column('bar', 'string')
        bar_spec.encoding(kudu.ENCODING_PLAIN)

        with self.assertRaises(ValueError):
            schema_builder.add_column('qux', 'string', encoding='unknown')

        schema_builder.set_primary_keys(['key'])
        schema_builder.build()
Пример #33
0
    def on_put(self, req, res, table):
        """Create the Kudu table ``table`` from a JSON column spec if missing.

        The request body is decoded as UTF-8 JSON and iterated as a mapping
        of column name to a type name: ``string``, ``int``, ``time``,
        ``float``, ``double``, ``decimal``, ``binary`` or ``bool``. Any other
        value falls back to a string column. A non-nullable string ``_id``
        primary key is always added, and the table is hash partitioned on it
        into 3 buckets.

        The response is always HTTP 200 with a JSON body
        ``{"table": <name>, "success": <bool>}``; ``success`` is true only
        when this request created the table.
        """
        api = {'table': table, 'success': False}
        data = json.loads(req.bounded_stream.read().decode("utf-8"))

        client = kudu.connect(host='queen', port=7051)

        if not client.table_exists(table):
            # Request type name -> attribute of the ``kudu`` module. The
            # attribute is looked up only for types actually requested.
            # 'double' maps to kudu.double; it used to be created as a float
            # column, which silently lost precision.
            type_attributes = {
                'string': 'string',
                'int': 'int64',
                'time': 'unixtime_micros',
                'float': 'float',
                'double': 'double',
                'decimal': 'decimal',
                'binary': 'binary',
                'bool': 'bool',
            }

            builder = kudu.schema_builder()
            builder.add_column('_id').type(
                kudu.string).nullable(False).primary_key()

            if data:
                for column_name, type_name in data.items():
                    # Non-string or unknown type names become string columns.
                    attribute = 'string'
                    if isinstance(type_name, str):
                        attribute = type_attributes.get(type_name, 'string')
                    builder.add_column(column_name).type(
                        getattr(kudu, attribute))

            schema = builder.build()
            partitioning = Partitioning().add_hash_partitions(
                column_names=['_id'], num_buckets=3)
            client.create_table(table, schema, partitioning)
            api['success'] = True

        res.body = json.dumps(api)
        res.status = falcon.HTTP_200
Пример #34
0
def open_or_create_table(client, table, drop=False):
    """Open ``table``, creating it first when it is missing.

    A new table gets a non-nullable int64 ``ts`` primary key plus one float
    column per name in ``DSTAT_COL_NAMES``, hash partitioned on ``ts`` into
    2 buckets. With ``drop`` set, an existing table is deleted and recreated.

    Returns whatever ``client.table(table)`` returns.
    """
    present = client.table_exists(table)
    if present and drop:
        client.delete_table(table)
        present = False

    if present:
        return client.table(table)

    # Schema: the timestamp key followed by the dstat float columns.
    schema_builder = kudu.schema_builder()
    schema_builder.add_column("ts", kudu.int64, nullable=False,
                              primary_key=True)
    for column_name in DSTAT_COL_NAMES:
        schema_builder.add_column(column_name, kudu.float_)
    table_schema = schema_builder.build()

    hash_partitioning = Partitioning().add_hash_partitions('ts', 2)
    client.create_table(table, table_schema, hash_partitioning)

    return client.table(table)
Пример #35
0
def open_or_create_table(client, table, drop=False):
  """Return a handle to ``table``, creating the table when necessary.

  The table is (re)created when it does not exist, or when it exists and
  ``drop`` is true (the old table is deleted first). The created table has
  a non-nullable int64 ``ts`` primary key and one float column for every
  entry of ``DSTAT_COL_NAMES``; it is hash partitioned on ``ts`` into 2
  buckets.
  """
  must_create = True
  if client.table_exists(table):
    if drop:
      client.delete_table(table)
    else:
      must_create = False

  if must_create:
    # Timestamp key first, then the dstat columns, all floats.
    dstat_builder = kudu.schema_builder()
    dstat_builder.add_column("ts", kudu.int64, nullable=False, primary_key=True)
    for dstat_column in DSTAT_COL_NAMES:
      dstat_builder.add_column(dstat_column, kudu.float_)

    client.create_table(
      table,
      dstat_builder.build(),
      Partitioning().add_hash_partitions('ts', 2),
    )

  return client.table(table)
Пример #36
0
    def test_kudu_schema_convert(self):
        """Convert a Kudu schema to ibis and compare with the expectation."""
        spec = [
            # name, ibis type, kudu type name, is_nullable, is_primary_key
            ('a', dt.Int8(False), 'int8', False, True),
            ('b', dt.Int16(False), 'int16', False, True),
            ('c', dt.Int32(False), 'int32', False, False),
            ('d', dt.Int64(True), 'int64', True, False),
            ('e', dt.String(True), 'string', True, False),
            ('f', dt.Boolean(False), 'bool', False, False),
            ('g', dt.Float(False), 'float', False, False),
            ('h', dt.Double(True), 'double', True, False),

            # TODO
            # ('i', 'binary', False, False),

            ('j', dt.Timestamp(True), 'timestamp', True, False)
        ]

        key_names = [entry[0] for entry in spec if entry[4]]
        ibis_pairs = [(entry[0], entry[1]) for entry in spec]

        builder = kudu.schema_builder()
        for entry in spec:
            builder.add_column(entry[0], entry[2], nullable=entry[3])
        builder.set_primary_keys(key_names)
        kschema = builder.build()

        ischema = ksupport.schema_kudu_to_ibis(kschema)
        assert_equal(ischema, ibis.schema(ibis_pairs))
Пример #37
0
    def setUpClass(self):
        """
        Shared fixture for both the Scan tests and the Scan Token tests.

        Inserts ``nrows`` rows into the example table (``self.ex_table``) and
        then creates and populates a second table, 'type-test', that has one
        column per type for the predicate tests.

        Attributes set: ``nrows``, ``table``, ``tuples``,
        ``projected_names_w_o_float``, ``type_table`` and ``type_test_rows``.

        NOTE(review): the first parameter is named ``self`` although the
        method is called as a class-level hook; presumably a ``@classmethod``
        decorator sits above this view - confirm.
        """
        super(TestScanBase, self).setUpClass()

        self.nrows = 100
        table = self.client.table(self.ex_table)
        session = self.client.new_session()

        # Insert nrows rows and remember the tuples we expect to read back.
        tuples = []
        for i in range(self.nrows):
            op = table.new_insert()
            tup = i, \
                  i * 2, \
                  'hello_%d' % i if i % 2 == 0 else None, \
                  datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            op['key'] = tup[0]
            op['int_val'] = tup[1]
            # string_val is only written for even keys; odd keys leave it unset.
            if i % 2 == 0:
                op['string_val'] = tup[2]
            op['unixtime_micros_val'] = tup[3]
            session.apply(op)
            tuples.append(tup)
        session.flush()

        self.table = table
        self.tuples = tuples

        # Create table to test all types
        # for various predicate tests
        table_name = 'type-test'
        # Create schema, partitioning and then table
        builder = kudu.schema_builder()
        builder.add_column('key').type(kudu.int64).nullable(False)
        builder.add_column('unixtime_micros_val', type_=kudu.unixtime_micros, nullable=False)
        # The decimal column only exists when the client supports decimals.
        if kudu.CLIENT_SUPPORTS_DECIMAL:
            builder.add_column('decimal_val', type_=kudu.decimal, precision=5, scale=2)
        builder.add_column('string_val', type_=kudu.string, compression=kudu.COMPRESSION_LZ4, encoding='prefix')
        builder.add_column('bool_val', type_=kudu.bool)
        builder.add_column('double_val', type_=kudu.double)
        builder.add_column('int8_val', type_=kudu.int8)
        builder.add_column('binary_val', type_='binary', compression=kudu.COMPRESSION_SNAPPY, encoding='prefix')
        builder.add_column('float_val', type_=kudu.float)
        builder.set_primary_keys(['key', 'unixtime_micros_val'])
        schema = builder.build()

        # Every column name except float_val, in schema order.
        self.projected_names_w_o_float = [
            col for col in schema.names if col != 'float_val'
        ]

        # Hash partition on key, plus two range partitions on
        # unixtime_micros_val: one bounded above (exclusive) by 2016-01-01 and
        # one from 2016-01-01 (inclusive) up to 9999-12-31.
        partitioning = Partitioning() \
            .add_hash_partitions(column_names=['key'], num_buckets=3)\
            .set_range_partition_columns(['unixtime_micros_val'])\
            .add_range_partition(
                upper_bound={'unixtime_micros_val': ("2016-01-01", "%Y-%m-%d")},
                upper_bound_type=kudu.EXCLUSIVE_BOUND
            )\
            .add_range_partition(
                lower_bound={'unixtime_micros_val': datetime.datetime(2016, 1, 1)},
                lower_bound_type='INCLUSIVE',
                upper_bound={'unixtime_micros_val': datetime.datetime(9999, 12, 31)}
            )


        self.client.create_table(table_name, schema, partitioning)
        self.type_table = self.client.table(table_name)

        # Insert new rows
        # NOTE(review): in Python ``^`` is bitwise XOR, not exponentiation, so
        # ``10^308`` evaluates to 318 and ``10^38`` to 44. The values below
        # are therefore not the double/float maxima they appear to aim for.
        # Left as is because tests outside this view may depend on them.
        if kudu.CLIENT_SUPPORTS_DECIMAL:
            self.type_test_rows = [
                (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc), Decimal('111.11'),
                 "Test One", True, 1.7976931348623157 * (10^308), 127,
                 b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
                 3.402823 * (10^38)),
                (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), Decimal('0.99'),
                 "测试二", False, 200.1, -1,
                 b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
                 -150.2)
            ]
        else:
            # Same rows without the decimal value.
            self.type_test_rows = [
                (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
                 "Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
                 b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
                 3.402823 * (10 ^ 38)),
                (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
                 "测试二", False, 200.1, -1,
                 b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
                 -150.2)
            ]
        session = self.client.new_session()
        for row in self.type_test_rows:
            op = self.type_table.new_insert(row)
            session.apply(op)
        session.flush()

        # Remove the float values from the type_test_rows tuples so we can
        # compare the other vals
        self.type_test_rows = [
            tuple[:-1] for tuple in self.type_test_rows
        ]
Пример #38
0
    def setUpClass(self):
        """
        Shared fixture for both the Scan tests and the Scan Token tests.

        Inserts ``nrows`` rows into the example table (``self.ex_table``) and
        then creates and populates a second table, 'type-test', that has one
        column per type for the predicate tests.

        Attributes set: ``nrows``, ``table``, ``tuples``,
        ``projected_names_w_o_float``, ``type_table`` and ``type_test_rows``.

        NOTE(review): the first parameter is named ``self`` although the
        method is called as a class-level hook; presumably a ``@classmethod``
        decorator sits above this view - confirm.
        """
        super(TestScanBase, self).setUpClass()

        self.nrows = 100
        table = self.client.table(self.ex_table)
        session = self.client.new_session()

        # Insert nrows rows and remember the tuples we expect to read back.
        tuples = []
        for i in range(self.nrows):
            op = table.new_insert()
            tup = i, \
                  i * 2, \
                  'hello_%d' % i if i % 2 == 0 else None, \
                  datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            op['key'] = tup[0]
            op['int_val'] = tup[1]
            # Even keys get a string; odd multiples of 3 are explicitly set
            # to None; every other odd key leaves string_val unset.
            if i % 2 == 0:
                op['string_val'] = tup[2]
            elif i % 3 == 0:
                op['string_val'] = None
            op['unixtime_micros_val'] = tup[3]
            session.apply(op)
            tuples.append(tup)
        session.flush()

        self.table = table
        self.tuples = tuples

        # Create table to test all types
        # for various predicate tests
        table_name = 'type-test'
        # Create schema, partitioning and then table
        builder = kudu.schema_builder()
        builder.add_column('key').type(
            kudu.int64).nullable(False).primary_key()
        builder.add_column('unixtime_micros_val',
                           type_=kudu.unixtime_micros,
                           nullable=False)
        builder.add_column('string_val',
                           type_=kudu.string,
                           compression=kudu.COMPRESSION_LZ4,
                           encoding='prefix')
        builder.add_column('bool_val', type_=kudu.bool)
        builder.add_column('double_val', type_=kudu.double)
        builder.add_column('int8_val', type_=kudu.int8)
        builder.add_column('binary_val',
                           type_='binary',
                           compression=kudu.COMPRESSION_SNAPPY,
                           encoding='prefix')
        builder.add_column('float_val', type_=kudu.float)
        schema = builder.build()

        # Every column name except float_val, in schema order.
        self.projected_names_w_o_float = [
            col for col in schema.names if col != 'float_val'
        ]

        partitioning = Partitioning().add_hash_partitions(column_names=['key'],
                                                          num_buckets=3)

        self.client.create_table(table_name, schema, partitioning)
        self.type_table = self.client.table(table_name)

        # Insert new rows
        # NOTE(review): in Python ``^`` is bitwise XOR, not exponentiation, so
        # ``10 ^ 308`` evaluates to 318 and ``10 ^ 38`` to 44. The values
        # below are therefore not the double/float maxima they appear to aim
        # for. Left as is because tests outside this view may depend on them.
        self.type_test_rows = [
            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
             "Test One", True, 1.7976931348623157 * (10 ^ 308), 127,
             b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
             3.402823 * (10 ^ 38)),
            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), "测试二",
             False, 200.1, -1,
             b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
             -150.2)
        ]
        session = self.client.new_session()
        for row in self.type_test_rows:
            op = self.type_table.new_insert(row)
            session.apply(op)
        session.flush()

        # Remove the float values from the type_test_rows tuples so we can
        # compare the other vals
        self.type_test_rows = [tuple[:-1] for tuple in self.type_test_rows]
Пример #39
0
 def builder() -> kudu.schema:
     """Return a new Kudu schema builder."""
     # NOTE(review): the return annotation names ``kudu.schema``, while the
     # value returned is whatever ``kudu.schema_builder()`` produces - confirm
     # that the annotation matches that type.
     fresh_builder = kudu.schema_builder()
     return fresh_builder
Пример #40
0
#!/usr/bin/env python
import time
import kudu
from kudu.client import Partitioning
from datetime import datetime

table_name = 'master_foo'

# Connect to the Kudu master reachable under the host name 'queen'.
client = kudu.connect(host='queen', port=7051)

# Schema: a non-nullable int64 primary key plus a string column.
builder = kudu.schema_builder()
(builder.add_column('key')
        .type(kudu.int64)
        .nullable(False)
        .primary_key())
builder.add_column('name').type(kudu.string)
schema = builder.build()

# Spread the rows over three hash buckets keyed on 'key'.
partitioning = Partitioning().add_hash_partitions(
    column_names=['key'],
    num_buckets=3,
)

try:
    print('...try to open the table')
    table = client.table(table_name)
except Exception:
    # Any failure to open the table is handled by creating it and
    # building the insert operations for the initial rows.
    print('...create table')
    client.create_table(table_name, schema, partitioning)
    print('...wait 3 sec before access the table')
    time.sleep(3)
    table = client.table(table_name)
    no = 10000
    for i in range(no):
        # The printed number counts down from `no` to 1.
        print('add row {}'.format(no - i))
        # NOTE(review): the insert operation is created here but this
        # snippet never applies it to a session or flushes it.
        op = table.new_insert({'key': i, 'name': 'foo{}'.format(i)})
Пример #41
0
    def setUpClass(self):
        """
        Shared fixture setup for both the Scan tests and the
        Scan Token tests.

        Writes ``self.nrows`` rows into the existing example table
        (``self.ex_table``), then creates and fills a second table named
        ``'type-test'`` with one column per type for the predicate tests.

        Attributes set on ``self``:
            nrows: number of rows written to the example table (100).
            table: handle of the example table.
            tuples: the written rows, with a missing ``string_val``
                recorded as ``'nothing'`` (presumably the column default
                declared where the example table is created - confirm).
            projected_names_w_o_float: every 'type-test' column name
                except ``'float_val'``.
            type_table: handle of the 'type-test' table.
            type_test_rows: the rows written to ``type_table`` with the
                trailing float value stripped.
        """
        super(TestScanBase, self).setUpClass()

        self.nrows = 100
        table = self.client.table(self.ex_table)
        session = self.client.new_session()

        inserted = []
        for i in range(self.nrows):
            op = table.new_insert()
            # Only even rows carry a string value; odd rows leave the
            # column unset.
            string_val = 'hello_%d' % i if i % 2 == 0 else None
            row = (i,
                   i * 2,
                   string_val,
                   datetime.datetime.utcnow().replace(tzinfo=pytz.utc))
            op['key'] = row[0]
            op['int_val'] = row[1]
            if string_val is not None:
                op['string_val'] = row[2]
            op['unixtime_micros_val'] = row[3]
            session.apply(op)
            inserted.append(row)
        session.flush()

        self.table = table

        # Replace missing values w/ defaults to test default values.
        self.tuples = [
            row if row[2] is not None
            else (row[0], row[1], 'nothing', row[3])
            for row in inserted
        ]

        # Create table to test all types
        # for various predicate tests
        table_name = 'type-test'
        # Create schema, partitioning and then table
        builder = kudu.schema_builder()
        builder.add_column('key').type(kudu.int64).nullable(False)
        builder.add_column('unixtime_micros_val',
                           type_=kudu.unixtime_micros,
                           nullable=False)
        # The decimal column only exists when the client supports it, so
        # the row layout below has to follow the same switch.
        if kudu.CLIENT_SUPPORTS_DECIMAL:
            builder.add_column('decimal_val',
                               type_=kudu.decimal,
                               precision=5,
                               scale=2)
        builder.add_column('string_val',
                           type_=kudu.string,
                           compression=kudu.COMPRESSION_LZ4,
                           encoding='prefix')
        builder.add_column('bool_val', type_=kudu.bool)
        builder.add_column('double_val', type_=kudu.double)
        builder.add_column('int8_val', type_=kudu.int8)
        builder.add_column('binary_val',
                           type_='binary',
                           compression=kudu.COMPRESSION_SNAPPY,
                           encoding='prefix')
        builder.add_column('float_val', type_=kudu.float)
        builder.set_primary_keys(['key', 'unixtime_micros_val'])
        schema = builder.build()

        self.projected_names_w_o_float = [
            col for col in schema.names if col != 'float_val'
        ]

        # Three hash buckets on 'key', plus two range partitions on the
        # timestamp column that meet at 2016-01-01.
        partitioning = (
            Partitioning()
            .add_hash_partitions(column_names=['key'], num_buckets=3)
            .set_range_partition_columns(['unixtime_micros_val'])
            .add_range_partition(
                upper_bound={'unixtime_micros_val': ("2016-01-01", "%Y-%m-%d")},
                upper_bound_type=kudu.EXCLUSIVE_BOUND
            )
            .add_range_partition(
                lower_bound={'unixtime_micros_val': datetime.datetime(2016, 1, 1)},
                lower_bound_type='INCLUSIVE',
                upper_bound={'unixtime_micros_val': datetime.datetime(9999, 12, 31)}
            )
        )

        self.client.create_table(table_name, schema, partitioning)
        self.type_table = self.client.table(table_name)

        # NOTE(review): ``^`` is bitwise XOR in Python, so ``10 ^ 308`` is
        # 318 and ``10 ^ 38`` is 44. These are therefore not the values
        # 1.7976931348623157e308 / 3.402823e38 that the literals suggest.
        # They are kept unchanged because tests outside this method may
        # compare against the very same expressions - confirm before
        # switching to ``**`` or an exponent literal.
        double_val = 1.7976931348623157 * (10 ^ 308)
        float_val = 3.402823 * (10 ^ 38)

        # Optional third cell of each row, matching the optional
        # 'decimal_val' column declared above.
        if kudu.CLIENT_SUPPORTS_DECIMAL:
            decimal_cells = [(Decimal('111.11'),), (Decimal('0.99'),)]
        else:
            decimal_cells = [(), ()]

        # Insert new rows
        self.type_test_rows = [
            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc))
            + decimal_cells[0]
            + ("Test One", True, double_val, 127,
               b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
               float_val),
            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc))
            + decimal_cells[1]
            + ("测试二", False, 200.1, -1,
               b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
               -150.2),
        ]
        session = self.client.new_session()
        for row in self.type_test_rows:
            op = self.type_table.new_insert(row)
            session.apply(op)
        session.flush()

        # Remove the float values from the type_test_rows tuples so we can
        # compare the other vals
        self.type_test_rows = [row[:-1] for row in self.type_test_rows]
def executeCommand(client, command, tableName):
    """Run one of the example commands against a Kudu table.

    Args:
        client: Kudu client object; its ``create_table``, ``table`` and
            ``new_session`` methods are used.
        command: ``"create"`` builds the example schema and partitioning
            and creates the table; ``"insert"`` opens the table and writes
            the example rows. Any other value only prints the banner line.
        tableName: Name of the table to create or to insert into.

    Returns:
        None.
    """
    print("Executing Command {} on table {}".format(command, tableName))

    if command == "create":
        # Creating a table requires just a few steps
        # - Define your schema
        # - Define your partitioning scheme
        # - Call the create_table API

        # Use the schema_builder to build your table's schema
        builder = kudu.schema_builder()

        # Lastname column
        builder.add_column('lastname').type('string').default(
            'doe').compression('snappy').encoding('plain').nullable(False)

        # State/Province the person lives in
        # Leave all defaults except for the type and nullability
        builder.add_column('state_prov').type('string').nullable(False)

        builder.add_column('key').type(kudu.int64).nullable(False)

        # We prefer using dot notation, so let's add a few more columns
        # using that strategy
        #  - type : We specify the string representation of types
        #  - default: Default value if none specified
        #  - compression: Compression type
        #  - encoding: Encoding strategy
        #  - nullable: Nullability
        #  - block_size: Target block size, overriding server defaults
        builder.add_column('firstname').type('string').default(
            'jane').compression('zlib').encoding('plain').nullable(
                False).block_size(20971520)

        # Use the add_column keyword parameters to specify properties,
        # just as an example of the alternative to dot notation.
        builder.add_column('ts_val',
                           type_=kudu.unixtime_micros,
                           nullable=False,
                           compression='lz4')

        # Set our primary key column(s)
        builder.set_primary_keys(['lastname', 'state_prov', 'key'])

        # Build the schema
        schema = builder.build()

        # Hash partition by the state/province column.
        # It's quite possible the data would then be skewed across
        # partitions, so we pass the optional seed parameter to help
        # randomize the mapping of rows to hash buckets.
        partitioning = Partitioning().add_hash_partitions(
            column_names=['state_prov'], num_buckets=3, seed=13)

        # We've hash partitioned according to the state, now let's further
        # range partition our content by lastname. If we wanted to find all
        # the "Smith" families in the state of Oregon, we would very quickly
        # be able to isolate those rows with this type of schema.
        # Set the range partition columns - these columns MUST be part of
        # the primary key columns. The column names are passed as a list;
        # a bare string would be an iterable of single characters.
        partitioning.set_range_partition_columns(['lastname'])
        # Add range partitions
        partitioning.add_range_partition(['A'], ['E'])
        # By default, lower bound is inclusive while upper is exclusive
        partitioning.add_range_partition(['E'], ['Z'],
                                         upper_bound_type='inclusive')

        # Create new table passing in the table name, schema, partitioning
        # object and the optional parameter of number of replicas for this
        # table. If none specified, then it'll go by the Kudu server default
        # value for number of replicas.
        client.create_table(tableName, schema, partitioning, 1)
    elif command == "insert":
        # Open a table
        table = client.table(tableName)

        # Create a new session so that we can apply write operations
        session = client.new_session()

        # We have a few flush modes at our disposal, namely:
        # FLUSH_MANUAL, FLUSH_AUTO_SYNC and FLUSH_AUTO_BACKGROUND
        # The default is FLUSH_MANUAL, and we want to flush manually for
        # our examples below. This call just shows how to change the mode
        # if needed.
        session.set_flush_mode(kudu.FLUSH_MANUAL)

        # We can set a timeout value as well in milliseconds. Set ours to
        # 3 seconds.
        session.set_timeout_ms(3000)

        # Apply the same row three times; only the timestamp differs.
        # NOTE(review): all three share one primary key - presumably this
        # is deliberate, to provoke the error path handled below. Confirm.
        for _ in range(3):
            op = table.new_insert({
                'lastname': 'Smith',
                'state_prov': 'ON',
                'firstname': 'Mike',
                'key': 1,
                'ts_val': datetime.utcnow()
            })
            session.apply(op)

        try:
            session.flush()
        except kudu.KuduBadStatus:
            # Report the failure instead of propagating it.
            (errorResult, overflowed) = session.get_pending_errors()
            print("Insert row failed: {} (more pending errors? {})".format(
                errorResult, overflowed))
Пример #43
0

# Parse arguments
parser = argparse.ArgumentParser(description='Basic Example for Kudu Python.')
parser.add_argument('--masters', '-m', nargs='+', default='localhost',
                    help='The master address(es) to connect to Kudu.')
parser.add_argument('--ports', '-p', nargs='+', default='7051',
                    help='The master server port(s) to connect to Kudu.')
args = parser.parse_args()


# Connect to Kudu master server(s).
client = kudu.connect(host=args.masters, port=args.ports)

# Define a schema for a new table.
builder = kudu.schema_builder()
builder.add_column('key').type(kudu.int64).nullable(False).primary_key()
builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4')
schema = builder.build()

# Define the partitioning schema.
partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3)

# Delete table if it already exists.
if client.table_exists('python-example'):
  client.delete_table('python-example')

# Create a new table.
client.create_table('python-example', schema, partitioning)

# Open a table.