示例#1
0
def test_numpy_tablet_serialization():
    """Check that a NumpyTablet serializes to the same bytes as a Tablet.

    The same four rows for one device are supplied twice: row by row to
    ``Tablet`` and column by column (as numpy arrays) to ``NumpyTablet``.
    The test passes when both objects return equal binary timestamps and
    equal binary values.
    """
    device_id = "root.sg_test_01.d_01"
    measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
    data_types_ = [
        TSDataType.BOOLEAN,
        TSDataType.INT32,
        TSDataType.INT64,
        TSDataType.FLOAT,
        TSDataType.DOUBLE,
        TSDataType.TEXT,
    ]

    # Row-oriented input: one inner list per timestamp.
    timestamps_ = [16, 17, 18, 19]
    values_ = [
        [False, 10, 11, 1.1, 10011.1, "test01"],
        [True, 100, 11111, 1.25, 101.0, "test02"],
        [False, 100, 1, 188.1, 688.25, "test03"],
        [True, 0, 0, 0, 6.25, "test04"],
    ]
    tablet_ = Tablet(device_id, measurements_, data_types_, values_,
                     timestamps_)

    # Column-oriented input: the same cells, one array per measurement.
    # Each numeric/boolean column is paired with an explicit big-endian
    # numpy dtype code; the text column is left at numpy's default dtype.
    typed_columns = [
        ([False, True, False, True], ">?"),
        ([10, 100, 100, 0], ">i4"),
        ([11, 11111, 1, 0], ">i8"),
        ([1.1, 1.25, 188.1, 0], ">f4"),
        ([10011.1, 101.0, 688.25, 6.25], ">f8"),
    ]
    np_values_ = [
        np.array(cells, np.dtype(code)) for cells, code in typed_columns
    ]
    np_values_.append(np.array(["test01", "test02", "test03", "test04"]))
    np_timestamps_ = np.array([16, 17, 18, 19], np.dtype(">i8"))
    np_tablet_ = NumpyTablet(device_id, measurements_, data_types_,
                             np_values_, np_timestamps_)

    binary_timestamps = tablet_.get_binary_timestamps()
    assert binary_timestamps == np_tablet_.get_binary_timestamps()
    binary_values = tablet_.get_binary_values()
    assert binary_values == np_tablet_.get_binary_values()
示例#2
0
def test_simple_query():
    """Insert a random tablet and check that querying it back round-trips.

    One hundred rows of random data (one column per entry of the
    module-level ``ts_path_lst``) are inserted as a single ``Tablet`` and
    then read back with ``SELECT * FROM root.*``.  The query result,
    converted to a DataFrame and restricted to the input's columns, must
    equal the input DataFrame with a leading ``Time`` column.

    The session is closed in a ``finally`` block so that a failure while
    creating series, inserting, or querying does not leak the connection.
    """
    with IoTDBContainer() as db:
        db: IoTDBContainer
        session = Session(db.get_container_host_ip(), db.get_exposed_port(6667))
        session.open(False)
        try:
            create_ts(session)

            # insert data
            data_nums = 100
            data = {}
            timestamps = np.arange(data_nums)
            data[ts_path_lst[0]] = np.float32(np.random.rand(data_nums))
            data[ts_path_lst[1]] = np.random.rand(data_nums)
            data[ts_path_lst[2]] = np.random.randint(10, 100, data_nums, dtype="int32")
            data[ts_path_lst[3]] = np.random.randint(10, 100, data_nums, dtype="int64")
            data[ts_path_lst[4]] = np.random.choice([True, False], size=data_nums)
            data[ts_path_lst[5]] = np.random.choice(["text1", "text2"], size=data_nums)

            df_input = pd.DataFrame(data)

            tablet = Tablet(
                device_id, measurements, data_type_lst, df_input.values, timestamps
            )
            session.insert_tablet(tablet)

            # The query result carries a "Time" column, so add one to the
            # expected frame only after the tablet values were taken from it.
            df_input.insert(0, "Time", timestamps)

            session_data_set = session.execute_query_statement("SELECT * FROM root.*")
            df_output = session_data_set.todf()
            # Align column order (and drop any extra columns) before comparing.
            df_output = df_output[df_input.columns.tolist()]
        finally:
            session.close()
    assert_frame_equal(df_input, df_output)
示例#3
0
def test_numpy_tablet_auto_correct_datatype():
    """Check NumpyTablet output when columns use numpy's default dtypes.

    The numpy columns are built without explicit dtypes and in descending
    timestamp order, while the reference ``Tablet`` receives the same rows
    in ascending order.  The test first confirms that the timestamp array
    and columns 1-3 do *not* already have the dtype reported by
    ``TSDataType.np_dtype()``, then expects both tablets to serialize to
    equal binary timestamps and values.
    """
    device_id = "root.sg_test_01.d_01"
    measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
    data_types_ = [
        TSDataType.BOOLEAN,
        TSDataType.INT32,
        TSDataType.INT64,
        TSDataType.FLOAT,
        TSDataType.DOUBLE,
        TSDataType.TEXT,
    ]
    timestamps_ = [5, 6, 7, 8, 9]
    values_ = [
        [True, 10000, 11111, 8.999, 776, "test05"],
        [True, 1000, 1111, 0, 6.25, "test06"],
        [False, 100, 111, 188.1, 688.25, "test07"],
        [False, 10, 11, 1.25, 101.0, "test08"],
        [False, 0, 1, 1.1, 10011.1, "test09"],
    ]

    # Same cells as above, transposed into columns and with the row order
    # reversed so that the timestamps run 9, 8, 7, 6, 5.
    np_values_unsorted = [
        np.array(column) for column in zip(*reversed(values_))
    ]
    np_timestamps_unsorted = np.array(timestamps_[::-1])

    tablet_ = Tablet(device_id, measurements_, data_types_, values_,
                     timestamps_)

    # numpy.dtype of int and float should be little endian by default
    assert np_timestamps_unsorted.dtype != np.dtype(">i8")
    for column, ts_type in zip(np_values_unsorted[1:4], data_types_[1:4]):
        assert column.dtype != ts_type.np_dtype()

    np_tablet_ = NumpyTablet(
        device_id,
        measurements_,
        data_types_,
        np_values_unsorted,
        np_timestamps_unsorted,
    )

    binary_timestamps = tablet_.get_binary_timestamps()
    assert binary_timestamps == np_tablet_.get_binary_timestamps()
    binary_values = tablet_.get_binary_values()
    assert binary_values == np_tablet_.get_binary_values()
示例#4
0
def test_sort_numpy_tablet():
    """Check NumpyTablet output when its rows arrive in descending order.

    The reference ``Tablet`` is built from rows with ascending timestamps
    5..9.  The ``NumpyTablet`` receives the same cells as explicitly typed
    big-endian columns, but with timestamps 9..5.  Both are expected to
    serialize to equal binary timestamps and values.
    """
    device_id = "root.sg_test_01.d_01"
    measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
    data_types_ = [
        TSDataType.BOOLEAN,
        TSDataType.INT32,
        TSDataType.INT64,
        TSDataType.FLOAT,
        TSDataType.DOUBLE,
        TSDataType.TEXT,
    ]
    timestamps_ = [5, 6, 7, 8, 9]
    values_ = [
        [True, 10000, 11111, 8.999, 776, "test05"],
        [True, 1000, 1111, 0, 6.25, "test06"],
        [False, 100, 111, 188.1, 688.25, "test07"],
        [False, 10, 11, 1.25, 101.0, "test08"],
        [False, 0, 1, 1.1, 10011.1, "test09"],
    ]

    # Transpose the rows into columns with the row order reversed.
    reversed_columns = list(zip(*reversed(values_)))
    # One big-endian dtype code per non-text column; zip() stops after the
    # fifth column, so the text column is appended separately without a
    # dtype argument.
    dtype_codes = [">?", ">i4", ">i8", ">f4", ">f8"]
    np_values_unsorted = [
        np.array(cells, np.dtype(code))
        for cells, code in zip(reversed_columns, dtype_codes)
    ]
    np_values_unsorted.append(np.array(reversed_columns[-1]))
    np_timestamps_unsorted = np.array(timestamps_[::-1], np.dtype(">i8"))

    tablet_ = Tablet(device_id, measurements_, data_types_, values_,
                     timestamps_)
    np_tablet_ = NumpyTablet(
        device_id,
        measurements_,
        data_types_,
        np_values_unsorted,
        np_timestamps_unsorted,
    )

    binary_timestamps = tablet_.get_binary_timestamps()
    assert binary_timestamps == np_tablet_.get_binary_timestamps()
    binary_values = tablet_.get_binary_values()
    assert binary_values == np_tablet_.get_binary_values()
示例#5
0
def test_nullable_tablet_insertion():
    """Insert a tablet containing ``None`` cells and read it back.

    Four rows with missing values in several columns are inserted as one
    ``Tablet``.  The six measurements are then queried and the result,
    converted to a DataFrame, is compared with a DataFrame built from the
    same rows.  Column dtypes are not compared (``check_dtype=False``).

    The session is closed in a ``finally`` block so that a failing insert
    or query does not leak the connection.
    """
    with IoTDBContainer("iotdb:dev") as db:
        db: IoTDBContainer
        session = Session(db.get_container_host_ip(),
                          db.get_exposed_port(6667))
        session.open(False)
        try:
            device_id = "root.sg_test_01.d_01"
            measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
            data_types_ = [
                TSDataType.BOOLEAN,
                TSDataType.INT32,
                TSDataType.INT64,
                TSDataType.FLOAT,
                TSDataType.DOUBLE,
                TSDataType.TEXT,
            ]
            values_ = [
                [None, None, 11, 1.1, 10011.1, "test01"],
                [True, None, 11111, 1.25, 101.0, "test02"],
                [False, 100, 1, None, 688.25, "test03"],
                [True, None, 0, 0, 6.25, None],
            ]
            timestamps_ = [16, 17, 18, 19]
            tablet_ = Tablet(device_id, measurements_, data_types_,
                             values_, timestamps_)
            session.insert_tablet(tablet_)

            # Expected frame: full series paths as column names, preceded
            # by a "Time" column.
            columns = [
                "root.sg_test_01.d_01." + measurement
                for measurement in measurements_
            ]
            df_input = pd.DataFrame(values_, columns=columns)
            df_input.insert(0, "Time", timestamps_)

            session_data_set = session.execute_query_statement(
                "select s_01, s_02, s_03, s_04, s_05, s_06 from root.sg_test_01.d_01"
            )
            df_output = session_data_set.todf()
            # Align column order with the expected frame before comparing.
            df_output = df_output[df_input.columns.tolist()]
        finally:
            session.close()
    # Pass the flag by keyword: the bare positional ``False`` was easy to
    # misread and depends on the parameter order of assert_frame_equal.
    assert_frame_equal(df_input, df_output, check_dtype=False)
示例#6
0
# Example fragment: relies on `session`, `measurements_` and `data_types_`
# having been defined earlier in the surrounding script.

# insert multiple records into database
measurements_list_ = [["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"],
                      ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]]
values_list_ = [[False, 22, 33, 4.4, 55.1, "test_records01"],
                [True, 77, 88, 1.25, 8.125, "test_records02"]]
data_type_list_ = [data_types_, data_types_]
device_ids_ = ["root.sg_test_01.d_01", "root.sg_test_01.d_01"]
session.insert_records(device_ids_, [2, 3], measurements_list_, data_type_list_, values_list_)

# insert one tablet into the database.
# NOTE(review): the original author states that non-ASCII text causes an
# error here ("bytes can only hold 0-128 nums") -- TODO confirm against the
# Tablet serializer before relying on it.
values_ = [[False, 10, 11, 1.1, 10011.1, "test01"],
           [True, 100, 11111, 1.25, 101.0, "test02"],
           [False, 100, 1, 188.1, 688.25, "test03"],
           [True, 0, 0, 0, 6.25, "test04"]]
timestamps_ = [4, 5, 6, 7]
tablet_ = Tablet("root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_)
session.insert_tablet(tablet_)

# insert multiple tablets into database (same rows, later timestamps)
tablet_01 = Tablet("root.sg_test_01.d_01", measurements_, data_types_, values_, [8, 9, 10, 11])
tablet_02 = Tablet("root.sg_test_01.d_01", measurements_, data_types_, values_, [12, 13, 14, 15])
session.insert_tablets([tablet_01, tablet_02])

# execute non-query sql statement
session.execute_non_query_statement("insert into root.sg_test_01.d_01(timestamp, s_02) values(16, 188);")

# execute sql query statement
session_data_set = session.execute_query_statement("select * from root.sg_test_01.d_01")
try:
    session_data_set.set_fetch_size(1024)
    while session_data_set.has_next():
        print(session_data_set.next())
finally:
    # Close the query's operation handle once iteration is done (or has
    # failed); the original fragment never closed it.
    session_data_set.close_operation_handle()
示例#7
0
def test_session():
    """Exercise the Session API end to end against an IoTDB container.

    In order, the test: opens a session, sets and deletes storage groups,
    creates time series one at a time and in bulk (with and without
    tags/attributes), deletes some series and checks existence, inserts
    data through every insert method used here (single record, multiple
    records, tablet, numpy tablet, multiple tablets, tablet with empty
    cells, records of one device, raw SQL), and finally counts the rows
    returned by a query on ``root.sg_test_01.d_01``.

    Failures are reported by calling ``test_fail()`` and
    ``print_message()`` (both defined outside this function) rather than
    by raising assertions.  The final row count depends on the exact
    sequence of inserts above it, so statements must not be reordered.
    """
    with IoTDBContainer("iotdb:dev") as db:
        db: IoTDBContainer
        session = Session(db.get_container_host_ip(),
                          db.get_exposed_port(6667))
        session.open(False)

        # NOTE(review): exit(1) raises SystemExit instead of recording a
        # failure through test_fail() like the rest of this test.
        if not session.is_open():
            print("can't open session")
            exit(1)

        # set and delete storage groups
        session.set_storage_group("root.sg_test_01")
        session.set_storage_group("root.sg_test_02")
        session.set_storage_group("root.sg_test_03")
        session.set_storage_group("root.sg_test_04")

        # A negative return value is treated as failure throughout.
        if session.delete_storage_group("root.sg_test_02") < 0:
            test_fail()
            print_message("delete storage group failed")

        if session.delete_storage_groups(
            ["root.sg_test_03", "root.sg_test_04"]) < 0:
            test_fail()
            print_message("delete storage groups failed")

        # setting time series.
        session.create_time_series(
            "root.sg_test_01.d_01.s_01",
            TSDataType.BOOLEAN,
            TSEncoding.PLAIN,
            Compressor.SNAPPY,
        )
        session.create_time_series(
            "root.sg_test_01.d_01.s_02",
            TSDataType.INT32,
            TSEncoding.PLAIN,
            Compressor.SNAPPY,
        )
        session.create_time_series(
            "root.sg_test_01.d_01.s_03",
            TSDataType.INT64,
            TSEncoding.PLAIN,
            Compressor.SNAPPY,
        )
        # Same call with the four optional trailing arguments supplied
        # (presumably props, tags, attributes and alias -- TODO confirm
        # against the Session.create_time_series signature).
        session.create_time_series(
            "root.sg_test_01.d_02.s_01",
            TSDataType.BOOLEAN,
            TSEncoding.PLAIN,
            Compressor.SNAPPY,
            None,
            {"tag1": "v1"},
            {"description": "v1"},
            "temperature",
        )

        # setting multiple time series once.
        ts_path_lst_ = [
            "root.sg_test_01.d_01.s_04",
            "root.sg_test_01.d_01.s_05",
            "root.sg_test_01.d_01.s_06",
            "root.sg_test_01.d_01.s_07",
            "root.sg_test_01.d_01.s_08",
            "root.sg_test_01.d_01.s_09",
        ]
        data_type_lst_ = [
            TSDataType.FLOAT,
            TSDataType.DOUBLE,
            TSDataType.TEXT,
            TSDataType.FLOAT,
            TSDataType.DOUBLE,
            TSDataType.TEXT,
        ]
        encoding_lst_ = [TSEncoding.PLAIN for _ in range(len(data_type_lst_))]
        compressor_lst_ = [
            Compressor.SNAPPY for _ in range(len(data_type_lst_))
        ]
        session.create_multi_time_series(ts_path_lst_, data_type_lst_,
                                         encoding_lst_, compressor_lst_)
        # Second bulk creation, this time for device d_02 and with
        # per-series tags and attributes.
        ts_path_lst_ = [
            "root.sg_test_01.d_02.s_04",
            "root.sg_test_01.d_02.s_05",
            "root.sg_test_01.d_02.s_06",
            "root.sg_test_01.d_02.s_07",
            "root.sg_test_01.d_02.s_08",
            "root.sg_test_01.d_02.s_09",
        ]
        data_type_lst_ = [
            TSDataType.FLOAT,
            TSDataType.DOUBLE,
            TSDataType.TEXT,
            TSDataType.FLOAT,
            TSDataType.DOUBLE,
            TSDataType.TEXT,
        ]
        encoding_lst_ = [TSEncoding.PLAIN for _ in range(len(data_type_lst_))]
        compressor_lst_ = [
            Compressor.SNAPPY for _ in range(len(data_type_lst_))
        ]
        tags_lst_ = [{"tag2": "v2"} for _ in range(len(data_type_lst_))]
        attributes_lst_ = [{
            "description": "v2"
        } for _ in range(len(data_type_lst_))]
        session.create_multi_time_series(
            ts_path_lst_,
            data_type_lst_,
            encoding_lst_,
            compressor_lst_,
            None,
            tags_lst_,
            attributes_lst_,
            None,
        )

        # delete time series
        if (session.delete_time_series([
                "root.sg_test_01.d_01.s_07",
                "root.sg_test_01.d_01.s_08",
                "root.sg_test_01.d_01.s_09",
        ]) < 0):
            test_fail()
            print_message("delete time series failed")

        # checking time series
        # s_07 expecting False
        if session.check_time_series_exists("root.sg_test_01.d_01.s_07"):
            test_fail()
            print_message("root.sg_test_01.d_01.s_07 shouldn't exist")

        # s_03 expecting True
        if not session.check_time_series_exists("root.sg_test_01.d_01.s_03"):
            test_fail()
            print_message("root.sg_test_01.d_01.s_03 should exist")
        # d_02.s_01 expecting True
        if not session.check_time_series_exists("root.sg_test_01.d_02.s_01"):
            test_fail()
            print_message("root.sg_test_01.d_02.s_01 should exist")
        # d_02.s_06 expecting True
        if not session.check_time_series_exists("root.sg_test_01.d_02.s_06"):
            test_fail()
            print_message("root.sg_test_01.d_02.s_06 should exist")

        # insert one record into the database.
        # (d_01, timestamp 1)
        measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
        values_ = [False, 10, 11, 1.1, 10011.1, "test_record"]
        data_types_ = [
            TSDataType.BOOLEAN,
            TSDataType.INT32,
            TSDataType.INT64,
            TSDataType.FLOAT,
            TSDataType.DOUBLE,
            TSDataType.TEXT,
        ]
        if (session.insert_record("root.sg_test_01.d_01", 1, measurements_,
                                  data_types_, values_) < 0):
            test_fail()
            print_message("insert record failed")

        # insert multiple records into database
        # (d_01, timestamps 2 and 3)
        measurements_list_ = [
            ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"],
            ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"],
        ]
        values_list_ = [
            [False, 22, 33, 4.4, 55.1, "test_records01"],
            [True, 77, 88, 1.25, 8.125, "test_records02"],
        ]
        data_type_list_ = [data_types_, data_types_]
        device_ids_ = ["root.sg_test_01.d_01", "root.sg_test_01.d_01"]
        if (session.insert_records(device_ids_, [2, 3], measurements_list_,
                                   data_type_list_, values_list_) < 0):
            test_fail()
            print_message("insert records failed")

        # insert one tablet into the database.
        # (d_01, timestamps 4-7)
        values_ = [
            [False, 10, 11, 1.1, 10011.1, "test01"],
            [True, 100, 11111, 1.25, 101.0, "test02"],
            [False, 100, 1, 188.1, 688.25, "test03"],
            [True, 0, 0, 0, 6.25, "test04"],
        ]  # Non-ASCII text will cause error since bytes can only hold 0-128 nums.
        timestamps_ = [4, 5, 6, 7]
        tablet_ = Tablet("root.sg_test_01.d_01", measurements_, data_types_,
                         values_, timestamps_)

        if session.insert_tablet(tablet_) < 0:
            test_fail()
            print_message("insert tablet failed")

        # insert one numpy tablet into the database.
        # This one targets device d_02, so it does not add rows to the
        # d_01 query counted at the end of the test.
        np_values_ = [
            np.array([False, True, False, True], np.dtype(">?")),
            np.array([10, 100, 100, 0], np.dtype(">i4")),
            np.array([11, 11111, 1, 0], np.dtype(">i8")),
            np.array([1.1, 1.25, 188.1, 0], np.dtype(">f4")),
            np.array([10011.1, 101.0, 688.25, 6.25], np.dtype(">f8")),
            np.array(["test01", "test02", "test03", "test04"]),
        ]
        np_timestamps_ = np.array([1, 2, 3, 4], np.dtype(">i8"))
        np_tablet_ = NumpyTablet(
            "root.sg_test_01.d_02",
            measurements_,
            data_types_,
            np_values_,
            np_timestamps_,
        )
        if session.insert_tablet(np_tablet_) < 0:
            test_fail()
            print_message("insert numpy tablet failed")

        # insert multiple tablets into database
        # (d_01, timestamps 8-11 and 12-15, reusing the rows in values_)
        tablet_01 = Tablet("root.sg_test_01.d_01", measurements_, data_types_,
                           values_, [8, 9, 10, 11])
        tablet_02 = Tablet(
            "root.sg_test_01.d_01",
            measurements_,
            data_types_,
            values_,
            [12, 13, 14, 15],
        )
        if session.insert_tablets([tablet_01, tablet_02]) < 0:
            test_fail()
            print_message("insert tablets failed")

        # insert one tablet with empty cells into the database.
        # (d_01, timestamps 20-23; None marks an empty cell)
        values_ = [
            [None, 10, 11, 1.1, 10011.1, "test01"],
            [True, None, 11111, 1.25, 101.0, "test02"],
            [False, 100, 1, None, 688.25, "test03"],
            [True, 0, 0, 0, None, None],
        ]  # Non-ASCII text will cause error since bytes can only hold 0-128 nums.
        timestamps_ = [20, 21, 22, 23]
        tablet_ = Tablet("root.sg_test_01.d_01", measurements_, data_types_,
                         values_, timestamps_)
        if session.insert_tablet(tablet_) < 0:
            test_fail()
            print_message("insert tablet with empty cells failed")

        # insert records of one device
        # (d_01, timestamps 1-3 again -- these timestamps were already
        # written by the record inserts above)
        time_list = [1, 2, 3]
        measurements_list = [
            ["s_01", "s_02", "s_03"],
            ["s_01", "s_02", "s_03"],
            ["s_01", "s_02", "s_03"],
        ]
        data_types_list = [
            [TSDataType.BOOLEAN, TSDataType.INT32, TSDataType.INT64],
            [TSDataType.BOOLEAN, TSDataType.INT32, TSDataType.INT64],
            [TSDataType.BOOLEAN, TSDataType.INT32, TSDataType.INT64],
        ]
        values_list = [[False, 22, 33], [True, 1, 23], [False, 15, 26]]

        if (session.insert_records_of_one_device(
                "root.sg_test_01.d_01",
                time_list,
                measurements_list,
                data_types_list,
                values_list,
        ) < 0):
            test_fail()
            print_message("insert records of one device failed")

        # execute non-query sql statement
        # (d_01, timestamp 16)
        if (session.execute_non_query_statement(
                "insert into root.sg_test_01.d_01(timestamp, s_02) values(16, 188)"
        ) < 0):
            test_fail()
            print_message(
                "execute 'insert into root.sg_test_01.d_01(timestamp, s_02) values(16, 188)' failed"
            )

        # execute sql query statement
        session_data_set = session.execute_query_statement(
            "select * from root.sg_test_01.d_01")
        session_data_set.set_fetch_size(1024)
        # The inserts above wrote d_01 at timestamps 1-16 and 20-23,
        # i.e. 20 distinct timestamps.
        expect_count = 20
        actual_count = 0
        while session_data_set.has_next():
            print(session_data_set.next())
            actual_count += 1
        session_data_set.close_operation_handle()

        if actual_count != expect_count:
            test_fail()
            print_message("query count mismatch: expect count: " +
                          str(expect_count) + " actual count: " +
                          str(actual_count))

        # close session connection.
        # NOTE(review): not reached if any call above raises; consider a
        # try/finally around the body.
        session.close()
def performance_test(
    measure_tstype_infos,
    data_file_name,
    use_new=True,
    check_result=False,
    row=10000,
    col=5000,
):
    """
    Benchmark tablet insertion using the original or the new construction path.

    For each of ``col`` devices a tablet is built from the loaded csv data and
    inserted; the time spent loading, constructing and inserting is printed at
    the end. All series under ``root.*`` are deleted before and after the run.

    :param measure_tstype_infos: key(str): measurement name, value(TSDataType): measurement data type
    :param data_file_name: the csv file name to insert
    :param use_new: True to build tablets from whole csv columns (new method),
        False to build them cell by cell into row lists (original method)
    :param check_result: True to query every device back after its insert and
        validate the row count and contents
    :param row: tablet row number
    :param col: tablet column number; one device (and one tablet) is inserted
        per value in ``range(col)``
    """
    print(
        f"Test python: use new: {use_new}, row: {row}, col: {col}. measurements: {measure_tstype_infos}"
    )
    print(f"Total points: {len(measure_tstype_infos) * row * col}")

    # open the session and clean data
    session = create_open_session()
    try:
        session.execute_non_query_statement("delete timeseries root.*")

        # test start
        st = time.perf_counter()
        csv_data = load_csv_data(measure_tstype_infos, data_file_name)
        load_cost = time.perf_counter() - st
        insert_cost = 0
        # Time spent on optional validation; tracked separately so that it is
        # not reported as tablet construction time.
        check_cost = 0
        measurements = list(measure_tstype_infos.keys())
        data_types = list(measure_tstype_infos.values())
        for i in range(0, col):
            device_id = "root.sg%d.%d" % (i % 8, i)
            if not use_new:
                # Use the ORIGINAL method to construct tablet:
                # plain Python lists, one inner list per row.
                timestamps_ = [csv_data.at[t, TIME_STR] for t in range(row)]
                values = [[csv_data.at[t, m] for m in measurements]
                          for t in range(row)]
            else:
                # Use the NEW method to construct tablet:
                # one numpy array per column, cast to the expected dtype.
                timestamps_ = csv_data[TIME_STR].values
                if timestamps_.dtype != FORMAT_CHAR_OF_TYPES[TSDataType.INT64]:
                    timestamps_ = timestamps_.astype(
                        FORMAT_CHAR_OF_TYPES[TSDataType.INT64])
                values = []
                for measure, tstype in measure_tstype_infos.items():
                    type_char = FORMAT_CHAR_OF_TYPES[tstype]
                    value_array = csv_data[measure].values
                    # TEXT columns that are already object arrays are passed
                    # through unchanged; everything else is cast on mismatch.
                    is_object_text = (tstype == TSDataType.TEXT
                                      and value_array.dtype == object)
                    if value_array.dtype != type_char and not is_object_text:
                        value_array = value_array.astype(type_char)
                    values.append(value_array)

            tablet = Tablet(device_id,
                            measurements,
                            data_types,
                            values,
                            timestamps_,
                            use_new=use_new)
            cost_st = time.perf_counter()
            session.insert_tablet(tablet)
            insert_cost += time.perf_counter() - cost_st

            if check_result:
                check_st = time.perf_counter()
                check_count(row, session,
                            "select count(*) from %s" % device_id)
                # Expected query output: one tab-separated line per row,
                # timestamp first, then each measurement as a string.
                expect = [
                    "\t\t".join(
                        [str(csv_data.at[t, TIME_STR])]
                        + [str(csv_data.at[t, m]) for m in measurements])
                    for t in range(row)
                ]
                check_query_result(
                    expect, session,
                    f"select {','.join(measurements)} from {device_id}")
                print("query validation have passed")
                check_cost += time.perf_counter() - check_st
        end = time.perf_counter()

        # clean data
        session.execute_non_query_statement("delete timeseries root.*")
    finally:
        # close the session even when the benchmark fails part-way
        session.close()

    print("load cost: %.3f s" % load_cost)
    print("construct tablet cost: %.3f s" %
          (end - st - insert_cost - load_cost - check_cost))
    print("insert tablet cost: %.3f s" % insert_cost)
    # The total still covers everything between test start and end,
    # including validation when check_result is True.
    print("total cost: %.3f s" % (end - st))