Пример #1
0
def test_iteration_adhoc_out() -> None:
    """Exercise the ad-hoc output sinks: empty_out, ignore_out, collect_out.

    Every sink is a generator that is primed with ``next`` and then fed
    byte chunks through ``send``; in the scenarios expected to finish
    cleanly an empty chunk is sent last.
    """
    # empty_out: an empty chunk is accepted and nothing further is yielded
    sink = iteration.empty_out()
    next(sink)
    sink.send(b'')
    assert not list(sink)

    # empty_out: a non-empty chunk must be rejected with RuntimeError
    sink = iteration.empty_out()
    next(sink)
    try:
        sink.send(b'1')
    except RuntimeError:
        rejected = True
    else:
        rejected = False
    assert rejected

    # ignore_out: arbitrary chunks are accepted without complaint
    sink = iteration.ignore_out()
    next(sink)
    for chunk in (b'1', b'2', b'3', b''):
        sink.send(chunk)
    assert not list(sink)

    # collect_out: the non-empty chunks end up in the supplied list
    collected: typing.List[bytes] = []
    sink = iteration.collect_out(collected)
    next(sink)
    for chunk in (b'1', b'2', b'3', b''):
        sink.send(chunk)
    assert collected == [b'1', b'2', b'3']
    assert not list(sink)
Пример #2
0
    def start(self,
              ping_interval: float = 0.1,
              ping_retry: int = 50) -> typing.Optional[int]:
        """Launch the server as a local daemon and wait until it answers pings.

        Args:
            ping_interval: seconds to sleep between consecutive polls.
            ping_retry: maximum number of polls for the pid to show up.

        Returns:
            The pid of the freshly started server, or None when
            ``get_pid()`` already reports a pid and nothing was done.

        Raises:
            exception.ServiceError: tagged ``'daemon'`` when the launcher
                reports a truthy status, ``'pid'`` when no pid appears
                within ``ping_retry`` polls, and ``'pid_<pid>'`` when the
                pid vanishes before the server answers a ping.
        """
        if self.get_pid() is not None:
            return None

        data_dir = self._path

        # create dir

        data_dir.mkdir(parents=True, exist_ok=True)

        # setup

        clickhouse.create_config(self._tcp_port, self._http_port, self._user,
                                 self._password, str(data_dir),
                                 self._memory_limit, self._config)

        # run

        config_path = data_dir.joinpath('config.xml')
        pid_path = data_dir.joinpath('pid')
        command = [
            clickhouse.binary_file(),
            'server',
            '--daemon',
            f'--config-file={config_path}',
            f'--pid-file={pid_path}',
        ]
        launch = connection.run_process(
            command,
            iteration.empty_in(),
            iteration.empty_out(),
            iteration.empty_out(),
        )

        if launch():
            raise exception.ServiceError(self._host, 'daemon')

        # wait for server initialization

        pid: typing.Optional[int] = None

        for _ in range(ping_retry):
            pid = self.get_pid()

            if pid is not None:
                break

            time.sleep(ping_interval)

        # still None here means every poll came back empty
        if pid is None:
            raise exception.ServiceError(self._host, 'pid')

        while True:
            if self.ping():
                return pid

            time.sleep(ping_interval)

            # the process died while we were waiting for it to respond
            if self.get_pid() is None:
                raise exception.ServiceError(self._host, f'pid_{pid}')
Пример #3
0
    def get_pid(self) -> typing.Optional[int]:
        """Return the pid recorded in the pid file if that process is alive.

        The pid file ``<self._path>/pid`` is read with ``cat`` over SSH and
        the process is then probed with ``kill -0``.

        Returns:
            The pid as an int, or None when the pid file cannot be read,
            does not contain a positive integer, or the probe fails.
        """
        pid_path = self._path.joinpath('pid')

        # get pid

        stdout_list: typing.List[bytes] = []

        if connection.run_ssh(self._ssh_client, [
                'cat',
                str(pid_path),
        ], iteration.empty_in(), iteration.collect_out(stdout_list),
                              iteration.ignore_out())():
            return None

        # The file can exist while still being empty or half-written (for
        # example while the daemon is starting up).  Treat unparsable
        # content as "no pid yet" so polling callers keep retrying instead
        # of crashing.  UnicodeDecodeError is a ValueError subclass.
        try:
            pid = int(b''.join(stdout_list).decode().strip())
        except ValueError:
            return None

        # `kill` gives pid 0 and negative pids special meanings (process
        # groups), so they can never identify the server process.
        if pid <= 0:
            return None

        # find process

        if connection.run_ssh(self._ssh_client, [
                'kill',
                '-0',
                str(pid),
        ], iteration.empty_in(), iteration.empty_out(),
                              iteration.ignore_out())():
            return None

        return pid
Пример #4
0
 def run() -> None:
     """Run a one-million-row query through the ClickHouse client binary.

     The query bytes are supplied as the process input; the first output
     sink is ``ignore_out`` and the second is ``empty_out``.  The value
     returned by the join callable is discarded.
     """
     command = [clickhouse.binary_file(), 'client']
     query_in = iteration.given_in([b'select number from numbers(1000000)'])
     first_out = iteration.ignore_out()
     second_out = iteration.empty_out()

     join = connection.run_process(command, query_in, first_out, second_out)
     join()
Пример #5
0
def test_iteration_empty_out() -> None:
    """Check that empty_out tolerates an empty chunk and rejects real data."""
    # an empty chunk is accepted and the generator yields nothing further
    sink = iteration.empty_out()
    next(sink)
    sink.send(b'')

    assert not list(sink)

    # any non-empty chunk must be refused with RuntimeError
    sink = iteration.empty_out()
    next(sink)

    try:
        sink.send(b'1')
    except RuntimeError:
        rejected = True
    else:
        rejected = False

    assert rejected
Пример #6
0
 def run() -> None:
     """Run a one-million-row query against localhost:8123 over HTTP.

     The query bytes are supplied as the request input; the first output
     sink is ``ignore_out`` and the second is ``empty_out``.  The value
     returned by the join callable is discarded.
     """
     query_in = iteration.given_in([b'select number from numbers(1000000)'])
     first_out = iteration.ignore_out()
     second_out = iteration.empty_out()

     join = connection.run_http('localhost', 8123, '/', query_in, first_out,
                                second_out)
     join()
Пример #7
0
    def stop(self,
             ping_interval: float = 0.1,
             ping_retry: int = 50) -> typing.Optional[int]:
        """Stop the running server, escalating from signal 15 to signal 9.

        Args:
            ping_interval: seconds to sleep between consecutive polls.
            ping_retry: number of polls granted after signal 15 before
                signal 9 is sent.

        Returns:
            The pid of the stopped server, or None when ``get_pid()``
            reported no process to stop.

        Raises:
            exception.ShellError: when either ``kill`` invocation reports
                a truthy status; carries the collected error output.
        """
        pid = self.get_pid()

        if pid is None:
            return None

        # kill process

        assert self._ssh_client is not None

        term_errors: typing.List[bytes] = []
        terminate = connection.run_ssh(
            self._ssh_client,
            ['kill', '-15', str(pid)],
            iteration.empty_in(),
            iteration.empty_out(),
            iteration.collect_out(term_errors),
        )

        if terminate():
            raise exception.ShellError(self._host, b''.join(term_errors))

        # give the process a bounded number of polls to exit by itself
        for _ in range(ping_retry):
            if self.get_pid() is None:
                return pid

            time.sleep(ping_interval)

        # the process is still there: force it and wait without a bound
        kill_errors: typing.List[bytes] = []
        force_kill = connection.run_ssh(
            self._ssh_client,
            ['kill', '-9', str(pid)],
            iteration.empty_in(),
            iteration.empty_out(),
            iteration.collect_out(kill_errors),
        )

        if force_kill():
            raise exception.ShellError(self._host, b''.join(kill_errors))

        while self.get_pid() is not None:
            time.sleep(ping_interval)

        return pid
Пример #8
0
def test_connection_http() -> None:
    """Run ``select 1`` over HTTP on localhost:8123 and check the reply."""
    # presumably (re)starts the local server — confirm against LocalSession
    ck.LocalSession(stop=True, start=True)

    received: typing.List[bytes] = []
    join = connection.run_http(
        'localhost',
        8123,
        '/',
        {},
        iteration.given_in([b'select 1']),
        iteration.collect_out(received),
        iteration.empty_out(),
    )
    status = join()

    assert received == [b'1\n']
    assert status == 200
Пример #9
0
def test_connection_process() -> None:
    """Run ``select 1`` through the client binary and check the reply."""
    # presumably (re)starts the local server — confirm against LocalSession
    ck.LocalSession(stop=True, start=True)

    received: typing.List[bytes] = []
    join = connection.run_process(
        [clickhouse.binary_file(), 'client'],
        iteration.given_in([b'select 1']),
        iteration.collect_out(received),
        iteration.empty_out(),
    )
    status = join()

    assert received == [b'1\n']
    assert status == 0
Пример #10
0
def test_connection_ssh() -> None:
    """Run ``select 1`` through the client binary over SSH to localhost."""
    ck.LocalSession()

    client = connection.connect_ssh('localhost')

    received: typing.List[bytes] = []
    command = [clickhouse.binary_file(), 'client']
    join = connection.run_ssh(client, command,
                              iteration.given_in([b'select 1']),
                              iteration.collect_out(received),
                              iteration.empty_out())
    status = join()

    assert received == [b'1\n']
    assert status == 0
Пример #11
0
    def query_file_async(
        self,
        query: str,
        path_in: typing.Optional[str] = None,
        path_out: typing.Optional[str] = None,
        method: typing.Optional[typing_extensions.Literal['tcp', 'http',
                                                          'ssh']] = None,
        settings: typing.Optional[typing.Dict[str, str]] = None
    ) -> typing.Callable[[], None]:
        """Start a query whose input and output are optionally backed by files.

        Args:
            query: query text handed to ``self._run``.
            path_in: file used as query input; ``empty_in`` is used when
                None.
            path_out: file receiving the query output; ``empty_out`` is
                used when None.
            method: forwarded unchanged to ``self._run``.
            settings: forwarded unchanged to ``self._run``.

        Returns:
            Whatever ``self._run`` returns (annotated as a no-argument
            callable).
        """
        gen_in = (iteration.file_in(path_in)
                  if path_in is not None else iteration.empty_in())
        gen_out = (iteration.file_out(path_out)
                   if path_out is not None else iteration.empty_out())

        return self._run(query, gen_in, gen_out, method, settings)
Пример #12
0
    def start(self,
              ping_interval: float = 0.1,
              ping_retry: int = 50) -> typing.Optional[int]:
        """Launch the server daemon over SSH and wait until it answers pings.

        Args:
            ping_interval: seconds to sleep between consecutive polls.
            ping_retry: maximum number of polls for the pid to show up.

        Returns:
            The pid of the freshly started server, or None when
            ``get_pid()`` already reports a pid and nothing was done.

        Raises:
            exception.ShellError: when creating the directory or running
                the setup module reports a truthy status; carries the
                collected error output.
            exception.ServiceError: tagged ``'daemon'`` when the launcher
                reports a truthy status, ``'pid'`` when no pid appears
                within ``ping_retry`` polls, and ``'pid_<pid>'`` when the
                pid vanishes before the server answers a ping.
        """
        if self.get_pid() is not None:
            return None

        # create dir

        mkdir_errors: typing.List[bytes] = []
        make_dir = connection.run_ssh(
            self._ssh_client,
            ['mkdir', '--parents', str(self._path)],
            iteration.empty_in(),
            iteration.empty_out(),
            iteration.collect_out(mkdir_errors),
        )

        if make_dir():
            raise exception.ShellError(self._host, b''.join(mkdir_errors))

        # setup

        setup_errors: typing.List[bytes] = []
        setup_command = [
            *self._ssh_command_prefix,
            'python3',
            '-m',
            'ck.clickhouse.setup',
        ]
        # the setup module is given the repr of this dict as its input
        setup_payload = repr({
            'tcp_port': self._tcp_port,
            'http_port': self._http_port,
            'user': self._user,
            'password': self._password,
            'data_dir': str(self._path),
            'config': self._config,
        }).encode()
        run_setup = connection.run_ssh(
            self._ssh_client,
            setup_command,
            iteration.given_in([setup_payload]),
            iteration.empty_out(),
            iteration.collect_out(setup_errors),
        )

        if run_setup():
            raise exception.ShellError(self._host, b''.join(setup_errors))

        # run

        assert self._ssh_binary_file is not None

        config_path = self._path.joinpath('config.xml')
        pid_path = self._path.joinpath('pid')
        server_command = [
            *self._ssh_command_prefix,
            self._ssh_binary_file,
            'server',
            '--daemon',
            f'--config-file={config_path}',
            f'--pid-file={pid_path}',
        ]
        launch = connection.run_ssh(
            self._ssh_client,
            server_command,
            iteration.empty_in(),
            iteration.empty_out(),
            iteration.empty_out(),
        )

        if launch():
            raise exception.ServiceError(self._host, 'daemon')

        # wait for server initialization

        pid: typing.Optional[int] = None

        for _ in range(ping_retry):
            pid = self.get_pid()

            if pid is not None:
                break

            time.sleep(ping_interval)

        # still None here means every poll came back empty
        if pid is None:
            raise exception.ServiceError(self._host, 'pid')

        while True:
            if self.ping():
                return pid

            time.sleep(ping_interval)

            # the process died while we were waiting for it to respond
            if self.get_pid() is None:
                raise exception.ServiceError(self._host, f'pid_{pid}')
Пример #13
0
    def query_pandas_async(
        self,
        query: str,
        dataframe: typing.Optional[pandas.DataFrame] = None,
        method: typing.Optional[typing_extensions.Literal['tcp', 'http',
                                                          'ssh']] = None,
        settings: typing.Optional[typing.Dict[str, str]] = None,
        join_interval: float = 0.1
    ) -> typing.Callable[[], typing.Optional[pandas.DataFrame]]:
        """Start a query that exchanges data as a pandas DataFrame.

        The query is suffixed with ``format ArrowStream`` and started via
        ``self._run``; a worker thread is started immediately to convert
        between the Arrow stream and pandas.

        Args:
            query: query text, without a format clause.
            dataframe: when None the query output is read into a new
                DataFrame; otherwise this DataFrame is serialized and
                used as the query input.
            method: forwarded unchanged to ``self._run``.
            settings: forwarded unchanged to ``self._run``.
            join_interval: timeout in seconds of each ``thread.join``
                call made while waiting for the worker thread.

        Returns:
            A no-argument callable that waits for the worker thread and
            the query, re-raises any exception captured in the worker,
            and returns the resulting DataFrame (None when a DataFrame
            was supplied as input).
        """
        # state shared with the worker thread through ``nonlocal``
        batch = None
        error = None

        # prepare

        # paired streams: what is written to write_stream is presumably
        # readable from read_stream — confirm against iteration.echo_io
        read_stream, write_stream = iteration.echo_io()

        if dataframe is None:
            # download: query output goes into the stream pair
            gen_in = iteration.empty_in()
            gen_out = iteration.stream_out(write_stream)
        else:
            # upload: query input comes out of the stream pair
            gen_in = iteration.stream_in(read_stream)
            gen_out = iteration.empty_out()

        raw_join = self._run(f'{query} format ArrowStream', gen_in, gen_out,
                             method, settings)

        # create thread

        def handle_batch() -> None:
            """Convert between the Arrow stream and pandas; capture errors."""
            nonlocal dataframe
            nonlocal batch
            nonlocal error

            try:
                if dataframe is None:
                    batch = pyarrow.RecordBatchStreamReader(read_stream)
                    dataframe = batch.read_pandas()
                else:
                    table = pyarrow.Table.from_pandas(dataframe)
                    batch = pyarrow.RecordBatchStreamWriter(
                        write_stream, table.schema)
                    batch.write_table(table)
                    # uploads produce no result, so join() returns None
                    dataframe = None
                    batch.close()
                    write_stream.close()

            except BaseException as raw_error:  # pylint: disable=broad-except
                # stored so that join() can re-raise it in the caller
                error = raw_error

        thread = threading.Thread(target=handle_batch)

        thread.start()

        # join thread

        def join() -> typing.Optional[pandas.DataFrame]:
            """Wait for the worker and the query, then return the result."""
            # wait in join_interval slices until the worker ends or fails
            while error is None and thread.is_alive():
                thread.join(join_interval)

            # NOTE(review): raw_join() is not called on this path
            if error is not None:
                raise error  # pylint: disable=raising-bad-type

            raw_join()

            return dataframe

        return join
Пример #14
0
    def query_pandas_async(
        self,
        query: str,
        dataframe: typing.Optional[pandas.DataFrame] = None,
        encoding: typing.Optional[str] = 'utf-8',
        method: typing.Optional[typing_extensions.Literal['tcp', 'http',
                                                          'ssh']] = None,
        settings: typing.Optional[typing.Dict[str, str]] = None,
        join_interval: float = 0.1
    ) -> typing.Callable[[], typing.Optional[pandas.DataFrame]]:
        """Start a query that exchanges data as a pandas DataFrame.

        The query is suffixed with ``format ArrowStream`` and started via
        ``self._run``; a worker thread is started immediately to convert
        between the Arrow stream and pandas.

        Args:
            query: query text, without a format clause.
            dataframe: when None the query output is read into a new
                DataFrame; otherwise this DataFrame is serialized and
                used as the query input.
            encoding: when not None, values in object-dtype columns are
                converted between ``bytes``/``bytearray`` and ``str``
                with this encoding (decoded on download, encoded on
                upload), recursing into tuples, lists, numpy arrays,
                sets, frozensets and dict values.  None disables the
                conversion.
            method: forwarded unchanged to ``self._run``.
            settings: forwarded unchanged to ``self._run``.
            join_interval: timeout in seconds of each ``thread.join``
                call made while waiting for the worker thread.

        Returns:
            A no-argument callable that waits for the worker thread and
            the query, re-raises any exception captured in the worker,
            and returns the resulting DataFrame (None when a DataFrame
            was supplied as input).
        """
        # state shared with the worker thread through ``nonlocal``
        batch = None
        error = None

        # prepare

        # paired streams: what is written to write_stream is presumably
        # readable from read_stream — confirm against iteration.echo_io
        read_stream, write_stream = iteration.echo_io()

        if dataframe is None:
            # download: query output goes into the stream pair
            gen_in = iteration.empty_in()
            gen_out = iteration.stream_out(write_stream)
        else:
            # upload: query input comes out of the stream pair
            gen_in = iteration.stream_in(read_stream)
            gen_out = iteration.empty_out()

        raw_join = self._run(f'{query} format ArrowStream', gen_in, gen_out,
                             method, settings)

        # create thread

        def handle_batch() -> None:
            """Convert between the Arrow stream and pandas; capture errors."""
            nonlocal dataframe
            nonlocal batch
            nonlocal error

            try:
                if dataframe is None:
                    batch = pyarrow.RecordBatchStreamReader(read_stream)
                    dataframe = batch.read_pandas()

                    if encoding is not None:

                        def decode(value: typing.Any) -> typing.Any:
                            """Recursively turn bytes/bytearray into str.

                            Only exact types are matched, so instances of
                            subclasses are returned unchanged.  Dict keys
                            are left as they are.
                            """
                            if type(value) is bytes:
                                assert encoding is not None

                                return value.decode(encoding)

                            if type(value) is bytearray:
                                assert encoding is not None

                                return value.decode(encoding)

                            if type(value) is tuple:
                                return tuple(decode(child) for child in value)

                            if type(value) is list:
                                return [decode(child) for child in value]

                            if type(value) is numpy.ndarray:
                                return numpy.array(
                                    [decode(child) for child in value])

                            if type(value) is set:
                                return {decode(child) for child in value}

                            if type(value) is frozenset:
                                return frozenset(
                                    decode(child) for child in value)

                            if type(value) is dict:
                                return {
                                    key: decode(child)
                                    for key, child in value.items()
                                }

                            return value

                        # rebuild the frame, converting only object columns
                        dataframe = pandas.DataFrame({
                            column: (dataframe[column].apply(decode)
                                     if dataframe[column].dtype == 'O' else
                                     dataframe[column])
                            for column in dataframe
                        })
                else:
                    if encoding is not None:

                        def encode(value: typing.Any) -> typing.Any:
                            """Recursively turn str into bytes.

                            Only exact types are matched, so instances of
                            subclasses are returned unchanged.  Dict keys
                            are left as they are.
                            """
                            if type(value) is str:
                                assert encoding is not None

                                return value.encode(encoding)

                            if type(value) is tuple:
                                return tuple(encode(child) for child in value)

                            if type(value) is list:
                                return [encode(child) for child in value]

                            if type(value) is numpy.ndarray:
                                return numpy.array(
                                    [encode(child) for child in value])

                            if type(value) is set:
                                return {encode(child) for child in value}

                            if type(value) is frozenset:
                                return frozenset(
                                    encode(child) for child in value)

                            if type(value) is dict:
                                return {
                                    key: encode(child)
                                    for key, child in value.items()
                                }

                            return value

                        # rebuild the frame, converting only object columns
                        dataframe = pandas.DataFrame({
                            column: (dataframe[column].apply(encode)
                                     if dataframe[column].dtype == 'O' else
                                     dataframe[column])
                            for column in dataframe
                        })

                    # one Arrow array per column, built from the raw values
                    table = pyarrow.Table.from_arrays([
                        pyarrow.array(dataframe[column].values)
                        for column in dataframe
                    ], dataframe.columns)
                    batch = pyarrow.RecordBatchStreamWriter(
                        write_stream, table.schema)
                    batch.write_table(table)
                    # uploads produce no result, so join() returns None
                    dataframe = None
                    batch.close()
                    write_stream.close()
            except pyarrow.ArrowInvalid:
                # NOTE(review): Arrow stream errors are swallowed here;
                # presumably the underlying query failure is reported by
                # raw_join() in join() — confirm
                pass
            except BaseException as raw_error:  # pylint: disable=broad-except
                # stored so that join() can re-raise it in the caller
                error = raw_error

        thread = threading.Thread(target=handle_batch)

        thread.start()

        # join thread

        def join() -> typing.Optional[pandas.DataFrame]:
            """Wait for the worker and the query, then return the result."""
            # wait in join_interval slices until the worker ends or fails
            while error is None and thread.is_alive():
                thread.join(join_interval)

            # NOTE(review): raw_join() is not called on this path
            if error is not None:
                raise error  # pylint: disable=raising-bad-type

            raw_join()

            return dataframe

        return join