def run_and_measure(f, statements, concurrency, num_items=None, sampler=None):
    """Run ``f`` over ``statements`` and return the collected timing stats.

    Args:
        f: Callable to execute for each statement; it is wrapped with
            ``aio.measure`` so that results end up in a ``Stats`` object.
        statements: Iterable of statements handed to ``aio.run_many``.
        concurrency: Concurrency value forwarded to ``aio.run_many``.
        num_items: Optional item count forwarded to ``aio.run_many``.
        sampler: Optional sampler used to construct the ``Stats`` object.

    Returns:
        A ``TimedStats`` built from the start timestamp, the end timestamp
        and the ``Stats`` object. Both timestamps are ``int(time() * 1000)``
        (milliseconds, assuming ``time`` is ``time.time``).
    """
    def now_ms():
        return int(time() * 1000)

    stats = Stats(sampler)
    measured_call = partial(aio.measure, stats, f)

    started = now_ms()
    aio.run_many(measured_call, statements, concurrency, num_items=num_items)
    ended = now_ms()

    return TimedStats(started, ended, stats)
def exec_instructions(self, instructions):
    """Execute all statements and data loads described by ``instructions``.

    The work is done in three phases, in this order:

    1. Every statement read from ``instructions.statement_files`` (paths are
       resolved relative to ``self.spec_dir``), followed by the statements in
       ``instructions.statements``, is executed one at a time.
    2. For each entry in ``instructions.data_files`` the inserts produced by
       ``self._to_inserts`` are grouped into bulk queries and executed.
    3. For each entry in ``instructions.data_cmds`` the command is started
       as a subprocess, its stdout is turned into dicts, and those are
       inserted into the entry's ``target`` table in bulk.

    After each data load a ``refresh table`` statement is issued when
    ``self.client.is_cratedb`` is truthy.

    Args:
        instructions: Object exposing ``statement_files``, ``statements``,
            ``data_files`` and ``data_cmds``. Entries of ``data_files`` and
            ``data_cmds`` are read with ``['target']`` and
            ``.get('bulk_size', 5000)`` / ``.get('concurrency', 25)``;
            ``data_cmds`` entries additionally need a ``'cmd'`` key.
    """
    paths = (
        os.path.join(self.spec_dir, name)
        for name in instructions.statement_files
    )
    lines = (line for path in paths for line in get_lines(path))
    statements = itertools.chain(as_statements(lines), instructions.statements)
    for stmt in statements:
        aio.run(self.client.execute, stmt)

    for data_file in instructions.data_files:
        inserts = as_bulk_queries(
            self._to_inserts(data_file),
            data_file.get('bulk_size', 5000),
        )
        concurrency = data_file.get('concurrency', 25)
        aio.run_many(self.client.execute_many, inserts, concurrency=concurrency)
        if self.client.is_cratedb:
            aio.run(self.client.execute, f"refresh table {data_file['target']}")

    for data_cmd in instructions.data_cmds:
        # Look up the target first so a malformed entry fails before a
        # child process has been spawned.
        target = data_cmd['target']
        # The context manager closes the stdout pipe and waits for the child
        # on exit (also when an insert raises), so no zombie process or open
        # file descriptor is left behind.
        with subprocess.Popen(
            data_cmd['cmd'],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ) as process:
            dicts = dicts_from_lines(process.stdout)
            inserts = as_bulk_queries(
                (to_insert(target, d) for d in dicts),
                data_cmd.get('bulk_size', 5000),
            )
            concurrency = data_cmd.get('concurrency', 25)
            aio.run_many(self.client.execute_many, inserts, concurrency=concurrency)
        if self.client.is_cratedb:
            aio.run(self.client.execute, f"refresh table {target}")
def exec_instructions(self, instructions):
    """Execute the statements and data-file loads of ``instructions``.

    First every statement coming from ``instructions.statement_files``
    (paths resolved relative to ``self.spec_dir``) and then from
    ``instructions.statements`` is executed one at a time. Afterwards each
    entry of ``instructions.data_files`` is loaded with bulk inserts and its
    target table is refreshed.

    Args:
        instructions: Object exposing ``statement_files``, ``statements``
            and ``data_files``. Each data file entry is read with
            ``['target']``, ``.get('bulk_size', 5000)`` and
            ``.get('concurrency', 25)``.
    """
    spec_paths = (
        os.path.join(self.spec_dir, name)
        for name in instructions.statement_files
    )
    file_lines = (line for path in spec_paths for line in get_lines(path))
    all_statements = itertools.chain(
        as_statements(file_lines),
        instructions.statements,
    )
    for statement in all_statements:
        aio.run(self.client.execute, statement)

    for data_file in instructions.data_files:
        bulk_size = data_file.get('bulk_size', 5000)
        bulk_inserts = as_bulk_queries(self._to_inserts(data_file), bulk_size)
        aio.run_many(
            self.client.execute_many,
            bulk_inserts,
            concurrency=data_file.get('concurrency', 25),
        )
        refresh_stmt = 'refresh table {target}'.format(target=data_file['target'])
        aio.run(self.client.execute, refresh_stmt)
def insert_json(table=None,
                bulk_size=1000,
                concurrency=25,
                hosts=None,
                infile=None,
                output_fmt=None):
    """Insert JSON lines from a file or stdin into a CrateDB cluster.

    If no hosts are specified the statements will be printed.

    Args:
        table: Target table name.
        bulk_size: Bulk size of the insert statements.
        concurrency: Number of operations to run concurrently.
        hosts: hostname:port pairs of the Crate nodes
        infile: Source of the JSON lines; handed to ``dicts_from_lines``
            (or to ``print_only`` when no hosts are given).
        output_fmt: Forwarded to ``format_stats`` when printing the result.

    Raises:
        SystemExit: If an insert fails with ``clients.SqlException``, or if
            no timing samples were recorded (no input data).
    """
    if not hosts:
        return print_only(infile, table)

    records = dicts_from_lines(infile)
    bulk_queries = as_bulk_queries(
        (to_insert(table, record) for record in records),
        bulk_size,
    )
    progress_msg = 'Executing inserts: bulk_size={} concurrency={}'.format(
        bulk_size, concurrency)
    print(progress_msg, file=sys.stderr)

    stats = Stats()
    with clients.client(hosts, concurrency=concurrency) as client:
        timed_insert = partial(aio.measure, stats, client.execute_many)
        try:
            aio.run_many(timed_insert, bulk_queries, concurrency)
        except clients.SqlException as e:
            raise SystemExit(str(e))

    try:
        summary = stats.get()
        print(format_stats(summary, output_fmt))
    except KeyError:
        # A KeyError with recorded samples is unexpected: let it propagate.
        if stats.sampler.values:
            raise
        raise SystemExit('No data received via stdin')
def insert_json(table=None,
                bulk_size=1000,
                concurrency=25,
                hosts=None,
                output_fmt=None):
    """Insert JSON lines fed into stdin into a Crate cluster.

    If no hosts are specified the statements will be printed.

    Args:
        table: Target table name.
        bulk_size: Bulk size of the insert statements.
        concurrency: Number of operations to run concurrently.
        hosts: hostname:port pairs of the Crate nodes
        output_fmt: Forwarded to ``format_stats`` when printing the result.

    Raises:
        SystemExit: If an insert fails with ``clients.SqlException``, or if
            no timing samples were recorded (no input data).
    """
    if not hosts:
        return print_only(table)

    records = dicts_from_stdin()
    bulk_queries = as_bulk_queries(
        (to_insert(table, record) for record in records),
        bulk_size,
    )
    progress_msg = 'Executing inserts: bulk_size={} concurrency={}'.format(
        bulk_size, concurrency)
    print(progress_msg, file=sys.stderr)

    stats = Stats()
    with clients.client(hosts, concurrency=concurrency) as client:
        timed_insert = partial(aio.measure, stats, client.execute_many)
        try:
            aio.run_many(timed_insert, bulk_queries, concurrency)
        except clients.SqlException as e:
            raise SystemExit(str(e))

    try:
        summary = stats.get()
        print(format_stats(summary, output_fmt))
    except KeyError:
        # A KeyError with recorded samples is unexpected: let it propagate.
        if stats.sampler.values:
            raise
        raise SystemExit('No data received via stdin')
def warmup(self, stmt, num_warmup):
    """Execute ``stmt`` ``num_warmup`` times via ``self.client.execute``.

    Args:
        stmt: Statement to execute; passed as a one-element argument tuple.
        num_warmup: Number of repetitions, also forwarded to
            ``aio.run_many`` as ``num_items``.
    """
    call_args = (stmt,)
    warmup_calls = itertools.repeat(call_args, num_warmup)
    aio.run_many(
        self.client.execute,
        warmup_calls,
        0,
        num_items=num_warmup,
    )
def warmup(self, stmt, num_warmup):
    """Run the given statement repeatedly before the actual measurement.

    The statement is wrapped in a one-element tuple and repeated
    ``num_warmup`` times; the repetitions are handed to ``aio.run_many``
    together with ``self.client.execute``.

    Args:
        stmt: Statement to execute.
        num_warmup: How many times the statement is executed; also passed
            to ``aio.run_many`` as ``num_items``.
    """
    repeated_args = itertools.repeat((stmt,), num_warmup)
    execute = self.client.execute
    aio.run_many(execute, repeated_args, 0, num_items=num_warmup)