def on_put(self, req, res, table, row):
    """Insert one row, read from the JSON request body, into ``table``.

    Only keys of the JSON object that match a column name in the table's
    schema are written; any other key is ignored.  When the body has no
    ``_id`` key, one is generated from the last group of a random UUID.

    The response body is a JSON object holding the table name, an
    ``errors`` list and the ``data`` that was written.  The HTTP status is
    always set to 200, including when the table does not exist.

    ``row`` is accepted from the route but is not used by this handler.
    """
    print('...put new row')
    api = {'table': table, 'errors': [], 'data': {}}
    client = kudu.connect(host='queen', port=7051)
    session = client.new_session()
    if client.table_exists(table):
        # Open the table once and reuse the handle for both the schema
        # lookup and the insert operation.
        kudu_table = client.table(table)
        column_names = {column.name for column in kudu_table.schema}
        data = json.loads(req.bounded_stream.read().decode("utf-8"))
        op = kudu_table.new_insert()
        if '_id' not in data:
            # Last dash-separated group of a random UUID.
            data['_id'] = str(uuid.uuid4()).split('-')[4]
        for key in data:
            if key in column_names:
                op[key] = data[key]
                api['data'][key] = data[key]
        session.apply(op)
        session.flush()
    else:
        api['errors'].append('Table does not exist')
    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def connect(
    self,
    host_or_hosts,
    port_or_ports=7051,
    rpc_timeout=None,
    admin_timeout=None,
):
    """
    Open a Kudu client connection and store it on ``self.client``.

    All arguments are handed to ``kudu.connect``.

    Parameters
    ----------
    host_or_hosts : string or list of strings
      Master address; pass a list when there are several masters (HA)
    port_or_ports : int or list of int, default 7051
      Master port; pass a list alongside a list of hosts
    rpc_timeout : optional
      Forwarded to ``kudu.connect`` as ``rpc_timeout_ms``
    admin_timeout : optional
      Forwarded to ``kudu.connect`` as ``admin_timeout_ms``

    Returns
    -------
    None
    """
    timeouts = {
        'rpc_timeout_ms': rpc_timeout,
        'admin_timeout_ms': admin_timeout,
    }
    self.client = kudu.connect(host_or_hosts, port_or_ports, **timeouts)
def setUpClass(cls):
    """Start a cluster, wait for its tablet servers and (re)create the
    example table used by the tests.

    Raises TimeoutError when fewer than ``NUM_TABLET_SERVERS`` tablet
    servers are listed within ``TSERVER_START_TIMEOUT_SECS`` seconds.
    """
    cls.cluster_path, master_port = cls.start_cluster()
    time.sleep(1)

    cls.master_host = '127.0.0.1'
    cls.master_port = master_port
    cls.client = kudu.connect(cls.master_host, cls.master_port)

    # Poll the tablet-server list until enough servers are reported or
    # the deadline has passed.
    deadline = time.time() + cls.TSERVER_START_TIMEOUT_SECS
    while True:
        if len(cls.client.list_tablet_servers()) >= cls.NUM_TABLET_SERVERS:
            break
        if time.time() > deadline:
            raise TimeoutError(
                "Tablet servers took too long to start. Timeout set to {}".
                format(cls.TSERVER_START_TIMEOUT_SECS))
        # 50 ms pause between polls so the RPC is not hammered.
        time.sleep(0.05)

    cls.schema = cls.example_schema()
    cls.partitioning = cls.example_partitioning()
    cls.ex_table = 'example-table'

    # Always start from a freshly created table.
    table_name = cls.ex_table
    if cls.client.table_exists(table_name):
        cls.client.delete_table(table_name)
    cls.client.create_table(table_name, cls.schema, cls.partitioning)
def setUpClass(cls):
    """Run the shared end-to-end setup, then connect to the Kudu master
    both directly (``cls.kclient``) and through ``cls.con.kudu``."""
    ImpalaE2E.setup_e2e(cls, ENV)
    cls.temp_tables = []

    # Standalone Kudu client.
    cls.kclient = kudu.connect(cls.env.master_host, cls.env.master_port)

    # Same master, reached through the connection object's kudu interface.
    kudu_interface = cls.con.kudu
    kudu_interface.connect(cls.env.master_host, cls.env.master_port)
def _get_scan_token_results(input):
    """Rebuild a scanner from a serialized scan token and read every row.

    ``input`` is indexed positionally: ``input[0]`` is the serialized
    token, ``input[1]`` and ``input[2]`` are the host and port handed to
    ``kudu.connect``.

    Returns whatever ``scanner.read_all_tuples()`` returns.
    """
    client = kudu.connect(input[1], input[2])
    scanner = client.deserialize_token_into_scanner(input[0])
    scanner.open()
    try:
        return scanner.read_all_tuples()
    finally:
        # Test explicit closing of scanner; the ``finally`` ensures the
        # scanner is also closed when reading raises.
        scanner.close()
def on_get(self, req, res, table):
    """Report whether ``table`` exists.

    The response body is a JSON object with the table name and either a
    ``success`` message or an ``errors`` list.  The HTTP status is always
    set to 200.
    """
    api = {'table': table}
    client = kudu.connect(host='queen', port=7051)
    if client.table_exists(table):
        api['success'] = 'Table ok'
    else:
        # ``api`` has no 'errors' entry yet, so the list is created here
        # (appending to a missing key raised KeyError).
        api['errors'] = ['Table does not exist']
    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def setUpClass(cls):
    """Start a cluster, connect to its masters and (re)create the example
    table used by the tests."""
    started = cls.start_cluster()
    cls.cluster_proc, cls.master_hosts, cls.master_ports = started

    cls.client = kudu.connect(cls.master_hosts, cls.master_ports)

    cls.schema = cls.example_schema()
    cls.partitioning = cls.example_partitioning()
    cls.ex_table = 'example-table'

    # Drop any leftover table so every run starts from a fresh one.
    table_name = cls.ex_table
    if cls.client.table_exists(table_name):
        cls.client.delete_table(table_name)
    cls.client.create_table(table_name, cls.schema, cls.partitioning)
def on_delete(self, req, res, table):
    """Delete ``table`` if it exists.

    The response body is a JSON object with a boolean ``success`` and, when
    the table is missing, an ``error`` list.  The HTTP status is always set
    to 200.
    """
    client = kudu.connect(host='queen', port=7051)
    if not client.table_exists(table):
        api = {'success': False, 'error': ['Table does not exist']}
    else:
        client.delete_table(table)
        api = {'success': True}
    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def __init__(self, name, partition_list):
    """Set up the thread with its name and partitions, and open the Kudu
    client, table handle and write session it will use."""
    threading.Thread.__init__(self)
    self.name = name
    self.partitions = partition_list

    kudu_client = kudu.connect(host='ip-172-31-6-171', port=7051)
    self.client = kudu_client
    # Table handle.
    self.table = kudu_client.table('impala::DEFAULT.STATUS_TWEETS')
    # Session through which write operations are applied.
    self.session = kudu_client.new_session()
def setUpClass(cls):
    """Start a cluster, connect to its master on 127.0.0.1 and (re)create
    the example table used by the tests."""
    cls.cluster_path, master_port = cls.start_cluster()
    time.sleep(1)

    cls.master_host = '127.0.0.1'
    cls.master_port = master_port
    cls.client = kudu.connect(cls.master_host, cls.master_port)

    cls.schema = cls.example_schema()
    cls.ex_table = 'example-table'

    # Drop any leftover table so every run starts from a fresh one.
    table_name = cls.ex_table
    if cls.client.table_exists(table_name):
        cls.client.delete_table(table_name)
    cls.client.create_table(table_name, cls.schema)
def on_get(self, req, res):
    """List every table with the type and nullability of each column.

    Results are stored under ``self.api['tables']`` (one sorted mapping
    per table, keyed by column name) and ``self.api`` is returned as the
    JSON response body with HTTP status 200.
    """
    self.api['method'] = 'GET'
    client = kudu.connect(host='queen', port=7051)
    for table_name in client.list_tables():
        table_schema = client.table(table_name).schema
        self.api['tables'][table_name] = sortedcontainers.SortedDict()
        for column in table_schema:
            column_info = {
                'type:': column.type.name,
                'nullable:': column.nullable,
            }
            self.api['tables'][table_name][column.name] = column_info
    res.body = json.dumps(self.api)
    res.status = falcon.HTTP_200
def on_get(self, req, res, table, row):
    """Look up rows of ``table`` whose ``_id`` column equals ``row``.

    The response body is a JSON object with the table name, the requested
    row id, an ``errors`` list and, when the table exists, the scanner's
    attribute names (``scanner``) and the matching tuples (``ret``).  The
    HTTP status is always set to 200.
    """
    api = {'table': table, 'row': row, 'errors': []}
    client = kudu.connect(host='queen', port=7051)
    if not client.table_exists(table):
        api['errors'].append('Table does not exist')
    else:
        print('...table exists', table)
        kudu_table = client.table(table)
        scanner = kudu_table.scanner()
        # NOTE(review): exposes the scanner's attribute names in the
        # response; looks like debugging output - confirm it is wanted.
        api['scanner'] = dir(scanner)
        id_matches = kudu_table['_id'] == row
        scanner.add_predicate(id_matches)
        api['ret'] = scanner.open().read_all_tuples()
    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def insert_to_kudu(rdd):
    """Insert every comma-separated line of ``rdd`` into the
    ``galaxy_measurements`` Kudu table.

    Each line is split on ',' and its first eight fields are converted and
    written as one row.  ``wave`` is True when amplitudes 1 and 3 are above
    0.995 and amplitude 2 is below 0.005.
    """
    client = kudu.connect(host=KUDU_MASTER, port=7051)
    table = client.table('galaxy_measurements')
    session = client.new_session()
    for line in rdd.toLocalIterator():
        fields = line.split(',')
        # Convert in field order so a malformed line fails on the same
        # field as before.
        measurement_id = fields[0]
        detector_id = int(fields[1])
        galaxy_id = int(fields[2])
        astrophysicist_id = int(fields[3])
        measurement_time = float(fields[4])
        amplitude_1 = float(fields[5])
        amplitude_2 = float(fields[6])
        amplitude_3 = float(fields[7])
        is_wave = (amplitude_1 > 0.995
                   and amplitude_3 > 0.995
                   and amplitude_2 < 0.005)
        row = {
            'measurement_id': measurement_id,
            'detector_id': detector_id,
            'galaxy_id': galaxy_id,
            'astrophysicist_id': astrophysicist_id,
            'measurement_time': measurement_time,
            'amplitude_1': amplitude_1,
            'amplitude_2': amplitude_2,
            'amplitude_3': amplitude_3,
            'wave': is_wave,
        }
        session.apply(table.new_insert(row))
    session.flush()
def on_put(self, req, res, table):
    """Create ``table`` from a JSON column description if it is missing.

    The request body is a JSON object mapping column names to one of the
    type keywords ``string``, ``int``, ``time``, ``float``, ``double``,
    ``decimal``, ``binary`` or ``bool``; any other value falls back to a
    string column.  A non-nullable string primary key ``_id`` is always
    added, and the table is hash-partitioned on ``_id`` into 3 buckets.

    The response body is a JSON object with the table name and a boolean
    ``success`` that is True only when the table was created by this call.
    The HTTP status is always set to 200.
    """
    # Request keyword -> attribute name on the ``kudu`` module.  Attribute
    # names (rather than the type objects) are stored so that only the
    # types actually requested are looked up.
    type_attrs = {
        'string': 'string',
        'int': 'int64',
        'time': 'unixtime_micros',
        'float': 'float',
        # Was mapped to kudu.float, silently creating a float column.
        'double': 'double',
        'decimal': 'decimal',
        'binary': 'binary',
        'bool': 'bool',
    }
    api = {'table': table, 'success': False}
    data = json.loads(req.bounded_stream.read().decode("utf-8"))
    client = kudu.connect(host='queen', port=7051)
    if not client.table_exists(table):
        builder = kudu.schema_builder()
        builder.add_column('_id').type(
            kudu.string).nullable(False).primary_key()
        if data:
            for column in data:
                try:
                    attr = type_attrs.get(data[column], 'string')
                except TypeError:
                    # Unhashable JSON value (list/object): treat it like
                    # any other unknown keyword.
                    attr = 'string'
                builder.add_column(column).type(getattr(kudu, attr))
        schema = builder.build()
        partitioning = Partitioning().add_hash_partitions(
            column_names=['_id'], num_buckets=3)
        client.create_table(table, schema, partitioning)
        api['success'] = True
    res.body = json.dumps(api)
    res.status = falcon.HTTP_200
def on_update(self, req, res, table):
    """Alter columns of ``table`` and/or rename it.

    Reads the ``name`` request parameter (new table name) and a JSON body.
    When the body has a ``cols`` mapping, each key/value pair is passed to
    ``alter_column`` (presumably old name -> new name; confirm against the
    Kudu client API).  Errors raised by an alteration are collected as
    strings in ``self.api['error']`` instead of propagating.

    ``self.api`` is returned as the JSON response body with HTTP status 200.
    """
    # Reset the per-request fields of the shared response dictionary.
    self.api['method'] = 'UPDATE'
    self.api['table'] = table
    self.api['exists'] = False
    self.api['update'] = []
    self.api['error'] = []
    client = kudu.connect(host='queen', port=7051)
    self.api['exists'] = client.table_exists(table)
    if self.api['exists']:
        self.api['rename'] = req.get_param('name')
        self.api['data'] = json.load(req.bounded_stream)
        otable = client.table(table)
        # One alterer object is reused for every change below.
        alt = client.new_table_alterer(otable)
        if self.api['data']:
            if 'cols' in self.api['data']:
                for i in self.api['data']['cols']:
                    try:
                        # Apply each column change immediately so that a
                        # failure is recorded for that column only.
                        alt.alter_column(i, self.api['data']['cols'][i])
                        alt.alter()
                        self.api['update'].append(
                            [i, self.api['data']['cols'][i]])
                    except Exception as e:
                        self.api['error'].append(str(e))
        if self.api['rename']:
            try:
                # The result of alter() is bound to ``t`` but not used.
                t = alt.rename(self.api['rename']).alter()
            except Exception as e:
                self.api['error'].append(str(e))
        #alt.add_column('tests').type(kudu.string)
        #alt.alter()
        #self.api['log'] = str(dir(alt))
    res.body = json.dumps(self.api)
    res.status = falcon.HTTP_200
def setUpClass(cls):
    """Start a cluster, wait for its tablet servers and (re)create the
    example table used by the tests.

    Raises TimeoutError when fewer than ``NUM_TABLET_SERVERS`` tablet
    servers are listed within ``TSERVER_START_TIMEOUT_SECS`` seconds.
    """
    started = cls.start_cluster()
    cls.cluster_path, cls.master_hosts, cls.master_ports = started
    time.sleep(1)

    cls.client = kudu.connect(cls.master_hosts, cls.master_ports)

    # Poll the tablet-server list until enough servers are reported or
    # the deadline has passed.
    deadline = time.time() + cls.TSERVER_START_TIMEOUT_SECS
    while True:
        if len(cls.client.list_tablet_servers()) >= cls.NUM_TABLET_SERVERS:
            break
        if time.time() > deadline:
            raise TimeoutError(
                "Tablet servers took too long to start. Timeout set to {}"
                .format(cls.TSERVER_START_TIMEOUT_SECS))
        # 50 ms pause between polls so the RPC is not hammered.
        time.sleep(0.05)

    cls.schema = cls.example_schema()
    cls.partitioning = cls.example_partitioning()
    cls.ex_table = 'example-table'

    # Always start from a freshly created table.
    table_name = cls.ex_table
    if cls.client.table_exists(table_name):
        cls.client.delete_table(table_name)
    cls.client.create_table(table_name, cls.schema, cls.partitioning)
tmpdir = tempfile.mkdtemp() path = os.path.join(tmpdir, "dstat.pipe") os.mkfifo(path) proc = subprocess.Popen(["dstat", "-cdngy", "--output", "{0}".format(path)]) return proc.pid, path if __name__ == "__main__": drop = False if len(sys.argv) > 1: operation = sys.argv[1] if operation in ["drop"]: drop = True client = kudu.connect("127.0.0.1", 7051) table = open_or_create_table(client, "dstat", drop) # Start dstat dstat_id, pipe_path = start_dstat() try: # Create file handle to read from pipe fid = open(pipe_path, "r") # Create session object session = client.new_session() counter = 0 # The dstat output first prints uninteresting lines, skip until we find the header skip = True
def test_connect_timeouts(self):
    """Connecting with explicit admin and RPC timeouts must not raise."""
    # No further assertion: the test passes if the call succeeds.
    timeout_ms = 1000
    kudu.connect(
        self.master_hosts,
        self.master_ports,
        admin_timeout_ms=timeout_ms,
        rpc_timeout_ms=timeout_ms,
    )
os.mkfifo(path) proc = subprocess.Popen( ["dstat", "-cdngy", "--output", "{0}".format(path)]) return proc.pid, path if __name__ == "__main__": drop = False if len(sys.argv) > 1: operation = sys.argv[1] if operation in ["drop"]: drop = True client = kudu.connect("127.0.0.1", 7051) table = open_or_create_table(client, "dstat", drop) # Start dstat dstat_id, pipe_path = start_dstat() try: # Create file handle to read from pipe fid = open(pipe_path, "r") # Create session object session = client.new_session() counter = 0 # The dstat output first prints uninteresting lines, skip until we find the header skip = True
import kudu from kudu.client import Partitioning from datetime import datetime # Connect to Kudu master server client = kudu.connect(host='ip.kuduMaster', port=7051) # Define a schema for a new table builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4') schema = builder.build() # Define partitioning schema partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) # Create new table client.create_table('python-example2', schema, partitioning) # Open a table table = client.table('python-example2') # Create a new session so that we can apply write operations session = client.new_session() # Insert a row op = table.new_insert({'key': 1, 'ts_val': datetime.utcnow()})
import sys
import kudu

# Connect to the local Kudu master and open the table to scan.
client = kudu.connect('localhost', 7051)
table = client.table('payment_history')
print(table.schema, "\n")

scanner = table.scanner()

# Restrict the scan to transaction ids in the closed range [7292, 7295].
id_col = table['txn_id']
lower_bound = id_col >= 7292
upper_bound = id_col <= 7295
scanner.add_predicates([lower_bound, upper_bound])

scanner.open()
rows = scanner.read_all_tuples()
print(rows)
def __init__(self):
    """Connect to the Kudu master and open a session on that client."""
    kudu_client = kudu.connect(host='master', port=7051)
    self.client = kudu_client
    # Original author's note: the session has no close method.
    self.session = kudu_client.new_session()
def _connect_kudu(self):
    """Connect to the Kudu master named in ``self._config`` and open a
    session; both are stored on the instance."""
    master = self._config['kudu_master']
    port = self._config['kudu_port']
    self._kudu_client = kudu.connect(host=master, port=port)
    self._kudu_session = self._kudu_client.new_session()
def random_date(): """ Return random date (str, format YYYY-MM-DD) """ year = str(random.randint(2000, 2018)) month = str(random.randint(1, 12)).rjust(2, '0') day = str(random.randint(1, 28)).rjust(2, '0') d = '%s-%s-%s' % (year, month, day) return d if __name__ == '__main__': client = kudu.connect(host=['x', 'x', 'x'], port=7051) # Open a table table = client.table('test_tweet') # Create a new session so that we can apply write operations session = client.new_session() for i in range(0, 1000000): uniq_id = str(i) author = random_str(10) date_tweet = random_date() tweet = random_str(200) rate = random.randint(0, 5) # Insert a row op = table.new_insert({ 'id': uniq_id, 'author': author,
"--port", help="target kudu port", required=False, default=7051, type=int) return parser.parse_args() if __name__ == '__main__': print(f'| start at {datetime.datetime.now():%Y-%m-%d %H:%M:%S}') start_time: datetime.datetime = datetime.datetime.now() args: argparse.Namespace = args_parse() client: kudu.Client = kudu.connect(args.ip, args.port) ss: kudu.Session = client.new_session() tb_reader = client.table(args.table).scanner() tb_reader.open() with open(args.file, 'w') as csv_file: print(f'| CSV file name : {args.file}') cnt: int = 0 csv_writer: csv.writer = csv.writer(csv_file, delimiter=',', quotechar='\'') while tb_reader.has_more_rows(): for row in tb_reader.next_batch().as_tuples(): csv_writer.writerow(list(row))
import kudu from kudu.client import Partitioning # Parse arguments parser = argparse.ArgumentParser(description='Basic Example for Kudu Python.') parser.add_argument('--masters', '-m', nargs='+', default='localhost', help='The master address(es) to connect to Kudu.') parser.add_argument('--ports', '-p', nargs='+', default='7051', help='The master server port(s) to connect to Kudu.') args = parser.parse_args() # Connect to Kudu master server(s). client = kudu.connect(host=args.masters, port=args.ports) # Define a schema for a new table. builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4') schema = builder.build() # Define the partitioning schema. partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) # Delete table if it already exists. if client.table_exists('python-example'): client.delete_table('python-example') # Create a new table.
import kudu from kudu.client import Partitioning from datetime import datetime # Connect to Kudu master server # 连接 kudu master 服务 client = kudu.connect(host='master', port=7051) # Define a schema for a new table # 为表定义一个模式 builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('t1', type_=kudu.int64) builder.add_column('t2', type_=kudu.int64) schema = builder.build() # Define partitioning schema partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) # Create new table #client.create_table('python3', schema, partitioning) # Open a table table = client.table('python3') i = 0 # Create a new session so that we can apply write operations while i < 100: session = client.new_session() n = 0 print(i)
p_regionnm = '0000' cnt = 0 mem_no = '' # User Variable Definition ing_cnt = 0 ###### GET CURRENT TIME today = date.today() current_time = strftime("%Y%m%d%H%M%S", localtime()) ########## KUDU CONNECTION & TABLE CREATE ############################################################## client = kudu.connect(host="192.168.0.1", port=7051) # Define a schema for a new table builder = kudu.schema_builder() builder.add_column('DT').type(kudu.string).nullable(False) builder.add_column('FILE_TYPE').type(kudu.string).nullable(False) builder.add_column('SEQ').type(kudu.int64).nullable(False) builder.add_column('V1').type(kudu.string).nullable(True) builder.add_column('V2').type(kudu.string).nullable(True) builder.add_column('V3').type(kudu.string).nullable(True) builder.add_column('MOD_DTS', type_=kudu.unixtime_micros, nullable=False, compression='lz4')
#!/usr/bin/env python import time import kudu from kudu.client import Partitioning from datetime import datetime table_name = 'master_foo' # Mount/connect the Kudu queen client = kudu.connect(host='queen', port=7051) builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('name').type(kudu.string) schema = builder.build() partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) try: print('...try to open the table') table = client.table(table_name) except Exception as e: print('...create table') client.create_table(table_name, schema, partitioning) print('...wait 3 sec before access the table') time.sleep(3) table = client.table(table_name) no = 10000 for i in range(no): print('add row {}'.format(no-i)) op = table.new_insert({'key': i, 'name': 'foo{}'.format(i)})
import kudu

# Connect to the local Kudu master and print the names of its tables.
client = kudu.connect(host='127.0.0.1', port=7051)
result = client.list_tables()
# Function-call form: the bare ``print result`` statement is a
# SyntaxError on Python 3; this form prints the same on Python 2.
print(result)
def test_connect_timeouts(self):
    """Connecting with explicit admin and RPC timeouts must not raise."""
    # No further assertion: the test passes if the call succeeds.
    timeout_ms = 100
    kudu.connect(
        self.master_hosts,
        self.master_ports,
        admin_timeout_ms=timeout_ms,
        rpc_timeout_ms=timeout_ms,
    )
def initKudu(kuduMaster):
    """Return a Kudu client connected to ``kuduMaster`` on port 7051.

    A progress message naming the master is printed before connecting.
    """
    message = "Connecting to kudu [Kudu Master: {}]".format(kuduMaster)
    print(message)
    return kudu.connect(host=kuduMaster, port=7051)
def _get_scan_token_results(input):
    """Rebuild a scanner from a serialized scan token and read every row.

    ``input`` is indexed positionally: ``input[0]`` is the serialized
    token, ``input[1]`` and ``input[2]`` are the host and port handed to
    ``kudu.connect``.
    """
    client = kudu.connect(input[1], input[2])
    token_scanner = client.deserialize_token_into_scanner(input[0])
    token_scanner.open()
    rows = token_scanner.read_all_tuples()
    return rows
# Parse arguments parser = argparse.ArgumentParser(description='Basic Example for Kudu Python.') parser.add_argument('--masters', '-m', nargs='+', default='localhost', help='The master address(es) to connect to Kudu.') parser.add_argument('--ports', '-p', nargs='+', default='7051', help='The master server port(s) to connect to Kudu.') args = parser.parse_args() # Connect to Kudu master server(s). client = kudu.connect(host=args.masters, port=args.ports) # Define a schema for a new table. builder = kudu.schema_builder() builder.add_column('key').type(kudu.int64).nullable(False).primary_key() builder.add_column('ts_val', type_=kudu.unixtime_micros, nullable=False, compression='lz4') schema = builder.build() # Define the partitioning schema. partitioning = Partitioning().add_hash_partitions(column_names=['key'], num_buckets=3) # Delete table if it already exists.