def test_pystreamliner_multiple_tables(local_context):
    """ Tests if a pypeline reading and writing additional tables works """
    ctx = local_context()
    setup_spark(ctx)
    config = pystreamliner_config(
        extractor="multiple_tables_extractor.py",
        transformer="multiple_tables_transformer.py")

    # create some reference data
    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="information_schema")
    conn.execute("CREATE DATABASE reference")
    conn.execute("CREATE TABLE reference.data (foo int primary key)")
    conn.query("INSERT INTO reference.data VALUES (%s)", 1)

    ctx.pipeline_put(pipeline_id="pypeline", batch_interval=1, config=config)

    # wait for a couple more batches before stopping the pipeline
    batch_end = ctx.pipeline_wait_for_batches(pipeline_id="pypeline", count=5, timeout=300)
    assert batch_end["success"]
    assert batch_end["load"]["count"] == 1
    ctx.pipeline_update(pipeline_id="pypeline", active=False)

    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="db")
    resp = conn.get("SELECT COUNT(*) AS count FROM t")
    assert resp.count >= 5
    num_rows = resp.count

    resp = conn.get("SELECT SUM(foo) AS f FROM t")
    assert resp.f == num_rows

    for i in range(num_rows):
        table = "pystreamliner.table%d" % i
        resp = conn.get("SELECT SUM(foo) AS f FROM " + table)
        assert resp.f == 1

def connect():
    master_agg = '192.168.65.1'  # 10.0.3.186:3306
    conn = database.connect(host=master_agg, user='******', password='******', database=DB_NAME)
    return conn

def __init__(self):
    """Create a connection"""
    try:
        self.client = database.connect(host='localhost', port=3307, user='******', password="******", database='test')
    except Exception:
        raise

def get_connection(db=DATABASE):
    """ Returns a new connection to the database. """
    return database.connect(host=HOST, port=PORT, user=USER, password=PASSWORD, database=db)

def drop_database(params):
    with database.connect(host=params.db_host, port=params.db_port, user=params.db_user,
                          password=params.db_pswd, database=params.db_name) as conn:
        conn.query('DROP DATABASE %s' % params.db_name)

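# A hypothetical counterpart to drop_database, sketched for completeness in the
# same style; it is not part of the original snippets, and the params fields
# are assumed to match drop_database's. The database name is an identifier, so
# it is interpolated rather than passed as a query parameter, mirroring the
# DROP statement above and the CREATE DATABASE IF NOT EXISTS fixtures below.
def create_database(params):
    with database.connect(host=params.db_host, port=params.db_port, user=params.db_user,
                          password=params.db_pswd) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % params.db_name)
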
def test_pystreamliner_sanity(local_context):
    """ Tests if a basic pypeline works """
    ctx = local_context()
    setup_spark(ctx)
    config = pystreamliner_config(extractor="sanity_extractor.py", transformer="sanity_transformer.py")

    ctx.pipeline_put(pipeline_id="pypeline", batch_interval=1, config=config)

    # enable tracing and check that log messages are recorded appropriately
    ctx.pipeline_update(pipeline_id="pypeline", active=True, trace_batch_count=20)

    # wait for a couple more batches before stopping the pipeline
    batch_end = ctx.pipeline_wait_for_batches(pipeline_id="pypeline", count=10, timeout=120)
    assert batch_end["success"]
    assert batch_end["load"]["count"] == 1
    assert batch_end["batch_type"] == "Traced"
    assert len(batch_end["extract"]["logs"]) == 1
    assert "extractor info message" in batch_end["extract"]["logs"][0]
    assert len(batch_end["transform"]["logs"]) == 1
    assert "transformer error message" in batch_end["transform"]["logs"][0]
    ctx.pipeline_update(pipeline_id="pypeline", active=False)

    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="db")
    resp = conn.get("SELECT COUNT(*) AS count FROM t")
    assert resp.count >= 10
    num_rows = resp.count

    resp = conn.get("SELECT SUM(foo_int_doubled) AS f FROM t")
    assert resp.f == 2 * sum(range(1, num_rows + 1))

def test_connection_options(test_db_args):
    args = copy.deepcopy(test_db_args)
    args["host"] = "memsql.com"
    args["options"] = {"connect_timeout": 1}

    with pytest.raises(database.OperationalError):
        conn = database.connect(**args)
        conn.query("SHOW TABLES")

def query_autotags(interests, host=None, port=3306, user="******", pswd="", db=None):
    start_t = time.time()
    with database.connect(host=host, port=port, user=user, password=pswd, database=db) as conn:
        match_regex = ' AND '.join(['''{}":"$'''.format(key) for key in interests.keys()])
        val_regex = ' AND '.join(
            ['''CONVERT(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(a.autotags,'{}":"',-1),',',1),':',-1),DECIMAL)>=CONVERT({},DECIMAL)'''.format(key, value)
             for key, value in interests.items()])
        query = '''select b.line_number, b.id, a.autotags, b.download_url
                   from test_autotags a JOIN test_metadata b ON a.id = b.id
                   WHERE match (a.autotags) against ('{}') AND {}'''.format(match_regex, val_regex)
        sql_response = conn.query(query)

        response = []
        for res in sql_response:
            if all(key in res['autotags'] for key in interests.keys()):
                tags_str = []
                for t in res['autotags'].split(','):
                    val = t.split(":")
                    try:
                        if len(val) == 2:
                            val[1] = float(val[1]) if val[1] != '' else 0
                            if val[0] in interests and val[1] >= float(interests[val[0]]):
                                tags_str.append(val[0] + ":" + str(val[1]))
                    except Exception:
                        print(val[0], val[1])
                if tags_str:
                    print('\tID: {}'.format(res['id']))
                    print('\tAutotags: {}'.format(','.join(tags_str)))
                    print('\tdownload_url: {}\n'.format(res['download_url']))
                    response.append(res)
    return response, time.time() - start_t

def test_connection_options(test_db_args):
    args = copy.deepcopy(test_db_args)
    args["host"] = "example.com"
    args["options"] = {"connect_timeout": 1}

    with pytest.raises(database.OperationalError):
        conn = database.connect(**args)
        conn.query("SHOW TABLES")

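# Note: in the two timeout tests above, the "options" mapping is presumably
# forwarded to the underlying driver connection; that is how a per-connection
# setting such as connect_timeout takes effect. A minimal sketch, with
# hypothetical credentials and assuming a reachable server on 127.0.0.1:
from memsql.common import database

conn = database.connect(host="127.0.0.1", port=3306, user="root", password="",
                        options={"connect_timeout": 1})
print(conn.get("SELECT 1 AS ok").ok)
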
def x_conn(self, request, test_db_args, test_db_database):
    conn = database.connect(**test_db_args)
    conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
    conn.select_db(test_db_database)

    def cleanup():
        conn.execute('DROP DATABASE %s' % test_db_database)
    request.addfinalizer(cleanup)

    return conn

def _x_conn(self, request, test_db_args, test_db_database):
    conn = database.connect(**test_db_args)
    conn.execute('CREATE DATABASE IF NOT EXISTS %s CHARACTER SET utf8 COLLATE utf8_general_ci' % test_db_database)
    conn.select_db(test_db_database)

    def cleanup():
        conn.execute('DROP DATABASE %s' % test_db_database)
    request.addfinalizer(cleanup)

    return conn

def x_conn(self, request, test_db_args, test_db_database):
    conn = database.connect(**test_db_args)
    conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
    conn.execute('USE %s' % test_db_database)

    def cleanup():
        conn.execute('DROP DATABASE %s' % test_db_database)
    request.addfinalizer(cleanup)

    return conn

def queue_setup(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
    test_db_args['database'] = test_db_database
    q = sql_step_queue.SQLStepQueue('test').connect(**test_db_args).setup()

    def cleanup():
        q.destroy()
    request.addfinalizer(cleanup)

def manager_setup(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)
    test_db_args['database'] = test_db_database
    q = sql_lock.SQLLockManager('test').connect(**test_db_args).setup()

    def cleanup():
        q.destroy()
    request.addfinalizer(cleanup)

def _connect(self, host, port, user, password):
    db = None
    try:
        db = database.connect(host=host, port=port, user=user, password=password)
        self.log.debug("Connected to MemSQL")
        yield db
    except Exception:
        raise
    finally:
        if db:
            db.close()

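# Because _connect yields the connection, it is a generator and is presumably
# wrapped with contextlib.contextmanager elsewhere in its class. A minimal
# self-contained sketch of that pattern, with hypothetical defaults, so the
# connection is always closed when the with-block exits:
from contextlib import contextmanager

from memsql.common import database

@contextmanager
def connect(host="127.0.0.1", port=3306, user="root", password=""):
    db = database.connect(host=host, port=port, user=user, password=password)
    try:
        yield db
    finally:
        db.close()

# usage:
# with connect() as conn:
#     conn.query("SELECT 1")
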
def get_connection(self):
    for attempt in range(1, self.max_attempts + 1):
        try:
            return database.connect(host=self.host, port=self.port, user=self.user,
                                    password=self.password, database=self.database)
        except Exception as e:
            print(e, e.args)
            if self.verbose:
                print("\tTrouble establishing a database connection, retrying... (attempt: %d/%d)" % (attempt, self.max_attempts))
            # back off linearly before retrying
            sleep(attempt)
            continue
    sys.exit('Establishing a database connection failed after %d attempts, giving up.' % self.max_attempts)

def run_query(self, query, user):
    cursor = None
    try:
        cursor = database.connect(**self.configuration.to_dict())
        res = cursor.query(query)

        rows = [dict(zip(list(row.keys()), list(row.values()))) for row in res]

        # temporary - until https://github.com/memsql/memsql-python/pull/8 gets merged
        columns = []
        column_names = rows[0].keys() if rows else None
        if column_names:
            for column in column_names:
                columns.append({
                    'name': column,
                    'friendly_name': column,
                    'type': TYPE_STRING
                })

        data = {'columns': columns, 'rows': rows}
        json_data = json.dumps(data, cls=JSONEncoder)
        error = None
    except KeyboardInterrupt:
        cursor.close()
        error = "Query cancelled by user."
        json_data = None
    except Exception as e:
        logging.exception(e)
        raise
    finally:
        if cursor:
            cursor.close()

    return json_data, error

def get_connection(host=options.host, port=options.port, db=options.database):
    """ Returns a new connection to the database. """
    if host is None:
        host = HOST
    if port is None:
        port = PORT
    return database.connect(host=host, port=port, user=options.user,
                            password=options.password, database=db)

def run_query(self, query, user):
    cursor = None
    try:
        cursor = database.connect(**self.configuration.to_dict())
        res = cursor.query(query)

        rows = [dict(zip(row.keys(), row.values())) for row in res]

        # temporary - until https://github.com/memsql/memsql-python/pull/8 gets merged
        columns = []
        column_names = rows[0].keys() if rows else None
        if column_names:
            for column in column_names:
                columns.append({
                    "name": column,
                    "friendly_name": column,
                    "type": TYPE_STRING
                })

        data = {"columns": columns, "rows": rows}
        json_data = json_dumps(data)
        error = None
    except KeyboardInterrupt:
        cursor.close()
        error = "Query cancelled by user."
        json_data = None
    finally:
        if cursor:
            cursor.close()

    return json_data, error

def test_sanity(local_context):
    ctx = local_context()
    ctx.run_ops()
    ctx.deploy_memsql_cluster(num_aggs=0, num_leaves=1)
    ctx.deploy_spark()

    # wait for spark to be deployed
    time.sleep(30)

    # and then kill the spark interface so that we have spark resources to run a job
    ctx.stop_ops()
    ctx.kill_spark_interface()

    print("Running the job")
    resp = ctx.spark_submit("com.memsql.spark.examples.WriteToMemSQLApp")
    print("STDOUT: \n%s" % resp.output)
    print("STDERR: \n%s" % resp.stderr_output)

    conn = database.connect(host="127.0.0.1", port=3306, user="******", password="", database="memsqlrdd_db")
    assert conn.get("SELECT count(*) AS c FROM output").c == 1000

def get_connection(host=db_info.hostname, port=db_info.port, user=db_info.username,
                   password=db_info.password, db=db_info.path.strip('/'), verbose=options.verbose):
    for attempt in range(1, 21):
        try:
            return database.connect(host=host, port=port, user=user, password=password, database=db)
        except Exception:
            if verbose:
                print("\tTrouble establishing a database connection, retrying... (attempt: %d/20)" % attempt)
            sleep(attempt * 2)
            continue
    # note: falls through and returns None if all 20 attempts fail

def _promote_child_agg_memsql(self, cluster, child_agg):
    cluster.save(currently_promoting_master=True)
    try:
        logger.info(
            "Promoting child aggregator at %s:%d to master"
            % (child_agg.data.host_ip, child_agg.data.memsql_port))
        can_connect = False
        try:
            with database.connect(host=child_agg.data.host_ip, port=child_agg.data.memsql_port,
                                  user="******", password="") as conn:
                conn.query("SELECT 1")
                can_connect = True
                conn.execute("AGGREGATOR SET AS MASTER")
        except database.OperationalError as e:
            # roll back only if we reached the node but the promotion statement
            # failed; a connection failure is swallowed here
            if can_connect:
                logger.error(
                    "Could not promote child agg to master for cluster %s: %s"
                    % (cluster.name, str(e)))
                self._rollback_cluster(cluster)
                return
    finally:
        cluster.save(currently_promoting_master=False)

def main():
    # TODO: pull from config
    HOST = 'localhost'
    PORT = 3306
    USER = '******'
    PASSWORD = '******'
    DATABASE = 'acme'

    conn = database.connect(host=HOST, port=PORT, user=USER, password=PASSWORD, database=DATABASE)
    try:
        conn.ping()

        id = create(conn, "Inserted row")
        print("Inserted row {0}".format(id))

        row = read_one(conn, id)
        print(row, sep=',')

        update(conn, id, "Updated row")
        print("Updated row {0}".format(id))

        rows = read_all(conn)
        print("All rows:")
        for row in rows:
            print(row, sep='\t')

        delete(conn, id)
        print("Deleted row {0}".format(id))
    except Exception as e:
        print("Error")
        print(e)
        traceback.print_exc(file=sys.stdout)
    finally:
        conn.close()

def get_connection(params, db=None):
    """ Returns a new connection to the database. """
    if not db:
        db = params.db_name
    return database.connect(host=params.db_host, port=params.db_port, user=params.db_user,
                            password=params.db_pswd, database=db)

def get_connection():
    return database.connect(host='127.0.0.1', port=3306, user='******', password='', database='MemEx')

def bootstrap(request, test_db_args, test_db_database):
    with database.connect(**test_db_args) as conn:
        conn.execute('CREATE DATABASE IF NOT EXISTS %s' % test_db_database)

from memsql.common import database

conn = database.connect(host="127.0.0.1", port=3306, user="******", password="")
print(conn.query("show databases"))

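# Beyond query(), the connection object in these snippets also supports
# parameterized statements and single-row fetches via get(). A minimal sketch,
# assuming a database "db" with a table t(id INT, foo INT) and hypothetical
# credentials:
from memsql.common import database

conn = database.connect(host="127.0.0.1", port=3306, user="root", password="", database="db")
conn.query("INSERT INTO t VALUES (%s, %s)", 1, 42)    # %s placeholders are escaped by the driver
row = conn.get("SELECT foo FROM t WHERE id = %s", 1)  # get() returns a single row
print(row.foo)                                        # rows allow attribute access, as in resp.count above
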
def get_connection(self, params):
    return database.connect(host=params.db_host, port=params.db_port, user=params.db_user,
                            password=params.db_pswd, database="yfcc_" + params.db_name)

def main():
    # load the config and get cluster name and db
    options = getArgs()
    # config, specs = getConfigAndSpecs(options.config_path)
    config = {}
    config['scripts_path'] = '/tmp'

    # logging configuration
    base_logging_path = osPath.join(config['scripts_path'], G.LOG_DIRECTORY_NAME, __appname__)
    makeDirIfNeeded(base_logging_path)
    general_logging_path = osPath.join(base_logging_path, 'GENERAL')
    makeDirIfNeeded(general_logging_path)

    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    level = logging.DEBUG if options.verbose else logging.INFO
    handler = logging.FileHandler(
        osPath.join(
            general_logging_path,
            'general_{}.log'.format(
                datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3])))
    handler.setFormatter(formatter)
    log.addHandler(handler)
    log.setLevel(level)
    if options.console_log:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        log.addHandler(console_handler)

    log.info("In the beginning...")
    log.info("Hostname         : {}".format(options.hostname))
    log.info("Port number      : {}".format(options.portnum))
    log.info("Database Target  : {}".format(options.database_name))
    log.info("Source Directory : {}".format(options.source_dir))
    log.info("Pipeline Name    : {}".format(options.pipeline_name))
    log.info("Done Directory   : {}".format(options.done_dir))

    conn_params = {
        'user': '******',
        'password': '',
        'host': '0.0.0.0',
        'port': 3306,
        'database': 'ssb'
    }
    conn = database.connect(**conn_params)

    if not osPath.exists(options.source_dir):
        log.error("Source directory does not exist!")
        exit(1)

    # look up pipeline_retries
    select_sql = '''
        SELECT variable_value
        FROM information_schema.global_variables
        WHERE variable_name = 'PIPELINES_MAX_RETRIES_PER_BATCH_PARTITION'
    '''
    log.debug('Pipeline retries query : ' + select_sql)
    row = conn.get(select_sql)
    pipeline_retries = int(row['variable_value'])
    log.info('Pipelines will retry files {} times before moving to Fail'.format(pipeline_retries))

    # look up pipeline_name, state, and pipeline directory
    select_sql = '''
        SELECT pipeline_name, state, config_json::$connection_string constr
        FROM information_schema.pipelines
        WHERE pipeline_name = '{}' and database_name = '{}'
    '''.format(options.pipeline_name, options.database_name)
    log.debug('Pipeline lookup query : ' + select_sql)
    row2 = conn.get(select_sql)
    if row2:
        log.info('Pipeline {} exists'.format(row2['pipeline_name']))
        pipeline_directory = osPath.dirname(row2['constr'].decode('utf-8'))
        log.info(pipeline_directory)
    else:
        log.error('Pipeline {} does NOT exist!'.format(options.pipeline_name))
        return (-1)

    # Check to see if pipeline is running
    if row2['state'] != 'Running':
        log.error('Pipeline {} is not Running!!'.format(options.pipeline_name))
        return (-1)

    # Move files to pipeline directory
    files = [f for f in glob(osPath.join(options.source_dir, "*"))]
    for f in files:
        bfile = osPath.basename(f)
        destfullfile = osPath.join(pipeline_directory, bfile)
        log.info("Move {} to {}".format(f, destfullfile))
        rename(f, destfullfile)

    # Find files that have loaded
    files = [f for f in glob(osPath.join(pipeline_directory, "*"))]
    for f in files:
        log.info("Full filename in Pipeline Directory: {}".format(f))
        bfile = osPath.basename(f)
        log.info("Files in Pipeline Directory: {}".format(bfile))

        # lookup file load status in pipelines_offsets table
        select_sql = '''
            SELECT pipeline_name, database_name, source_partition_id, latest_loaded_offset
            FROM information_schema.pipelines_offsets
            WHERE database_name = '{}' and pipeline_name = '{}'
              and source_partition_id like '%{}'
        '''.format(options.database_name, options.pipeline_name, bfile)
        log.debug('Pipeline offsets query : ' + select_sql)
        row = conn.get(select_sql)

        # check for errors in the pipeline
        select_sql = '''
            SELECT count(*) error_count
            FROM information_schema.pipelines_errors
            WHERE database_name = '{}' and pipeline_name = '{}'
              and batch_source_partition_id like '%{}'
        '''.format(options.database_name, options.pipeline_name, bfile)
        log.debug('Pipeline errors query : ' + select_sql)
        row2 = conn.get(select_sql)

        if row:
            if row['latest_loaded_offset'] == 0:
                if row2['error_count'] == 0:
                    log.info('File {} of Pipeline {} has finished loading!!'.format(
                        row['source_partition_id'], row['pipeline_name']))
                    destfullfile = osPath.join(options.done_dir, bfile)
                    log.info('Moving file {} to {}'.format(f, destfullfile))
                    rename(f, destfullfile)
                if row2['error_count'] >= pipeline_retries:
                    log.info('File {} of Pipeline {} has FAILED loading {} times!!'.format(
                        row['source_partition_id'], row['pipeline_name'], pipeline_retries))
                    destfullfile = osPath.join(options.done_dir, bfile + '.fail')
                    log.info('Moving file {} to {}'.format(f, destfullfile))
                    rename(f, destfullfile)
            else:
                log.info('File {} of Pipeline {} in process'.format(
                    row['source_partition_id'], row['pipeline_name']))

    conn.close()

from memsql.common import database
from memsql.perf.network_tester import NetworkTester

master_agg = 'master.cs.memcompute.com'
test_node = 'leaf-1.cs.memcompute.com'
iterations = 100
payload_size = 1024 * 500

conn = database.connect(host=master_agg, user='******')
conn.execute('CREATE DATABASE IF NOT EXISTS performance')
conn.execute('SET GLOBAL max_allowed_packet=%d' % (1024 * 1024 * 10))

m = NetworkTester(payload_size=payload_size).connect(host=master_agg, user='******', database='performance')
if m.ready():
    m.destroy()
m.setup()

n = NetworkTester().connect(host=test_node, user='******', database='performance')

def pp(data, postfix, cb=lambda x: x):
    for k, v in data.items():
        print(k, cb(v), postfix)

print('latency')
pp(n.estimate_latency(), 'ms')

print('\nroundtrip')
pp(n.estimate_roundtrip(iterations), 'MB/s', lambda x: (x / 1024 / 1024))

print('\nupload')
pp(n.estimate_upload(iterations), 'MB/s', lambda x: (x / 1024 / 1024))

def test_db_conn(test_db_args):
    return database.connect(**test_db_args)

def create_connection(self):
    conn = database.connect(host=self.config.mysql_host, port=int(self.config.mysql_port),
                            user=self.config.mysql_user, password=self.config.mysql_password,
                            database=self.config.mysql_db)
    self.conn = conn