def analyze_tables(self, analysis, tables=None, as_sql=False, **kwargs):
    """Base function for table-level analysis"""
    if analysis not in self.template_dict['analysis']:
        raise Exception("'{}' not found in template for '{}'.".format(
            analysis, self.type))

    if not tables and 'schema' in kwargs:
        # get all tables in the schema
        rows = self.get_schemas(kwargs['schema'])
        crt_obj = lambda r: struct(dict(schema=r.schema, table=r.object_name))
        objs = [crt_obj(r) for r in rows]
    else:
        # tables defaults to None (not []) to avoid the mutable-default pitfall
        tables = tables or []
        crt_obj = lambda schema, table: struct(dict(schema=schema, table=table))
        objs = [crt_obj(*self._split_schema_table(t)) for t in tables]

    sql = ' \nunion all\n'.join([
        self._template('analysis.' + analysis).format(
            schema=obj.schema, table=obj.table, **kwargs) for obj in objs
    ])

    return sql if as_sql else self.select(sql, analysis, echo=False)
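# A minimal usage sketch for analyze_tables, assuming a connection obtained
# via get_conn (defined further below) and an 'analysis.field_stats' entry in
# the loaded YAML templates; both names are placeholders, not guaranteed to
# exist in any given profile:
#
#   conn = get_conn('PG_DEV')
#   sql = conn.analyze_tables(
#       'field_stats', tables=['public.orders'], as_sql=True)  # SQL only
#   data = conn.analyze_tables('field_stats', tables=['public.orders'])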
def __init__(self, conn_dict, profile=None, echo=False):
    "Initiate connection"
    self._cred = struct(conn_dict)
    self._cred.kwargs = conn_dict.get('kwargs', {})
    self.name = self._cred.get('name', None)
    self.username = self._cred.get('username', None)
    self.type = self._cred.type
    self.engine = None
    self._cursor_description = None
    self.profile = profile
    self.batch_size = 10000
    self.fetch_size = 20000
    self.echo = echo
    self.connect()
    self.last_connect = now()

    # Base Template
    template_base_path = '{}/database/templates/base.yaml'.format(
        get_dir_path())
    self.template_dict = read_yaml(template_base_path)

    # Specific Type Template
    template_path = '{}/database/templates/{}.yaml'.format(
        get_dir_path(), self.type)
    temp_dict = read_yaml(template_path)

    for key1 in temp_dict:
        # Level 1
        if isinstance(temp_dict[key1], dict):
            if key1 not in self.template_dict:
                self.template_dict[key1] = temp_dict[key1]

            # Level 2: the type-specific value always overwrites the base
            for key2 in temp_dict[key1]:
                self.template_dict[key1][key2] = temp_dict[key1][key2]
        else:
            # Level 1 Non-Dict Overwrite
            self.template_dict[key1] = temp_dict[key1]

    self.variables = self._template('variables')

    if os.getenv('PROFILE_YAML'):
        other_vars = get_variables()
        for key in other_vars:
            self.variables[key] = other_vars[key]

    self.tmp_folder = self.variables['tmp_folder']
    self.set_variables()

    if echo:
        log("Connected to {} as {}".format(self._cred.name, self._cred.user))
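# Self-contained illustration of the two-level template merge above: dict
# values are merged key-by-key (the type-specific entry always wins), while
# non-dict values are overwritten wholesale. The sample data is made up:
base = {'analysis': {'counts': 'select count(1) from {schema}.{table}'},
        'error_filter': 'error'}
specific = {'analysis': {'counts': 'select /*+ parallel */ count(1) from {schema}.{table}'},
            'error_filter': 'ORA-'}
for key1 in specific:
    if isinstance(specific[key1], dict):
        if key1 not in base:
            base[key1] = specific[key1]
        for key2 in specific[key1]:
            base[key1][key2] = specific[key1][key2]  # type-specific wins
    else:
        base[key1] = specific[key1]  # non-dict: overwrite wholesale
assert base['analysis']['counts'].startswith('select /*+ parallel */')
assert base['error_filter'] == 'ORA-'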
def connect(self):
    "Connect / Re-Connect to Database"
    import pymongo

    c = struct(self._cred)
    self.conn = pymongo.MongoClient(
        'mongodb://{host}:{port}/{database}'.format(
            host=c.host,
            port=c.port,
            database=c.database,
        ))
    self._cred['user'] = ''
def connect(self):
    "Connect / Re-Connect to Database"
    c = struct(self._cred)
    restart = c.restart if 'restart' in c else False
    hive_enabled = c.hive_enabled if 'hive_enabled' in c else False
    master = c.master if 'master' in c else None
    version = c.version if 'version' in c else None  # extracted but not passed to Spark below
    spark_home = c.spark_home if 'spark_home' in c else None

    self.sparko = Spark(
        restart=restart,
        hive_enabled=hive_enabled,
        master=master,
        spark_home=spark_home)

    self.application_id = self.sparko.sc._jsc.sc().applicationId()
    self._cred.name = self.name = "Spark"
    self.username = c.user
def handle_db_worker_req(worker: Worker, data_dict):
    """Handler for a database worker request.

    Args:
        worker: the respective worker
        data_dict: the request payload dictionary
    """
    data = struct(data_dict)

    if worker.type == 'monitor':
        send_to_webapp(data_dict)
    elif data.payload_type in ('task-error',):
        send_to_webapp(data_dict)
    elif data.payload_type in ('query-data',):
        send_to_webapp(data_dict)
    elif data.payload_type in ('meta-updated',):
        send_to_webapp(data_dict)
    else:
        # all other payload types are also forwarded to the web app
        send_to_webapp(data_dict)
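# A hypothetical payload for the handler above. Every branch currently
# forwards to the web app, so payload_type mainly drives client-side routing.
# The field names are assumptions inferred from the attributes accessed, not
# a documented schema:
sample_db_payload = {
    'payload_type': 'query-data',
    'sid': 'socket-abc123',   # client session id (assumed)
    'rows': [],               # query result rows (assumed)
}
# handle_db_worker_req(worker, sample_db_payload)  # worker supplied by the app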
def create_engine(self, conn_str=None, echo=False):
    import sqlalchemy
    from cx_Oracle import makedsn

    if conn_str:
        conn_str = 'oracle+cx_oracle://' + conn_str
    else:
        cred = struct(self._cred)
        if 'service' in cred:
            dsn_str = makedsn(cred.host, cred.port, service_name=cred.service)
        elif 'sid' in cred:
            dsn_str = makedsn(cred.host, cred.port, sid=cred.sid)
        else:
            dsn_str = makedsn(cred.host, cred.port)
        conn_str = ('oracle+cx_oracle://{user}:{password}@' + dsn_str).format(
            **cred)

    self.engine = sqlalchemy.create_engine(conn_str, pool_size=10, echo=echo)

    return self.engine
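# Minimal sketch of the DSN construction above. cx_Oracle.makedsn returns a
# TNS connect descriptor string, which is then embedded in the SQLAlchemy URL.
# Host, port, service, and credentials are made-up values:
from cx_Oracle import makedsn

dsn = makedsn('db.example.com', 1521, service_name='ORCLPDB1')
url = 'oracle+cx_oracle://{user}:{password}@'.format(
    user='scott', password='tiger') + dsn
# url -> 'oracle+cx_oracle://scott:tiger@(DESCRIPTION=...(SERVICE_NAME=ORCLPDB1)))'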
def connect(self):
    "Connect / Re-Connect to Database"
    import pyodbc

    # fall back to the existing struct so attribute access below cannot fail
    cred = struct(self._cred) if isinstance(self._cred, dict) else self._cred
    self.odbc_string = ('DRIVER={odbc_driver};SERVER={host};'
                        'DATABASE={database};UID={user};PWD={password}').format(
                            odbc_driver=cred.odbc_driver,
                            host=cred.host,
                            database=cred.database,
                            user=cred.user,
                            password=cred.password,
                        )
    self.connection = pyodbc.connect(self.odbc_string)
    self.cursor = None

    # self.connection.autocommit = True
    self.name = 'sqlserver'
    self.username = cred.user

    cursor = self.get_cursor()
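# A sketch of the ODBC connection string this builds, outside the class. All
# values are placeholders; the driver name depends on what is installed on the
# host, and the braces around it are part of the credential value itself:
odbc_string = (
    'DRIVER={odbc_driver};SERVER={host};DATABASE={database};'
    'UID={user};PWD={password}').format(
        odbc_driver='{ODBC Driver 17 for SQL Server}',
        host='db.example.com',
        database='master',
        user='sa',
        password='secret')
# pyodbc.connect(odbc_string) would open the connection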
def get_conn(db,
             dbs=None,
             echo=True,
             reconnect=False,
             use_jdbc=False,
             conn_expire_min=10,
             spark_hive=False) -> DBConn:
    global conns

    dbs = dbs if dbs else get_databases()
    profile = get_profile()
    db_dict = struct(dbs[db])

    if db_dict.type.lower() == 'hive' and spark_hive:
        db_dict.type = 'spark'

    use_jdbc = use_jdbc or ('use_jdbc' in db_dict and db_dict['use_jdbc'])

    # return the cached connection if it has not expired
    if db in conns and not reconnect:
        if (now() - conns[db].last_connect).total_seconds() / 60 < conn_expire_min:
            return conns[db]

    if use_jdbc:
        log('*USING JDBC for ' + db)
        from .jdbc import JdbcConn
        conn = JdbcConn(db_dict, profile=profile)
    elif db_dict.type.lower() == 'oracle':
        from .oracle import OracleConn
        conn = OracleConn(db_dict, echo=echo)
    elif db_dict.type.lower() == 'spark':
        from .spark import SparkConn
        conn = SparkConn(db_dict, echo=echo)
    elif db_dict.type.lower() == 'hive':
        from .hive import HiveConn, Beeline
        if 'use_beeline' in db_dict and db_dict.use_beeline:
            conn = Beeline(db_dict, echo=echo)
        else:
            conn = HiveConn(db_dict, echo=echo)
    elif db_dict.type.lower() in ('postgresql', 'redshift'):
        from .postgresql import PostgreSQLConn
        conn = PostgreSQLConn(db_dict, echo=echo)
    elif db_dict.type.lower() == 'sqlserver':
        from .sqlserver import SQLServerConn
        conn = SQLServerConn(db_dict, echo=echo)
    elif db_dict.type.lower() == 'sqlite':
        from .sqlite import SQLiteConn
        conn = SQLiteConn(db_dict, echo=echo)
    else:
        raise Exception(f'Type {db_dict.type} not handled!')

    conns[db] = conn
    return conn
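# A minimal usage sketch, assuming the profile defines a 'PG_DEV' database
# entry (the name is a placeholder):
#
#   conn = get_conn('PG_DEV')          # creates and caches the connection
#   conn_again = get_conn('PG_DEV')    # returns the cached connection while
#                                      # it is younger than conn_expire_min
#   conn_fresh = get_conn('PG_DEV', reconnect=True)  # forces a new connection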
def handle_web_worker_req(web_worker: Worker, data_dict):
    """Handler for a web worker request.

    Args:
        web_worker: the respective worker
        data_dict: the request payload dictionary

    Returns:
        True if successful. False if no worker is available.
    """
    data = struct(data_dict)

    response_data = {}
    response_data_for_missing = {
        'completed': False,
        'payload_type': 'client-response',
        'sid': data.sid,
        'error': Exception('Request "{}" not handled!'.format(data.req_type))
    }

    if data.req_type == 'submit-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False

        # send to worker queue
        db_worker.put_child_q(data_dict)
        response_data['worker_name'] = db_worker.name
        response_data['queued'] = True

    elif data.req_type == 'stop-worker':
        completed = stop_worker(data.worker_name)
        response_data = dict(completed=completed)

    elif data.req_type == 'add-worker':
        start_worker_db(data.database, start=True)
        response_data = dict(completed=True)

    elif data.req_type == 'set-state':
        store.state_set(data.key, data.value)
        response_data = dict(completed=True)

    elif data.req_type == 'set-database':
        store.sqlx('databases').replace_rec(**data.db_states)
        response_data = dict(completed=True)

    elif data.req_type == 'get-database':
        rec = store.sqlx('databases').select_one(fwa(db_name=data.db_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-databases':
        databases = get_databases()
        get_rec = lambda d: dict(type=d['type'])
        response_data = dict(
            completed=True,
            data={
                k: get_rec(databases[k])
                for k in sorted(databases)
                if k.lower() not in ('tests', 'drivers')
            })

    elif data.req_type == 'get-analysis-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False

        db_worker.put_child_q(data_dict)
        response_data['queued'] = True

    elif data.req_type == 'get-meta-tables':
        where = "lower(db_name)=lower('{}')".format(data.database)
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                data.filter_schema)
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                data.filter_table)
        rows = store.sqlx('meta_tables').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_tables').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            # nothing cached: queue the request on the database worker
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'get-meta-columns':
        log(str(data))
        where = "lower(db_name)=lower('{}')".format(data.database)
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                data.filter_schema)
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                data.filter_table)
        if data.filter_column:
            where = where + ''' and lower(column_name) like lower('%{}%')'''.format(
                data.filter_column)
        rows = store.sqlx('meta_columns').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_columns').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            # nothing cached: queue the request on the database worker
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'set-tab':
        store.sqlx('tabs').replace_rec(**data.tab_state)
        response_data = dict(completed=True)

    elif data.req_type == 'get-tab':
        rec = store.sqlx('tabs').select_one(
            fwa(db_name=data.db_name, tab_name=data.tab_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-tasks':
        rows = store.sqlx('tasks').query(
            where='1=1 order by end_date desc, start_date desc, queue_date desc',
            limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-queries':
        rows = store.sqlx('queries').query(
            where="""
                lower(sql_text) like '%{}%'
                and database = '{}'
                and sql_text <> ''
                order by exec_date desc
            """.format(data.filter.lower(), data.database),
            limit=int(data.limit))
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'search-queries':
        where = "sql_text like '%{}%' order by exec_date desc".format(
            data.query_filter)
        rows = store.sqlx('queries').query(where=where, limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-workers':
        make_rec = lambda wkr: dict(
            name=wkr.name,
            status=wkr.status,
            start_time=wkr.started,
            pid=wkr.pid,
        )
        workers_data = [make_rec(wkr) for wkr in workers.values()]
        response_data = dict(data=workers_data, completed=True)

    elif data.req_type == 'reset-db':
        for wkr_nm in list(workers):
            if wkr_nm in ('webapp', 'mon'):
                continue
            stop_worker(wkr_nm)
        store.create_tables(drop_first=True, ask=False)
        response_data = dict(completed=True)

    # In case a handler is missing. 'completed' is also checked here.
    if response_data:
        response_data['orig_req'] = data_dict
        response_data['payload_type'] = 'client-response'
        response_data['sid'] = data.sid
        response_data['completed'] = response_data.get('completed', False)
        res = ('+Completed' if response_data['completed'] else
               '+Queued' if response_data.get('queued') else
               '~Did not Complete')
        log('{} "{}" request "{}".'.format(res, data.req_type, data.id))
    else:
        response_data = response_data_for_missing

    # Respond to WebApp Worker
    send_to_webapp(response_data)

    return True
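# A hypothetical request payload for the handler above. Field names are
# assumptions inferred from the attributes accessed via struct, not a
# documented schema:
sample_web_req = {
    'id': 42,                       # request id (assumed)
    'sid': 'socket-abc123',         # client session id (assumed)
    'req_type': 'submit-sql',
    'database': 'PG_DEV',           # placeholder profile entry
    'sql': 'select * from public.orders limit 10',
}
# handle_web_worker_req(web_worker, sample_web_req) would queue the SQL on the
# database worker and send a 'client-response' payload back to the web app.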