Пример #1
0
    def analyze_tables(self, analysis, tables=None, as_sql=False, **kwargs):
        """Base function for table level analysis.

        Args:
            analysis: name of an analysis template under
                ``template_dict['analysis']``.
            tables: optional list of 'schema.table' names. When empty/None
                and a ``schema`` kwarg is provided, all tables of that
                schema are analyzed instead.
            as_sql: when True, return the generated SQL instead of running it.
            **kwargs: extra values substituted into the analysis template.

        Returns:
            The combined SQL string when ``as_sql``, otherwise the result
            of executing it via ``self.select``.

        Raises:
            Exception: if ``analysis`` is not defined in the template.
        """
        # BUG FIX: `tables=[]` was a shared mutable default; use None and
        # normalize here (callers passing [] behave identically).
        tables = [] if tables is None else tables

        if analysis not in self.template_dict['analysis']:
            raise Exception("'{}' not found in template for '{}'.".format(
                analysis, self.type))

        if not tables and 'schema' in kwargs:
            # get all tables of the requested schema
            rows = self.get_schemas(kwargs['schema'])
            objs = [
                struct(dict(schema=r.schema, table=r.object_name))
                for r in rows
            ]
        else:
            objs = [
                struct(dict(schema=schema, table=table))
                for schema, table in [self._split_schema_table(t)
                                      for t in tables]
            ]

        # One statement per table, stitched into a single UNION ALL query.
        sql = ' \nunion all\n'.join([
            self._template('analysis.' + analysis).format(schema=obj.schema,
                                                          table=obj.table,
                                                          **kwargs)
            for obj in objs
        ])

        return sql if as_sql else self.select(sql, analysis, echo=False)
Пример #2
0
    def __init__(self, conn_dict, profile=None, echo=False):
        """Initiate a database connection from a credentials dict.

        Connects immediately (``self.connect()``), then loads the SQL
        template dictionary: a shared base YAML overlaid with the
        type-specific YAML for this connection's ``type``.

        Args:
            conn_dict: credentials dict; must contain 'type', and may
                contain 'name', 'username' and a nested 'kwargs' dict.
            profile: optional profile object stored on the instance.
            echo: when True, log a message after connecting.
        """
        self._cred = struct(conn_dict)
        self._cred.kwargs = conn_dict.get('kwargs', {})
        self.name = self._cred.get('name', None)
        self.username = self._cred.get('username', None)
        self.type = self._cred.type
        self.engine = None
        self._cursor_description = None
        self.profile = profile
        self.batch_size = 10000
        self.fetch_size = 20000
        self.echo = echo
        self.connect()
        self.last_connect = now()

        # Base Template: defaults shared by every database type.
        template_base_path = '{}/database/templates/base.yaml'.format(
            get_dir_path())
        self.template_dict = read_yaml(template_base_path)

        # Specific Type Template: overrides layered on top of the base.
        template_path = '{}/database/templates/{}.yaml'.format(
            get_dir_path(), self.type)
        temp_dict = read_yaml(template_path)

        for key1 in temp_dict:
            # Level 1
            if isinstance(temp_dict[key1], dict):
                if key1 not in self.template_dict:
                    self.template_dict[key1] = temp_dict[key1]

                # Level 2: type-specific entries always win over the base.
                for key2 in temp_dict[key1]:
                    # Always Overwrite
                    self.template_dict[key1][key2] = temp_dict[key1][key2]
            else:
                # Level 1 Non-Dict Overwrite
                self.template_dict[key1] = temp_dict[key1]

        self.variables = self._template('variables')

        # Variables from the PROFILE_YAML env file override template ones.
        if os.getenv('PROFILE_YAML'):
            other_vars = get_variables()
            for key in other_vars:
                self.variables[key] = other_vars[key]

        self.tmp_folder = self.variables['tmp_folder']
        self.set_variables()

        if echo:
            # NOTE(review): reads self._cred.user here, but self.username was
            # taken from the 'username' key above — confirm which key the
            # credentials actually carry.
            log("Connected to {} as {}".format(self._cred.name,
                                               self._cred.user))
Пример #3
0
    def connect(self):
        """Connect / Re-Connect to Database."""
        import pymongo

        cred = struct(self._cred)
        uri = 'mongodb://{host}:{port}/{database}'.format(
            host=cred.host,
            port=cred.port,
            database=cred.database,
        )
        self.conn = pymongo.MongoClient(uri)

        # Mongo credentials carry no user name; blank it for consistency.
        self._cred['user'] = ''
Пример #4
0
  def connect(self):
    """Connect / Re-Connect to Database.

    Builds a Spark session via the ``Spark`` helper using optional settings
    from the stored credentials, then records the application id and
    normalizes the connection name.
    """
    c = struct(self._cred)
    # Optional credential settings default to safe values when absent.
    restart = c.get('restart', False)
    hive_enabled = c.get('hive_enabled', False)
    master = c.get('master', None)
    spark_home = c.get('spark_home', None)
    # NOTE(review): the original also read c.version into an unused local
    # without ever passing it to Spark(); dropped as dead code — confirm
    # Spark() does not need a `version` argument.
    self.sparko = Spark(
      restart=restart,
      hive_enabled=hive_enabled,
      master=master,
      spark_home=spark_home)
    self.application_id = self.sparko.sc._jsc.sc().applicationId()

    self._cred.name = self.name = "Spark"
    self.username = c.user
Пример #5
0
def handle_db_worker_req(worker: Worker, data_dict):
    """Handler for a database worker request.

    Currently every payload type is forwarded to the webapp worker; the
    branches are kept separate so type-specific handling can be added.

    Args:
        worker: the respective worker
        data_dict: the request payload dictionary
    """
    data = struct(data_dict)
    if worker.type == 'monitor':
        send_to_webapp(data_dict)
    # BUG FIX: `in ('task-error')` tested substring membership in a plain
    # string (parentheses alone do not make a tuple), so e.g. 'error'
    # would also match; use one-element tuples for real membership tests.
    elif data.payload_type in ('task-error',):
        send_to_webapp(data_dict)
    elif data.payload_type in ('query-data',):
        send_to_webapp(data_dict)
    elif data.payload_type in ('meta-updated',):
        send_to_webapp(data_dict)
    else:
        send_to_webapp(data_dict)
Пример #6
0
  def create_engine(self, conn_str=None, echo=False):
    """Create (and cache on self) a SQLAlchemy engine for this connection.

    When no connection string is given, one is assembled from the stored
    credentials, preferring an Oracle service name over a SID.
    """
    import sqlalchemy
    from cx_Oracle import makedsn

    if not conn_str:
      cred = struct(self._cred)
      # Pick the DSN flavor the credentials provide: service > sid > bare.
      dsn_kwargs = {}
      if 'service' in cred:
        dsn_kwargs['service_name'] = cred.service
      elif 'sid' in cred:
        dsn_kwargs['sid'] = cred.sid
      dns_str = makedsn(cred.host, cred.port, **dsn_kwargs)
      conn_str = (
        'oracle+cx_oracle://{user}:{password}@' + dns_str).format(**cred)
    else:
      conn_str = 'oracle+cx_oracle://' + conn_str

    self.engine = sqlalchemy.create_engine(conn_str, pool_size=10, echo=echo)

    return self.engine
Пример #7
0
    def connect(self):
        """Connect / Re-Connect to the SQL Server database via ODBC.

        Builds the ODBC connection string from the stored credentials,
        opens the connection and primes a cursor.

        Raises:
            Exception: if the stored credentials are not a dict.
        """
        import pyodbc

        # BUG FIX: the original set `cred = None` for non-dict credentials
        # and then crashed with an opaque AttributeError on the very next
        # access (cred.odbc_driver); fail fast with a clear message instead.
        if not isinstance(self._cred, dict):
            raise Exception(
                'Invalid credentials for sqlserver connection: expected a dict.')
        cred = struct(self._cred)

        self.odbc_string = 'DRIVER={odbc_driver};SERVER={host};DATABASE={database};UID={user};PWD={password}'.format(
            odbc_driver=cred.odbc_driver,
            host=cred.host,
            database=cred.database,
            user=cred.user,
            password=cred.password,
        )
        self.connection = pyodbc.connect(self.odbc_string)

        self.cursor = None

        self.name = 'sqlserver'
        # cred is guaranteed above; the dead `if cred else ''` guard removed.
        self.username = cred.user

        cursor = self.get_cursor()
Пример #8
0
def get_conn(db,
             dbs=None,
             echo=True,
             reconnect=False,
             use_jdbc=False,
             conn_expire_min=10,
             spark_hive=False) -> DBConn:
    """Return a cached or newly created connection for database `db`.

    Args:
        db: key of the database in the databases dict.
        dbs: optional databases dict; defaults to ``get_databases()``.
        echo: verbosity flag passed through to the connection classes.
        reconnect: force a fresh connection even when one is cached.
        use_jdbc: force JDBC; also enabled by the db's own 'use_jdbc' flag.
        conn_expire_min: cached connections older than this are recreated.
        spark_hive: treat a 'hive' database as 'spark'.

    Returns:
        A DBConn subclass instance, also stored in the global `conns` cache.

    Raises:
        Exception: if the database type has no handler.
    """
    global conns

    dbs = dbs if dbs else get_databases()
    profile = get_profile()
    db_dict = struct(dbs[db])

    if db_dict.type.lower() == 'hive' and spark_hive:
        db_dict.type = 'spark'

    # Hoisted after the spark_hive override: .lower() was re-evaluated in
    # every elif branch below.
    db_type = db_dict.type.lower()

    # Simplified from the convoluted `True if (...) else use_jdbc` form.
    use_jdbc = bool(use_jdbc
                    or ('use_jdbc' in db_dict and db_dict['use_jdbc']))

    # Reuse a cached connection if it has not expired.
    if db in conns and not reconnect:
        mins_alive = (now() - conns[db].last_connect).total_seconds() / 60
        if mins_alive < conn_expire_min:
            return conns[db]

    if use_jdbc:
        log('*USING JDBC for ' + db)
        from .jdbc import JdbcConn
        conn = JdbcConn(db_dict, profile=profile)

    elif db_type == 'oracle':
        from .oracle import OracleConn
        conn = OracleConn(db_dict, echo=echo)

    elif db_type == 'spark':
        from .spark import SparkConn
        conn = SparkConn(db_dict, echo=echo)

    elif db_type == 'hive':
        from .hive import HiveConn, Beeline
        if 'use_beeline' in db_dict and db_dict.use_beeline:
            conn = Beeline(db_dict, echo=echo)
        else:
            conn = HiveConn(db_dict, echo=echo)

    elif db_type in ('postgresql', 'redshift'):
        from .postgresql import PostgreSQLConn
        conn = PostgreSQLConn(db_dict, echo=echo)

    elif db_type == 'sqlserver':
        from .sqlserver import SQLServerConn
        conn = SQLServerConn(db_dict, echo=echo)

    elif db_type == 'sqlite':
        from .sqlite import SQLiteConn
        conn = SQLiteConn(db_dict, echo=echo)
    else:
        raise Exception(f'Type {db_dict.type} not handled!')

    conns[db] = conn
    return conn
Пример #9
0
def handle_web_worker_req(web_worker: Worker, data_dict):
    """Handler for a web worker request.

    Dispatches on ``data_dict['req_type']``: requests that need a live
    database are queued on (or trigger creation of) a db worker; the rest
    are served directly from the local store. A response payload is always
    sent back to the webapp worker, even for unhandled request types.

    Args:
        web_worker: the respective worker
        data_dict: the request payload dictionary

    Returns:
        True if successful. False if no worker is available.
    """
    data = struct(data_dict)
    response_data = {}
    # Fallback payload used when no branch below handles the request type.
    response_data_for_missing = {
        'completed': False,
        'payload_type': 'client-response',
        'sid': data.sid,
        'error': Exception('Request "{}" not handled!'.format(data.req_type))
    }

    # BUG FIX: the original `data.req_type in ('submit-sql')` tested
    # substring membership in a plain string (parentheses alone do not make
    # a tuple), so e.g. req_type 'sql' would also match; use equality.
    if data.req_type == 'submit-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False

        # send to worker queue
        db_worker.put_child_q(data_dict)
        response_data['worker_name'] = db_worker.name
        response_data['queued'] = True

    elif data.req_type == 'stop-worker':
        completed = stop_worker(data.worker_name)
        response_data = dict(completed=completed)

    elif data.req_type == 'add-worker':
        start_worker_db(data.database, start=True)
        response_data = dict(completed=True)

    elif data.req_type == 'set-state':
        store.state_set(data.key, data.value)
        response_data = dict(completed=True)

    elif data.req_type == 'set-database':
        store.sqlx('databases').replace_rec(**data.db_states)
        response_data = dict(completed=True)

    elif data.req_type == 'get-database':
        rec = store.sqlx('databases').select_one(fwa(db_name=data.db_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-databases':
        databases = get_databases()
        get_rec = lambda d: dict(type=d['type'])
        response_data = dict(completed=True,
                             data={
                                 k: get_rec(databases[k])
                                 for k in sorted(databases)
                                 if k.lower() not in ('tests', 'drivers')
                             })

    elif data.req_type == 'get-analysis-sql':
        db_worker = get_or_create_worker(data.database)
        if db_worker is None:
            return False
        db_worker.put_child_q(data_dict)
        response_data['queued'] = True

    elif data.req_type == 'get-meta-tables':
        # Serve from the local meta store when possible; otherwise queue a
        # refresh on the db worker.
        where = "lower(db_name)=lower('{}')".format(data.database)
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                data.filter_schema)
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                data.filter_table)
        rows = store.sqlx('meta_tables').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_tables').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'get-meta-columns':
        log(str(data))
        # Same store-first strategy as 'get-meta-tables', plus column filter.
        where = "lower(db_name)=lower('{}')".format(data.database)
        if data.filter_schema:
            where = where + ''' and lower(schema_name) like lower('%{}%')'''.format(
                data.filter_schema)
        if data.filter_table:
            where = where + ''' and lower(table_name) like lower('%{}%')'''.format(
                data.filter_table)
        if data.filter_column:
            where = where + ''' and lower(column_name) like lower('%{}%')'''.format(
                data.filter_column)
        rows = store.sqlx('meta_columns').query(where, limit=data.limit)
        if rows:
            headers = store.sqlx('meta_columns').ntRec._fields
            rows = [list(r) for r in rows]
            response_data = dict(completed=True, headers=headers, rows=rows)
        else:
            db_worker = get_or_create_worker(data.database)
            if db_worker is None:
                return False
            db_worker.put_child_q(data_dict)
            response_data['queued'] = True

    elif data.req_type == 'set-tab':
        store.sqlx('tabs').replace_rec(**data.tab_state)
        response_data = dict(completed=True)

    elif data.req_type == 'get-tab':
        rec = store.sqlx('tabs').select_one(
            fwa(db_name=data.db_name, tab_name=data.tab_name))
        response_data = dict(completed=True, data=rec._asdict())

    elif data.req_type == 'get-tasks':
        rows = store.sqlx('tasks').query(
            where=
            '1=1 order by end_date desc, start_date desc, queue_date desc',
            limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-queries':
        rows = store.sqlx('queries').query(where="""
        lower(sql_text) like '%{}%'
        and database = '{}'
        and sql_text <> ''
        order by exec_date desc
      """.format(data.filter.lower(), data.database),
                                           limit=int(data.limit))
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'search-queries':
        where = "sql_text like '%{}%' order by exec_date desc".format(
            data.query_filter)
        rows = store.sqlx('queries').query(where=where, limit=100)
        recs = [row._asdict() for row in rows]
        response_data = dict(data=recs, completed=True)

    elif data.req_type == 'get-workers':
        make_rec = lambda wkr: dict(
            name=wkr.name,
            status=wkr.status,
            start_time=wkr.started,
            pid=wkr.pid,
        )
        workers_data = [make_rec(wkr) for wkr in workers.values()]
        response_data = dict(data=workers_data, completed=True)

    elif data.req_type == 'reset-db':
        # Stop all db workers (keep infrastructure workers) and rebuild.
        for wkr_nm in list(workers):
            if wkr_nm in ('webapp', 'mon'):
                continue
            stop_worker(wkr_nm)
        store.create_tables(drop_first=True, ask=False)
        response_data = dict(completed=True)

    # In case handler is missing. Also checked for completed.
    if response_data:
        response_data['orig_req'] = data_dict
        response_data['payload_type'] = 'client-response'
        response_data['sid'] = data.sid
        response_data['completed'] = response_data.get('completed', False)
        if response_data['completed']:
            res = '+Completed'
        elif response_data.get('queued'):
            res = '+Queued'
        else:
            res = '~Did not Complete'
        log('{} "{}" request "{}".'.format(res, data.req_type, data.id))
    else:
        response_data = response_data_for_missing

    # Respond to WebApp Worker
    send_to_webapp(response_data)

    return True