예제 #1
0
 def show_sql(self, pretty_print=False, **params):
     q = self._get_query()
     sql = VulknSQLFormatter().format(q, **params) if params else q
     statements = vulkn.sql.SQLMessage(sql).statements()
     if len(statements) > 1:
         raise Exception('More than one statement is not supported')
     return sqlformat(statements[0].optimize(), oneline=not pretty_print)
예제 #2
0
 def _q(self, query: str, settings: dict = None) -> dict:
     if self._http is None:
         http_kwargs = {}
         if self._auth['insecure']:
             http_kwargs['cert_reqs'] = 'CERT_NONE'
             requests.packages.urllib3.disable_warnings()
         log.debug(http_kwargs)
         self._http = urllib3.PoolManager(**http_kwargs)
     host = self._auth.get('host')
     if not host.startswith('http'):
         host = f'http://{host}'
     port = self._auth.get('http_port') or self._auth.get('port') or '8123'
     log.log(LogLevels.SQL, sqlformat(query))
     headers = {
         'X-ClickHouse-User': self._auth.get('user'),
         'X-ClickHouse-Key': self._auth.get('password')
     }
     body = f'{query}'
     if (query.strip().lower().startswith('select')
             or query.strip().lower().startswith('with')
             or query.strip().lower().startswith('show')):
         body = f'{body} FORMAT TSVWithNamesAndTypes'
     s = '' if not settings else '&'.join(
         [f'{k}={i}'.format(k, i) for k, i in settings.items()])
     if s != '':
         log.log(LogLevels.SQL, s)
     r = self._http.request('POST',
                            f'{host}:{port}/?{s}',
                            headers=headers,
                            body=body)
     return r
예제 #3
0
파일: reader.py 프로젝트: VulknData/vulkn
def marshal_columns(src_database, src_table, target_database, target_table,
                    columns, types):
    cols = []
    for i, t in enumerate(types):
        c = 'nullIf(trim(toString("{}")), \'\')'.format(columns[i])
        if t in ('DateTime', 'Date'):
            c = 'parseDateTimeBestEffortOrNull({})'.format(c)
        cols.append(c)
    columns = ','.join(cols)
    database = target_database
    table = target_table
    sql = f'INSERT INTO {database}.{table} SELECT {columns} FROM {src_database}.{src_table}'
    return sqlformat(sql)
예제 #4
0
파일: cli.py 프로젝트: scottwedge/vulkn
 def _q(self, cli, query, settings=None):
     if cli is None:
         flag_opts_overrides = {'disable_suggestion': True}
         opts_overrides = {'format': 'TSVWithNamesAndTypes'}
         cli = self._cli(flag_opts_overrides=flag_opts_overrides,
                         opts_overrides=opts_overrides,
                         settings_overrides=settings)
     log.log(5, quote_shlex(cli))
     log.log(LogLevels.SQL, sqlformat(query))
     p = subprocess.run(cli,
                        input=query,
                        stdout=subprocess.PIPE,
                        universal_newlines=True)
     return p
예제 #5
0
파일: reader.py 프로젝트: VulknData/vulkn
def infer_column_type(database, table, column, sample=0):
    sample = 0
    sql = """
        SELECT
            '{column}' AS col_name,
            any(nullable) AS nullable,
            (uniq_approx < 6 AND recommended_type = 'String') AS enum,
            (uniq_approx > 10 AND uniq_approx < 1000000 AND recommended_type = 'String') AS low_cardinality,
            recommended_type,
            any(sample) AS sample,
            any(uniq_approx) AS uniq_approx
        FROM (
            SELECT
                (null_cnt > 0) AS nullable,
                multiIf(
                    uint_cnt = non_null_cnt,
                        multiIf(
                            length(toString(toUInt8OrNull(col))) = length(col), 'UInt8',
                            length(toString(toUInt16OrNull(col))) = length(col), 'UInt16',
                            length(toString(toUInt32OrNull(col))) = length(col), 'UInt32',
                            length(toString(toUInt64OrNull(col))) = length(col), 'UInt64',
                            'String'),
                    int_cnt = non_null_cnt,
                        multiIf(
                            length(toString(toInt8OrNull(col))) = length(col), 'Int8',
                            length(toString(toInt16OrNull(col))) = length(col), 'Int16',
                            length(toString(toInt32OrNull(col))) = length(col), 'Int32',
                            length(toString(toInt64OrNull(col))) = length(col), 'Int64',
                            'String'),
                    float_cnt = non_null_cnt,
                        multiIf(
                            length(toString(toFloat32OrNull(col))) = length(col), 'Float32',
                            length(toString(toFloat64OrNull(col))) = length(col), 'Float64',
                            'String'),
                    datetime_cnt = non_null_cnt,
                        if(parseDateTimeBestEffortOrNull(col) IS NOT NULL, 'DateTime', 'String'),
                    date_cnt = non_null_cnt,
                        if(parseDateTimeBestEffortOrNull(col) IS NOT NULL, 'Date', 'String'),
                    'String') AS recommended_type,
                sample,
                if({sample} == 0, any(uniq_values), any(toUInt64(uniq_values*(100/{sample})))) AS uniq_approx
            FROM (
                SELECT
                    '{column}' AS col_name,
                    toString("{column}") AS col
                FROM {database}.{table_name}
                WHERE if({sample} == 0, 1, rand(1)%100 <= ({sample} - 1))
            ) ALL INNER JOIN (
                SELECT
                    col_name,
                    count() AS col_cnt,
                    countIf(col IS NULL) AS null_cnt,
                    col_cnt - null_cnt AS non_null_cnt,
                    sum(match(col, '^([\-0-9\.]*)$')) AS float_cnt,
                    sum(match(col, '^([\-0-9]*)$')) AS int_cnt,
                    sum(match(col, '^([0-9]*)$')) AS uint_cnt,
                    sum(match(col, '.*([0-9]{{2}}:[0-9]{{2}}).*')) AS datetime_cnt,
                    sum(match(col, '.*([0-9]{{2}}[/\-][0-9]{{2}}[/\-][0-9]{{2}}).*')) AS date_cnt,
                    uniqExact(col) AS uniq_values,
                    topK(10)(col) AS sample
                FROM (
                    SELECT
                        '{column}' AS col_name,
                        nullIf(trim(toString("{column}")), '') AS col
                    FROM {database}.{table_name}
                    WHERE if({sample} == 0, 1, rand(1)%100 <= ({sample} - 1))
                ) GROUP BY col_name
            ) USING (col_name)
            GROUP BY nullable, recommended_type, sample
        ) GROUP BY recommended_type, col_name
        ORDER BY
            recommended_type = 'UInt64' DESC
            , recommended_type = 'UInt32' DESC
            , recommended_type = 'UInt16' DESC
            , recommended_type = 'UInt8' DESC
            , recommended_type = 'Int64' DESC
            , recommended_type = 'Int32' DESC
            , recommended_type = 'Int16' DESC
            , recommended_type = 'Int8' DESC
            , recommended_type = 'Float64' DESC
            , recommended_type = 'Float32' DESC
            , recommended_type = 'DateTime' DESC
            , recommended_type = 'Date' DESC
            , recommended_type = 'String' DESC
        LIMIT 1
    """
    return sqlformat(
        sql.format(database=database,
                   column=column,
                   table_name=table,
                   sample=sample))
예제 #6
0
 def getCreate(self, table):
     return sqlformat(self.q('SHOW CREATE {}'.format(
         self._set_db(table))).e().to_records()[0]['statement'],
                      oneline=False)