def next(self, cache=False):
    """Build a vector of every row with ``i > 0`` plus a trailing NULL row.

    The trailing row uses the source row count as its index and NULL as its
    value.

    Args:
        cache: Forwarded to ``self._cache_vector``.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    source = self._cache_vector(cache)
    # The row-count subquery wraps the source in parentheses exactly like the
    # main SELECT does, so both references stay valid when the source is
    # itself a SELECT statement.
    query = (
        f" SELECT i, v FROM ({source}) WHERE i > 0"
        " UNION ALL"
        f" SELECT (SELECT COUNT() FROM ({source})), NULL"
    )
    return ColumnVector(Literal(query))
def cut(self, cut_length, cache=False):
    """Group the rows into buckets of ``cut_length`` consecutive indexes.

    Each output row carries the bucket number ``(i - i % cut_length) /
    cut_length`` as ``i`` and the ``groupArray`` of the bucket's values as
    ``v``.

    Args:
        cut_length: Bucket width interpolated into the SQL.
        cache: Forwarded to ``self._cache_vector``.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    source = self._cache_vector(cache)
    query = (
        f" SELECT (i-i%{cut_length})/{cut_length} AS i, groupArray(v) AS v"
        f" FROM ({source})"
        " GROUP BY i ORDER BY i"
    )
    return ColumnVector(Literal(query))
def rand(self, start, end, length, max_block_size=30000):
    """Build a vector of ``length`` values derived from ``rand64``.

    Each value is ``start + rand64(number) % (1 + abs(start - end))``.

    Args:
        start: Lower offset interpolated into the SQL.
        end: Other bound used to size the modulus.
        length: Row count passed to ``numbers_mt``.
        max_block_size: Value for the ``max_block_size`` query setting.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    query = (
        " SELECT number AS i,"
        f" {start} + rand64(number)%toUInt64(1 + abs({start} - {end})) AS v"
        f" FROM numbers_mt({length})"
        f" SETTINGS max_block_size={max_block_size}"
    )
    return ColumnVector(Literal(query))
def maplead(self, func, cache=False):
    """Apply ``func`` to each row paired with the row at index ``i + 1``.

    The vector is joined to a copy of itself whose index is shifted down by
    one, and ``func(v2.v, v1.v)`` is evaluated for every matching index.

    Args:
        func: SQL function name interpolated into the query.
        cache: Forwarded to ``self._cache_vector``.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    source = self._cache_vector(cache)
    query = (
        f" SELECT i, {func}(v2.v, v1.v) AS v"
        f" FROM ({source}) v1"
        " ALL INNER JOIN ("
        f" SELECT toUInt64(i-1) AS i, v FROM ({source})"
        " ) v2 USING (i)"
    )
    return ColumnVector(Literal(query))
def range(self, start, end, max_block_size=30000):
    """Build a vector of ``1 + abs(start - end)`` consecutive values.

    Row ``number`` holds the value ``number + start``.

    Args:
        start: Offset added to each generated number.
        end: Other bound, used only to size the sequence.
        max_block_size: Value for the ``max_block_size`` query setting.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    query = (
        " SELECT number AS i,"
        f" number + {start} AS v"
        f" FROM numbers_mt(toUInt64(1 + abs({start} - {end})))"
        f" SETTINGS max_block_size={max_block_size}"
    )
    return ColumnVector(Literal(query))
def cast(self, to_type):
    """Build a vector whose values are cast to ``to_type``.

    Args:
        to_type: Either the target type name as a string, or an object
            exposing the type name through a ``CAST`` attribute.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    target = to_type if isinstance(to_type, str) else to_type.CAST
    if isinstance(self._value, list):
        # A raw Python list is expanded row by row from its string form.
        source = f'SELECT rowNumberInAllBlocks() AS i, arrayJoin({self._value!s}) AS v'
    else:
        source = self.sql
    query = f"SELECT i, cast(v, '{target}') AS v FROM ({source})"
    return ColumnVector(Literal(query))
def prev(self, cache=False):
    """Build a vector shifted up by one index with a leading NULL row.

    Row 0 is ``NULL``; every source row kept by the filter is re-emitted at
    index ``i + 1``.

    Args:
        cache: Forwarded to ``self._cache_vector``.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    source = self._cache_vector(cache)
    # NOTE(review): the filter keeps rows with i < max(i-1), which appears to
    # drop the last two source rows rather than only the last one -- confirm
    # the intended bound.
    query = (
        " SELECT 0 AS i, NULL AS v"
        " UNION ALL"
        " SELECT i+1, v FROM ("
        f" SELECT * FROM ({source})"
        f" WHERE i < (SELECT max(i-1) FROM ({source}))"
        " )"
    )
    return ColumnVector(Literal(query))
def toArrayVector(self):
    """Collapse the cached vector into a single-array ``ArrayVector``.

    Values are read from the table returned by ``self._cache()`` in index
    order and gathered with ``groupArray``; the query caps both the index
    and the row count at 100000000.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.
    """
    # Imported locally, as in the original implementation.
    from vulkn.types.array_vector import ArrayVector

    table = self._cache()
    query = (
        " SELECT groupArray(v) AS v FROM ("
        f" SELECT v FROM {table}"
        " WHERE i < 100000000 ORDER BY i LIMIT 100000000"
        " )"
    )
    return ArrayVector(Literal(query))
def flatten(self, cache=False):
    """Expand a vector of arrays into one row per array element.

    Elements are ordered by the source index and then by their position in
    the array, and the output is re-indexed with ``rowNumberInAllBlocks()``.

    Args:
        cache: Forwarded to ``self._cache_vector``.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    source = self._cache_vector(cache)
    query = (
        " SELECT rowNumberInAllBlocks() AS i, _v AS v FROM ("
        f" SELECT _v FROM ({source})"
        " ARRAY JOIN arrayEnumerate(v) AS _i, v AS _v"
        " ORDER BY i, _i"
        " )"
    )
    return ColumnVector(Literal(query))
def rand(self, start, end, length):
    """Build an array of ``length`` values derived from ``rand64``.

    Each element is ``start + rand64(number) % (1 + abs(start - end))``.

    Args:
        start: Lower offset interpolated into the SQL.
        end: Other bound used to size the modulus.
        length: Number of elements to generate.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.

    Raises:
        Exception: If ``length`` exceeds 100000000.
    """
    too_long = length > 100000000
    if too_long:
        raise Exception(
            'ArrayVector cannot contain more than 100 million elements')
    query = (
        f" SELECT groupArray({start} + rand64(number)%toUInt64(1 + abs({start} - {end}))) AS v"
        f" FROM numbers({length})"
        " SETTINGS max_block_size = 100000000"
    )
    return ArrayVector(Literal(query))
def shuffle(self):
    """Build an array holding the same elements ordered by ``rand()``.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.
    """
    source = self._value
    query = (
        " SELECT groupArray(_shuffle) AS v FROM ("
        " SELECT _shuffle FROM ("
        f" SELECT _shuffle FROM ({source}) ARRAY JOIN v AS _shuffle"
        " ) ORDER BY rand()"
        " ) SETTINGS max_block_size = 100000000"
    )
    return ArrayVector(Literal(query))
def __init__(self, value: any=None, name: str=None, n: str=None) -> None:
    """Store the SQL that produces this vector's rows.

    Args:
        value: Either a ``Literal`` holding ready-made SQL (stored as is), or
            an iterable of Python values rendered into an ``arrayJoin``
            array literal. ``None`` is treated as an empty iterable.
        name: Not used by this constructor.
        n: Not used by this constructor.
    """
    if isinstance(value, Literal):
        self._value = value
        return

    rendered = []
    # The default ``value=None`` would otherwise fail on iteration.
    for col in (() if value is None else value):
        if isinstance(col, str):
            # Backslash-escape backslashes and single quotes so the element
            # cannot terminate its own SQL string literal.
            escaped = col.replace('\\', '\\\\').replace("'", "\\'")
            rendered.append("'{}'".format(escaped))
        else:
            rendered.append(str(col))
    self._value = Literal('SELECT arrayJoin([{}]) AS v'.format(','.join(rendered)))
def join(self, other, cache=False):
    """Append the rows of ``other`` after the rows of this vector.

    Both inputs are selected in index order, combined with ``UNION ALL`` and
    re-indexed with ``rowNumberInAllBlocks()``.

    Args:
        other: Vector exposing ``_cache_vector`` whose rows are appended.
        cache: Forwarded to both ``_cache_vector`` calls.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    left = self._cache_vector(cache)
    right = other._cache_vector(cache)
    query = (
        " SELECT rowNumberInAllBlocks() AS i, v FROM ("
        f" SELECT v FROM ({left}) ORDER BY i"
        " UNION ALL"
        f" SELECT v FROM ({right}) ORDER BY i"
        " )"
    )
    return ColumnVector(Literal(query))
def join(self, N):
    """Concatenate the elements of ``N`` after the elements of this array.

    Both arrays are unrolled with ``ARRAY JOIN``, combined with ``UNION ALL``
    and regrouped with ``groupArray``.

    Args:
        N: Array vector exposing ``_value`` whose elements are appended.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.
    """
    left = self._value
    right = N._value
    # NOTE: an arrayConcat-based single-expression form was previously
    # sketched here but is not used.
    query = (
        " SELECT groupArray(_v) AS v FROM ("
        f" SELECT _v FROM ({left}) ARRAY JOIN v AS _v"
        " UNION ALL"
        f" SELECT _v FROM ({right}) ARRAY JOIN v AS _v"
        " ) SETTINGS max_block_size=100000000"
    )
    return ArrayVector(Literal(query))
def toColumnVector(self):
    """Unroll the cached array into a ``ColumnVector`` with one row per element.

    The row index comes from ``arrayEnumerate(v)`` and the source is whatever
    ``self._cache()`` returns.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    # Imported locally, as in the original implementation.
    from vulkn.types.column_vector import ColumnVector

    table = self._cache()
    query = (
        " SELECT i, _v AS v FROM ("
        f" SELECT i, _v FROM ({table})"
        " ARRAY JOIN arrayEnumerate(v) AS i, v AS _v)"
    )
    return ColumnVector(Literal(query))
def range(self, start, end):
    """Build a sorted array of ``1 + abs(start - end)`` consecutive values.

    Element ``number`` holds ``number + start``.

    Args:
        start: Offset added to each generated number.
        end: Other bound, used only to size the sequence.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL, flagged as sorted.

    Raises:
        Exception: If the array would hold more than 100000000 elements.
    """
    # Validate the number of elements the query really generates. Comparing
    # the signed difference let descending bounds (start > end) slip past
    # the limit because ``end - start`` is negative there.
    element_count = 1 + abs(start - end)
    if element_count > 100000000:
        raise Exception(
            'ArrayVector cannot contain more than 100 million elements')
    query = (
        f" SELECT groupArray(number + {start}) AS v"
        f" FROM numbers(toUInt64(1 + abs({start} - {end})))"
        " SETTINGS max_block_size = 100000000"
    )
    result = ArrayVector(Literal(query))
    result._sorted = True
    return result
def delta(self):
    """Build an array of differences between each element and its predecessor.

    Every element ``v1`` is paired with the element before it (``NULL`` for
    the first) and ``v1 - v2`` is collected; the result is prefixed with a
    ``NULL`` entry.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.
    """
    source = self._value
    query = (
        " SELECT arrayConcat([NULL], groupArray(_delta)) AS v FROM ("
        " SELECT v1 - v2 AS _delta"
        f" FROM ({source})"
        " ARRAY JOIN v AS v1, arrayConcat([NULL], arraySlice(v, 1, -1)) AS v2"
        " ) SETTINGS max_block_size = 100000000"
    )
    return ArrayVector(Literal(query))
def map(self, func, *args):
    """Apply the SQL function ``func`` to every element of the array.

    Args:
        func: SQL function name interpolated into the query.
        *args: Extra arguments rendered with ``str`` and passed to ``func``
            ahead of the array element.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.
    """
    # Every extra argument is followed by a comma so the element always
    # lands in the last position; without extras the prefix is empty.
    leading_args = ''.join(str(arg) + ',' for arg in args)
    source = self.sql
    query = (
        " SELECT groupArray(_v) AS v FROM ("
        f" SELECT {func}({leading_args}_map) AS _v"
        f" FROM ({source}) ARRAY JOIN v AS _map"
        " ) SETTINGS max_block_size = 100000000"
    )
    return ArrayVector(Literal(query))
def maplead(self, func):
    """Apply ``func`` to each element paired with the element after it.

    Every element ``v1`` is paired with its successor ``v2`` (``NULL`` for
    the last element) and ``func(v2, v1)`` is collected.

    Args:
        func: SQL function name interpolated into the query.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.
    """
    source = self.sql
    query = (
        " SELECT groupArray(_maplead) AS v FROM ("
        f" SELECT {func}(v2, v1) AS _maplead"
        f" FROM ({source})"
        " ARRAY JOIN v AS v1, arrayConcat(arraySlice(v, 2), [NULL]) AS v2"
        " ) SETTINGS max_block_size = 100000000"
    )
    return ArrayVector(Literal(query))
def take(self, length):
    """Build a vector of ``length`` rows by cycling through the source values.

    Row ``number`` takes the element at position
    ``(number % length(array)) + 1`` of the source array.

    Args:
        length: Row count passed to ``numbers_mt``.

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    if isinstance(self._value, (str, Literal)):
        # SQL-backed vectors are first gathered into one array in index order.
        array_sql = f'SELECT groupArray(v) FROM (SELECT v FROM ({self.sql}) ORDER BY i)'
    else:
        array_sql = str(self)
    query = (
        " SELECT rowNumberInAllBlocks() AS i, v FROM ("
        f" WITH ({array_sql}) AS `#v`"
        " SELECT `#v`[(number%length(`#v`))+1] AS v"
        f" FROM numbers_mt({length})"
        " )"
    )
    return ColumnVector(Literal(query))
def sort(self):
    """Return an array with the same elements in ascending order.

    Returns:
        ``self`` when it is already flagged as sorted; otherwise a new
        ``ArrayVector`` wrapping the generated SQL and flagged as sorted.
    """
    if self._sorted:
        # Already ordered: no new query is needed.
        return self
    source = self.sql
    query = (
        " SELECT groupArray(_sort) AS v FROM ("
        " SELECT _sort FROM ("
        f" SELECT _sort FROM ({source}) ARRAY JOIN v AS _sort"
        " ) ORDER BY _sort"
        " ) SETTINGS max_block_size = 100000000"
    )
    ordered = ArrayVector(Literal(query))
    ordered._sorted = True
    return ordered
def __init__(self, value: any = None, name: str = None, n: str = None) -> None:
    """Store the SQL that produces this array and reset its bookkeeping.

    Args:
        value: Either a ``Literal`` holding ready-made SQL (stored as is), or
            an iterable of Python values rendered into a SQL array literal.
            ``None`` is treated as an empty iterable.
        name: Not used by this constructor.
        n: Not used by this constructor.
    """
    self._sorted = False
    self._cache_table = None
    if isinstance(value, Literal):
        self._value = value
        return

    rendered = []
    # The default ``value=None`` would otherwise fail on iteration.
    for col in (() if value is None else value):
        if isinstance(col, str):
            # Backslash-escape backslashes and single quotes so the element
            # cannot terminate its own SQL string literal.
            escaped = col.replace('\\', '\\\\').replace("'", "\\'")
            rendered.append("'{}'".format(escaped))
        else:
            rendered.append(str(col))
    self._value = Literal("SELECT [{}] AS v".format(','.join(rendered)))
def take(self, length):
    """Build an array of ``length`` elements by repeating the source array.

    The query combines ``floor(length / length(v))`` whole copies of ``v``
    with a slice of its first ``length % length(v)`` elements.

    Args:
        length: Requested number of elements.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.

    Raises:
        Exception: If ``length`` exceeds 100000000.
    """
    too_long = length > 100000000
    if too_long:
        raise Exception(
            'ArrayVector cannot contain more than 100 million elements')
    source = self._value
    query = (
        " SELECT _take AS v FROM ("
        " SELECT groupArrayArrayArrayArray(["
        f" arrayMap(x -> v, range(toUInt64(floor({length} / length(v))))),"
        f" [arraySlice(v, 1, {length} % length(v))]"
        " ]) AS _take"
        f" FROM ({source}))"
    )
    return ArrayVector(Literal(query))
def norm(self, mean, stddev, length):
    """Build an array of ``length`` values scaled by ``stddev`` and shifted by ``mean``.

    Values are generated in (cos, sin) pairs from two ``rand`` draws per
    pair, flattened into one array and sliced down to ``length`` elements.

    Args:
        mean: Value added to every generated element.
        stddev: Factor every generated element is multiplied by.
        length: Requested number of elements (converted with ``int``).

    Returns:
        An ``ArrayVector`` wrapping the generated SQL.

    Raises:
        Exception: If ``length`` exceeds 100000000.
    """
    too_long = length > 100000000
    if too_long:
        raise Exception(
            'ArrayVector cannot contain more than 100 million elements')
    count = int(length)
    rand_max = vulkn.types.UInt32.MAX
    # The two members of each pair differ only in the trigonometric function.
    cos_term, sin_term = (
        f"((sqrt(-2.0*log(rand(i)/{rand_max}))"
        f"*{trig}(2*pi()*rand(i+100000000)/{rand_max}))"
        f"*toFloat32({stddev}))+toFloat32({mean})"
        for trig in ("cos", "sin")
    )
    query = (
        " SELECT arraySlice( arrayReduce( 'groupArrayArray', arrayMap( i -> ["
        f" {cos_term} ,"
        f" {sin_term}] ,"
        f" range(toUInt64(ceil({count}/2))))), 1, {count}) AS v"
    )
    return ArrayVector(Literal(query))
def cut(self, cut_length):
    """Split the array into consecutive chunks of ``cut_length`` elements.

    Elements are bucketed by ``(_i - _i % cut_length) / cut_length + 1``,
    where ``_i`` is the zero-based element position, and the buckets are
    gathered into an array of arrays ordered by bucket number.

    Args:
        cut_length: Chunk width interpolated into the SQL.

    Returns:
        An ``ArrayVector`` wrapping the generated SQL; it inherits this
        vector's sorted flag.
    """
    source = self._value
    query = (
        " SELECT groupArray(_v) AS v FROM ("
        f" WITH (_i-_i%{cut_length})/{cut_length}+1 AS _idx"
        " SELECT groupArray(_cut) AS _v FROM ("
        " SELECT i - 1 AS _i, _cut"
        f" FROM ({source})"
        " ARRAY JOIN v AS _cut, arrayEnumerate(v) AS i"
        " ) GROUP BY _idx ORDER BY _idx"
        " ) SETTINGS max_block_size = 100000000"
    )
    chunks = ArrayVector(Literal(query))
    chunks._sorted = self._sorted
    return chunks
def norm(self, mean, stddev, count):
    """Build a vector of ``count`` values scaled by ``stddev`` and shifted by ``mean``.

    Values are generated in (cos, sin) pairs from two ``rand`` draws per
    pair, flattened, sliced down to ``count`` elements and unrolled into rows
    indexed with ``rowNumberInAllBlocks()``.

    Args:
        mean: Value added to every generated element.
        stddev: Factor every generated element is multiplied by.
        count: Requested number of rows (converted with ``int``).

    Returns:
        A ``ColumnVector`` wrapping the generated SQL.
    """
    count = int(count)
    rand_max = vulkn.types.UInt32.MAX
    # The two members of each pair differ only in the trigonometric function.
    cos_term, sin_term = (
        f"((sqrt(-2.0*log(rand(i)/{rand_max}))"
        f"*{trig}(2*pi()*rand(i+100000000)/{rand_max}))"
        f"*toFloat32({stddev}))+toFloat32({mean})"
        for trig in ("cos", "sin")
    )
    query = (
        " SELECT rowNumberInAllBlocks() AS i, v FROM ("
        " SELECT arrayJoin( arraySlice( arrayReduce( 'groupArrayArray', arrayMap( i -> ["
        f" {cos_term} ,"
        f" {sin_term}] ,"
        f" range(toUInt64(ceil({count}/2))))), 1, {count})) AS v"
        " SETTINGS max_block_size=1)"
    )
    return ColumnVector(Literal(query))
def move(self, positions):
    """Shift the array elements by ``positions`` slots, padding with NULL.

    A positive count prepends NULLs and slices ``positions`` elements off the
    end; a negative count slices them off the front and appends NULLs.

    Args:
        positions: Signed number of slots to shift by.

    Returns:
        ``self`` when ``positions`` is neither positive nor negative;
        otherwise a new ``ArrayVector`` wrapping the generated SQL that
        inherits this vector's sorted flag.
    """
    source = self._value
    if positions > 0:
        query = (
            " SELECT arrayConcat("
            f" arrayWithConstant({positions}, NULL),"
            f" arraySlice(v, 1, -({positions}))"
            " ) AS v"
            f" FROM ({source})"
        )
    elif positions < 0:
        query = (
            " SELECT arrayConcat("
            f" arraySlice(v, abs({positions})+1),"
            f" arrayWithConstant(abs({positions}), NULL)"
            " ) AS v"
            f" FROM ({source})"
        )
    else:
        # Nothing to shift.
        return self
    shifted = ArrayVector(Literal(query))
    shifted._sorted = self._sorted
    return shifted
def _method(lambda_arg):
    """Wrap a call to the ``array<Name>`` SQL function in an ``Array``.

    The function name is ``array`` followed by the title-cased,
    underscore-separated parts of the enclosing ``name``. The call receives
    ``lambda_arg`` as a ``Literal`` and then the quoted ``self._value``.

    Args:
        lambda_arg: Text passed to the SQL function as its first argument.

    Returns:
        An ``Array`` wrapping the resulting ``FunctionExpression``.
    """
    camel_suffix = ''.join(part.title() for part in name.split('_'))
    expression = FunctionExpression(
        f'array{camel_suffix}',
        Literal(lambda_arg),
        quote_literal(self._value),
    )
    return Array(expression)
def JSONExtractKeysAndValues(self, *indices_or_keys, value_type):
    """Build a ``JSONExtractKeysAndValues`` call on this value.

    Args:
        *indices_or_keys: Path components passed through after the value.
        value_type: Keyword-only final argument of the call.

    Returns:
        A ``TypeBase`` wrapping the generated expression.
    """
    call_args = (self._value, *indices_or_keys, value_type)
    expression = func('JSONExtractKeysAndValues', *call_args)
    return TypeBase(Literal(expression))
def JSONExtract(self, indices_or_keys, return_type):
    """Build a ``JSONExtract`` call on this value.

    Args:
        indices_or_keys: Either a single path component (string key or
            integer index) or an iterable of path components.
        return_type: Final argument of the call.

    Returns:
        A ``TypeBase`` wrapping the generated expression.
    """
    # A lone key or index is treated as a one-element path. Unpacking it
    # directly would explode a string into characters and fail on an int.
    if isinstance(indices_or_keys, (str, bytes)):
        path = (indices_or_keys,)
    else:
        try:
            path = tuple(indices_or_keys)
        except TypeError:
            path = (indices_or_keys,)
    expression = func('JSONExtract', self._value, *path, return_type)
    return TypeBase(Literal(expression))