Пример #1
0
 def next(self, cache=False):
     """Return a ColumnVector of this vector's rows with ``i > 0`` plus a NULL row.

     The generated query keeps every ``(i, v)`` row whose index is greater
     than zero and appends one extra row ``(COUNT(), NULL)``.

     Args:
         cache: Forwarded to ``self._cache_vector``, which supplies the SQL
             source embedded in the query.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     vector = self._cache_vector(cache)
     # The COUNT() source is parenthesised exactly like the main source on the
     # first SELECT: a bare SELECT statement is not valid directly after FROM.
     q = f"""
         SELECT i, v FROM ({vector}) WHERE i > 0
         UNION ALL
         SELECT (SELECT COUNT() FROM ({vector})), NULL"""
     return ColumnVector(Literal(q))
Пример #2
0
 def cut(self, cut_length, cache=False):
     """Collect the vector's values into arrays, one per index bucket.

     Rows are grouped by ``(i - i % cut_length) / cut_length`` and the values
     of each group are gathered with ``groupArray``.

     Args:
         cut_length: Bucket width interpolated into the SQL.
         cache: Forwarded to ``self._cache_vector``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     source = self._cache_vector(cache)
     sql = f"""
         SELECT (i-i%{cut_length})/{cut_length} AS i, groupArray(v) AS v
         FROM ({source})
         GROUP BY i ORDER BY i"""
     return ColumnVector(Literal(sql))
Пример #3
0
 def rand(self, start, end, length, max_block_size=30000):
     """Build a ColumnVector of ``length`` pseudo-random values.

     Row ``i`` comes from ``numbers_mt(length)`` and its value is
     ``start + rand64(i) % (1 + abs(start - end))``.

     Args:
         start: Offset added to every generated value.
         end: Used together with ``start`` to size the modulus.
         length: Number of rows requested from ``numbers_mt``.
         max_block_size: Value for the query's ``max_block_size`` setting.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     sql = f"""
         SELECT
             number AS i,
             {start} + rand64(number)%toUInt64(1 + abs({start} - {end})) AS v
         FROM numbers_mt({length})
         SETTINGS max_block_size={max_block_size}"""
     return ColumnVector(Literal(sql))
Пример #4
0
 def maplead(self, func, cache=False):
     """Apply ``func`` to each element paired with the element that follows it.

     The vector is joined with a copy of itself whose index is shifted by
     ``-1`` (``toUInt64(i-1)``), so for a given ``i`` the call is
     ``func(<value at i+1>, <value at i>)``.

     Args:
         func: Name of the SQL function interpolated into the query.
         cache: Forwarded to ``self._cache_vector``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     source = self._cache_vector(cache)
     sql = f"""
         SELECT i, {func}(v2.v, v1.v) AS v FROM ({source}) v1
         ALL INNER JOIN (
             SELECT toUInt64(i-1) AS i, v FROM ({source})
         ) v2 USING (i)"""
     return ColumnVector(Literal(sql))
Пример #5
0
 def range(self, start, end, max_block_size=30000):
     """Build a ColumnVector of consecutive integers beginning at ``start``.

     The query reads ``1 + abs(start - end)`` rows from ``numbers_mt`` and
     emits ``number + start`` for each of them.

     Args:
         start: First value of the sequence.
         end: Used together with ``start`` to size the sequence.
         max_block_size: Value for the query's ``max_block_size`` setting.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     sql = f"""
         SELECT
             number AS i,
             number + {start} AS v 
         FROM numbers_mt(toUInt64(1 + abs({start} - {end})))
         SETTINGS max_block_size={max_block_size}"""
     return ColumnVector(Literal(sql))
Пример #6
0
 def cast(self, to_type):
     """Return a ColumnVector whose values are cast to ``to_type``.

     Args:
         to_type: Either a type name string, or an object whose ``CAST``
             attribute provides the type name.

     Returns:
         A new ``ColumnVector`` wrapping ``cast(v, '<type>')`` over this vector.
     """
     if isinstance(to_type, str):
         target = to_type
     else:
         target = to_type.CAST
     if isinstance(self._value, list):
         # A plain Python list is expanded into rows with arrayJoin.
         source = f'SELECT rowNumberInAllBlocks() AS i, arrayJoin({self._value!s}) AS v'
     else:
         source = self.sql
     return ColumnVector(Literal(f"SELECT i, cast(v, '{target}') AS v FROM ({source})"))
Пример #7
0
 def prev(self, cache=False):
     """Return a ColumnVector shifted forward by one index with a leading NULL.

     The query emits ``(0, NULL)`` followed by ``(i + 1, v)`` for the rows
     of this vector that pass the ``i < max(i-1)`` filter.

     NOTE(review): the filter compares against ``max(i-1)``, so rows with
     ``i >= max(i) - 1`` are excluded - confirm this is the intended cutoff.

     Args:
         cache: Forwarded to ``self._cache_vector``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     source = self._cache_vector(cache)
     sql = f"""
         SELECT 0 AS i, NULL AS v
         UNION ALL
         SELECT i+1, v FROM (
             SELECT * FROM ({source}) WHERE i < (SELECT max(i-1) FROM ({source}))
         )"""
     return ColumnVector(Literal(sql))
Пример #8
0
 def toArrayVector(self):
     """Convert this vector into an ArrayVector holding all values in one array.

     Values are read from the result of ``self._cache()`` in ``i`` order,
     restricted to ``i < 100000000`` and at most 100000000 rows, and are
     collected with ``groupArray``.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.
     """
     # Imported inside the method, as in the original code.
     from vulkn.types.array_vector import ArrayVector
     table = self._cache()
     sql = f"""
         SELECT
             groupArray(v) AS v
         FROM (
             SELECT v FROM {table} WHERE i < 100000000 ORDER BY i LIMIT 100000000
         )"""
     return ArrayVector(Literal(sql))
Пример #9
0
 def flatten(self, cache=False):
     """Unnest array values into a single flat ColumnVector.

     Each array ``v`` is expanded with ``ARRAY JOIN``; the rows are ordered
     by the outer index and then the position inside the array, and are
     renumbered with ``rowNumberInAllBlocks()``.

     Args:
         cache: Forwarded to ``self._cache_vector``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     source = self._cache_vector(cache)
     sql = f"""
         SELECT rowNumberInAllBlocks() AS i, _v AS v
         FROM (
             SELECT _v FROM ({source}) 
             ARRAY JOIN arrayEnumerate(v) AS _i, v AS _v
             ORDER BY i, _i
         )"""
     return ColumnVector(Literal(sql))
Пример #10
0
 def rand(self, start, end, length):
     """Build an ArrayVector of ``length`` pseudo-random values.

     Element ``n`` (from ``numbers(length)``) is
     ``start + rand64(n) % (1 + abs(start - end))``.

     Args:
         start: Offset added to every generated value.
         end: Used together with ``start`` to size the modulus.
         length: Number of elements to generate.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.

     Raises:
         Exception: If ``length`` exceeds 100000000.
     """
     too_long = length > 100000000
     if too_long:
         message = 'ArrayVector cannot contain more than 100 million elements'
         raise Exception(message)
     sql = f"""
         SELECT 
             groupArray({start} + rand64(number)%toUInt64(1 + abs({start} - {end}))) AS v
         FROM numbers({length})
         SETTINGS max_block_size = 100000000"""
     return ArrayVector(Literal(sql))
Пример #11
0
 def shuffle(self):
     """Return an ArrayVector with this array's elements in random order.

     The array is unnested with ``ARRAY JOIN``, ordered by ``rand()`` and
     re-collected with ``groupArray``.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.
     """
     source = self._value
     sql = f"""
         SELECT
             groupArray(_shuffle) AS v
         FROM (
             SELECT _shuffle FROM (
                 SELECT _shuffle FROM ({source}) ARRAY JOIN v AS _shuffle
             ) ORDER BY rand()
         ) SETTINGS max_block_size = 100000000"""
     return ArrayVector(Literal(sql))
Пример #12
0
 def __init__(self, value: any=None, name: str=None, n: str=None) -> None:
     """Initialise the vector from a ``Literal`` query or an iterable of values.

     Args:
         value: Either a ``Literal`` (stored unchanged) or an iterable of
             elements rendered into ``SELECT arrayJoin([...]) AS v``.
             ``None`` (the default) is treated as an empty iterable.
         name: Not used by this initialiser.
         n: Not used by this initialiser.
     """
     if isinstance(value, Literal):
         self._value = value
         return
     rendered = []
     # The default ``None`` used to crash on iteration; treat it as "no elements".
     for col in ([] if value is None else value):
         if col is None:
             # str(None) would emit the invalid token ``None``.
             rendered.append('NULL')
         elif isinstance(col, str):
             # Escape backslashes first, then quotes, so the value survives
             # as a single SQL string literal.
             escaped = col.replace('\\', '\\\\').replace("'", "\\'")
             rendered.append("'{}'".format(escaped))
         else:
             rendered.append(str(col))
     self._value = Literal('SELECT arrayJoin([{}]) AS v'.format(','.join(rendered)))
Пример #13
0
 def join(self, other, cache=False):
     """Concatenate this vector with ``other`` into a new ColumnVector.

     The values of both vectors are combined with ``UNION ALL`` and the
     result is renumbered with ``rowNumberInAllBlocks()``.

     Args:
         other: Vector to append; must provide ``_cache_vector``.
         cache: Forwarded to both vectors' ``_cache_vector``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     left = self._cache_vector(cache)
     right = other._cache_vector(cache)
     sql = f"""
         SELECT rowNumberInAllBlocks() AS i, v
         FROM (
             SELECT v FROM ({left}) ORDER BY i
             UNION ALL
             SELECT v FROM ({right}) ORDER BY i
         )"""
     return ColumnVector(Literal(sql))
Пример #14
0
 def join(self, N):
     """Concatenate this array with the array held by ``N``.

     Both arrays are unnested with ``ARRAY JOIN``, combined with
     ``UNION ALL`` and re-collected with ``groupArray``.

     Args:
         N: Other vector; its ``_value`` is embedded as the second source.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.
     """
     left = self._value
     right = N._value
     # NOTE: an earlier draft used a single arrayConcat over both sources.
     sql = f"""
         SELECT groupArray(_v) AS v
         FROM (
             SELECT _v FROM ({left}) ARRAY JOIN v AS _v
             UNION ALL
             SELECT _v FROM ({right}) ARRAY JOIN v AS _v
         ) SETTINGS max_block_size=100000000"""
     return ArrayVector(Literal(sql))
Пример #15
0
 def toColumnVector(self):
     """Convert this array into a ColumnVector with one row per element.

     The array from ``self._cache()`` is unnested with ``ARRAY JOIN``; the
     row index comes from ``arrayEnumerate(v)``.

     NOTE(review): ``arrayEnumerate`` numbering starts at 1 - confirm callers
     expect that base for ``i``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     # Imported inside the method, as in the original code.
     from vulkn.types.column_vector import ColumnVector
     source = self._cache()
     sql = f"""
         SELECT i, _v AS v
         FROM (
             SELECT i, _v
             FROM ({source})
             ARRAY JOIN
                 arrayEnumerate(v) AS i,
                 v AS _v)"""
     return ColumnVector(Literal(sql))
Пример #16
0
 def range(self, start, end):
     """Build a sorted ArrayVector of consecutive integers beginning at ``start``.

     The query collects ``number + start`` for ``1 + abs(start - end)`` rows
     of ``numbers``.

     Args:
         start: First value of the sequence.
         end: Used together with ``start`` to size the sequence.

     Returns:
         A new ``ArrayVector`` flagged as sorted.

     Raises:
         Exception: If the sequence would hold more than 100000000 elements.
     """
     # Guard on the element count the query actually produces. The previous
     # check (``end - start``) missed reversed bounds and was off by one.
     element_count = 1 + abs(start - end)
     if element_count > 100000000:
         raise Exception(
             'ArrayVector cannot contain more than 100 million elements')
     q = f"""
         SELECT
             groupArray(number + {start}) AS v
         FROM numbers(toUInt64(1 + abs({start} - {end})))
         SETTINGS max_block_size = 100000000"""
     r = ArrayVector(Literal(q))
     r._sorted = True
     return r
Пример #17
0
 def delta(self):
     """Return an ArrayVector of differences between neighbouring elements.

     Each element ``v1`` is paired with ``v2``, taken from the array
     ``[NULL] + arraySlice(v, 1, -1)``; the query computes ``v1 - v2`` and
     prepends ``NULL`` to the collected result.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.
     """
     source = self._value
     sql = f"""
         SELECT arrayConcat([NULL], groupArray(_delta)) AS v
         FROM (
             SELECT
                 v1 - v2 AS _delta
             FROM ({source})
             ARRAY JOIN
                 v AS v1,
                 arrayConcat([NULL], arraySlice(v, 1, -1)) AS v2
         ) SETTINGS max_block_size = 100000000"""
     return ArrayVector(Literal(sql))
Пример #18
0
 def map(self, func, *args):
     """Apply the SQL function ``func`` to every element of the array.

     Args:
         func: Name of the SQL function interpolated into the query.
         *args: Extra arguments, stringified and passed before the element,
             i.e. ``func(arg1, ..., element)``.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.
     """
     # Every extra argument is followed by a comma so the element comes last.
     leading = ''.join(str(arg) + ',' for arg in args)
     source = self.sql
     sql = f"""
         SELECT groupArray(_v) AS v
         FROM (
             SELECT
                 {func}({leading}_map) AS _v
             FROM ({source})
             ARRAY JOIN
                 v AS _map
         ) SETTINGS max_block_size = 100000000"""
     return ArrayVector(Literal(sql))
Пример #19
0
 def maplead(self, func):
     """Apply ``func`` to each element paired with the element that follows it.

     ``v2`` is the array shifted left by one (``arraySlice(v, 2)`` with a
     trailing ``NULL``), so the call is ``func(<next>, <current>)``.

     Args:
         func: Name of the SQL function interpolated into the query.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.
     """
     source = self.sql
     sql = f"""
         SELECT groupArray(_maplead) AS v
         FROM (
             SELECT
                 {func}(v2, v1) AS _maplead
             FROM ({source})
             ARRAY JOIN
                 v AS v1,
                 arrayConcat(arraySlice(v, 2), [NULL]) AS v2
         ) SETTINGS max_block_size = 100000000"""
     return ArrayVector(Literal(sql))
Пример #20
0
 def take(self, length):
     """Return ``length`` values, cycling through this vector's elements.

     The vector is first turned into a single array; row ``n`` of
     ``numbers_mt(length)`` then picks element ``(n % length(array)) + 1``.

     Args:
         length: Number of rows to produce.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     if isinstance(self._value, (str, Literal)):
         # SQL-backed vector: collect its values in index order.
         array_sql = f'SELECT groupArray(v) FROM (SELECT v FROM ({self.sql}) ORDER BY i)'
     else:
         array_sql = str(self)
     sql = f"""
         SELECT rowNumberInAllBlocks() AS i, v
         FROM (
             WITH ({array_sql}) AS `#v`
             SELECT `#v`[(number%length(`#v`))+1] AS v
             FROM numbers_mt({length})
         )"""
     return ColumnVector(Literal(sql))
Пример #21
0
 def sort(self):
     """Return an ArrayVector with the elements in ascending order.

     When ``self._sorted`` is already set, the instance itself is returned
     and no query is built.

     Returns:
         ``self`` if already sorted, otherwise a new ``ArrayVector`` flagged
         as sorted.
     """
     if self._sorted:
         return self
     source = self.sql
     sql = f"""
         SELECT
             groupArray(_sort) AS v
         FROM (
             SELECT _sort FROM (
                 SELECT _sort FROM ({source}) ARRAY JOIN v AS _sort
             ) ORDER BY _sort
         ) SETTINGS max_block_size = 100000000"""
     ordered = ArrayVector(Literal(sql))
     ordered._sorted = True
     return ordered
Пример #22
0
 def __init__(self,
              value: any = None,
              name: str = None,
              n: str = None) -> None:
     """Initialise the array vector from a ``Literal`` query or an iterable.

     Args:
         value: Either a ``Literal`` (stored unchanged) or an iterable of
             elements rendered into ``SELECT [...] AS v``. ``None`` (the
             default) is treated as an empty iterable.
         name: Not used by this initialiser.
         n: Not used by this initialiser.
     """
     self._sorted = False
     self._cache_table = None
     if isinstance(value, Literal):
         self._value = value
         return
     rendered = []
     # The default ``None`` used to crash on iteration; treat it as "no elements".
     for col in ([] if value is None else value):
         if col is None:
             # str(None) would emit the invalid token ``None``.
             rendered.append('NULL')
         elif isinstance(col, str):
             # Escape backslashes first, then quotes, so the value survives
             # as a single SQL string literal.
             escaped = col.replace('\\', '\\\\').replace("'", "\\'")
             rendered.append("'{}'".format(escaped))
         else:
             rendered.append(str(col))
     self._value = Literal("SELECT [{}] AS v".format(','.join(rendered)))
Пример #23
0
 def take(self, length):
     """Return an ArrayVector of ``length`` elements built by repeating this array.

     The query combines ``floor(length / length(v))`` copies of ``v`` with
     the slice ``arraySlice(v, 1, length % length(v))``.

     Args:
         length: Number of elements requested.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.

     Raises:
         Exception: If ``length`` exceeds 100000000.
     """
     too_long = length > 100000000
     if too_long:
         message = 'ArrayVector cannot contain more than 100 million elements'
         raise Exception(message)
     source = self._value
     sql = f"""
         SELECT
             _take AS v
         FROM (
             SELECT
                 groupArrayArrayArrayArray([
                     arrayMap(x -> v, range(toUInt64(floor({length} / length(v))))),
                     [arraySlice(v, 1, {length} % length(v))]
                 ]) AS _take
             FROM ({source}))"""
     return ArrayVector(Literal(sql))
Пример #24
0
 def norm(self, mean, stddev, length):
     """Build an ArrayVector of ``length`` values scaled by ``stddev`` and shifted by ``mean``.

     The query generates values in pairs (a ``cos`` and a ``sin`` term built
     from ``rand``), concatenates the pairs and slices the result down to
     ``int(length)`` elements.

     Args:
         mean: Added to every generated value (as ``toFloat32``).
         stddev: Multiplier for every generated value (as ``toFloat32``).
         length: Number of elements requested; truncated with ``int``.

     Returns:
         A new ``ArrayVector`` wrapping the generated SQL.

     Raises:
         Exception: If ``length`` exceeds 100000000.
     """
     too_long = length > 100000000
     if too_long:
         message = 'ArrayVector cannot contain more than 100 million elements'
         raise Exception(message)
     total = int(length)
     uint32_max = vulkn.types.UInt32.MAX
     sql = f"""
         SELECT
             arraySlice(
                 arrayReduce(
                     'groupArrayArray',
                     arrayMap(
                         i -> 
                             [ ((sqrt(-2.0*log(rand(i)/{uint32_max}))*cos(2*pi()*rand(i+100000000)/{uint32_max}))*toFloat32({stddev}))+toFloat32({mean})
                             , ((sqrt(-2.0*log(rand(i)/{uint32_max}))*sin(2*pi()*rand(i+100000000)/{uint32_max}))*toFloat32({stddev}))+toFloat32({mean})]
                         , range(toUInt64(ceil({total}/2))))), 1, {total}) AS v"""
     return ArrayVector(Literal(sql))
Пример #25
0
 def cut(self, cut_length):
     """Split the array into sub-arrays, one per index bucket.

     Elements are numbered from zero (``arrayEnumerate(v) - 1``), grouped by
     ``(_i - _i % cut_length) / cut_length + 1`` and each group is collected
     into its own array.

     Args:
         cut_length: Bucket width interpolated into the SQL.

     Returns:
         A new ``ArrayVector`` that carries over this instance's ``_sorted`` flag.
     """
     source = self._value
     sql = f"""
         SELECT
             groupArray(_v) AS v
         FROM (
             WITH
                 (_i-_i%{cut_length})/{cut_length}+1 AS _idx
             SELECT groupArray(_cut) AS _v
             FROM (
                 SELECT i - 1 AS _i, _cut FROM ({source}) ARRAY JOIN v AS _cut, arrayEnumerate(v) AS i
             )
             GROUP BY _idx ORDER BY _idx
         ) SETTINGS max_block_size = 100000000"""
     chunks = ArrayVector(Literal(sql))
     chunks._sorted = self._sorted
     return chunks
Пример #26
0
 def norm(self, mean, stddev, count):
     """Build a ColumnVector of ``count`` values scaled by ``stddev`` and shifted by ``mean``.

     The query generates values in pairs (a ``cos`` and a ``sin`` term built
     from ``rand``), slices the concatenated pairs down to ``int(count)``
     elements, unnests them with ``arrayJoin`` and numbers the rows with
     ``rowNumberInAllBlocks()``.

     Args:
         mean: Added to every generated value (as ``toFloat32``).
         stddev: Multiplier for every generated value (as ``toFloat32``).
         count: Number of rows requested; truncated with ``int``.

     Returns:
         A new ``ColumnVector`` wrapping the generated SQL.
     """
     total = int(count)
     uint32_max = vulkn.types.UInt32.MAX
     sql = f"""
         SELECT
             rowNumberInAllBlocks() AS i,
             v
         FROM (
         SELECT
             arrayJoin(
                 arraySlice(
                     arrayReduce(
                         'groupArrayArray',
                         arrayMap(
                             i -> 
                                 [ ((sqrt(-2.0*log(rand(i)/{uint32_max}))*cos(2*pi()*rand(i+100000000)/{uint32_max}))*toFloat32({stddev}))+toFloat32({mean})
                                 , ((sqrt(-2.0*log(rand(i)/{uint32_max}))*sin(2*pi()*rand(i+100000000)/{uint32_max}))*toFloat32({stddev}))+toFloat32({mean})]
                             , range(toUInt64(ceil({total}/2))))), 1, {total})) AS v
         SETTINGS max_block_size=1)"""
     return ColumnVector(Literal(sql))
Пример #27
0
 def move(self, positions):
     """Shift the array's elements by ``positions`` slots, padding with NULL.

     A positive value prepends ``positions`` NULLs and drops the same number
     of trailing elements; a negative value drops leading elements and
     appends NULLs. Any other value returns ``self`` unchanged.

     Args:
         positions: Signed number of slots to shift by.

     Returns:
         ``self`` when no shift is requested, otherwise a new ``ArrayVector``
         that carries over this instance's ``_sorted`` flag.
     """
     source = self._value
     if not (positions > 0 or positions < 0):
         return self
     if positions > 0:
         sql = f"""
             SELECT 
                 arrayConcat(
                     arrayWithConstant({positions}, NULL),
                     arraySlice(v, 1, -({positions}))
                 ) AS v
             FROM ({source})"""
     else:
         sql = f"""
             SELECT
                 arrayConcat(
                     arraySlice(v, abs({positions})+1),
                     arrayWithConstant(abs({positions}), NULL)
                 ) AS v
             FROM ({source})"""
     shifted = ArrayVector(Literal(sql))
     shifted._sorted = self._sorted
     return shifted
Пример #28
0
 def _method(lambda_arg):
     """Build an ``Array`` expression calling the ``array<Name>`` SQL function.

     The function name is ``array`` followed by the title-cased,
     underscore-separated parts of the enclosing scope's ``name``.

     Args:
         lambda_arg: Passed to the function as a ``Literal`` first argument.

     Returns:
         An ``Array`` wrapping the resulting ``FunctionExpression``.
     """
     camel = ''.join(part.title() for part in name.split('_'))
     expression = FunctionExpression(
         'array{}'.format(camel),
         Literal(lambda_arg),
         quote_literal(self._value),
     )
     return Array(expression)
Пример #29
0
 def JSONExtractKeysAndValues(self, *indices_or_keys, value_type):
     """Wrap a ``JSONExtractKeysAndValues`` call on this value.

     Args:
         *indices_or_keys: Path elements passed after the value.
         value_type: Keyword-only; passed as the final argument.

     Returns:
         A ``TypeBase`` wrapping the generated call as a ``Literal``.
     """
     call = func('JSONExtractKeysAndValues', self._value, *indices_or_keys, value_type)
     return TypeBase(Literal(call))
Пример #30
0
 def JSONExtract(self, indices_or_keys, return_type):
     """Wrap a ``JSONExtract`` call on this value.

     Args:
         indices_or_keys: Path elements passed after the value. May be an
             iterable of keys/indices, or a single ``str``/``int`` which is
             treated as a one-element path.
         return_type: Passed as the final argument.

     Returns:
         A ``TypeBase`` wrapping the generated call as a ``Literal``.
     """
     # A bare string would be star-unpacked into characters and a bare int
     # is not iterable at all, so wrap scalars before unpacking.
     if isinstance(indices_or_keys, (str, int)):
         indices_or_keys = (indices_or_keys,)
     return TypeBase(
         Literal((func('JSONExtract', self._value, *indices_or_keys,
                       return_type))))