Example #1
    def condense(self):
        """
        :return:
        """
        # MAKE NEW SHARD
        partition = JoinSQL(
            SQL_COMMA,
            [
                quote_column(c.es_field) for f in listwrap(self.id.field)
                for c in self.flake.leaves(f)
            ],
        )
        order_by = JoinSQL(
            SQL_COMMA,
            [
                ConcatSQL(quote_column(c.es_field), SQL_DESC)
                for f in listwrap(self.id.version)
                for c in self.flake.leaves(f)
            ],
        )
        # WRAP WITH etl.timestamp BEST SELECTION

        self.container.query_and_wait(
            ConcatSQL(
                SQL(  # SOME KEYWORDS: ROWNUM RANK
                    "SELECT * EXCEPT (_rank) FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY "
                ),
                partition,
                SQL_ORDERBY,
                order_by,
                SQL(") AS _rank FROM "),
                quote_column(self.full_name),
                SQL(") a WHERE _rank=1"),
            ))
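For reference, a sketch of the statement the ConcatSQL above assembles (column and table names are placeholders; the real ones come from self.id, self.flake, and self.full_name):

#   SELECT * EXCEPT (_rank)
#   FROM (
#       SELECT *, ROW_NUMBER() OVER (PARTITION BY <id columns> ORDER BY <version columns> DESC) AS _rank
#       FROM <full_name>
#   ) a
#   WHERE _rank=1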
Example #2
def sql_query(command):
    """
    VERY BASIC QUERY EXPRESSION TO SQL
    :param command: jx-expression
    :return: SQL
    """
    command = wrap(command)
    acc = [SQL_SELECT]
    if command.select:
        acc.append(
            JoinSQL(SQL_COMMA, map(quote_column, listwrap(command.select))))
    else:
        acc.append(SQL_STAR)

    acc.append(SQL_FROM)
    acc.append(quote_column(command["from"]))
    if command.where:
        acc.append(SQL_WHERE)
        if command.where.eq:
            acc.append(sql_eq(**command.where.eq))
        else:
            where = esfilter2sqlwhere(command.where)
            acc.append(where)

    sort = coalesce(command.orderby, command.sort)
    if sort:
        acc.append(SQL_ORDERBY)
        acc.append(JoinSQL(SQL_COMMA, map(quote_column, listwrap(sort))))

    if command.limit:
        acc.append(SQL_LIMIT)
        acc.append(
            JoinSQL(SQL_COMMA, map(quote_value, listwrap(command.limit))))

    return ConcatSQL(*acc)
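A minimal usage sketch (the query dict and every name in it are hypothetical; text() is assumed to render SQL objects, as it does in the merge_shards example further below):

# hypothetical jx-expression; table and field names are illustrative only
command = {
    "select": ["name", "revision"],
    "from": "task",
    "where": {"eq": {"state": "completed"}},
    "orderby": "name",
    "limit": 10,
}
print(text(sql_query(command)))
# roughly: SELECT <name>, <revision> FROM <task> WHERE <state = 'completed'> ORDER BY <name> LIMIT 10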
Example #3
    def to_sql(self, schema, not_null=False, boolean=False):
        lhs = SQLang[self.lhs].partial_eval()
        rhs = SQLang[self.rhs].partial_eval()
        lhs_sql = lhs.to_sql(schema, not_null=True)
        rhs_sql = rhs.to_sql(schema, not_null=True)
        if is_literal(rhs) and lhs_sql[0].sql.b != None and rhs.value in ("T", "F"):
            rhs_sql = BooleanOp(rhs).to_sql(schema)
        if is_literal(lhs) and rhs_sql[0].sql.b != None and lhs.value in ("T", "F"):
            lhs_sql = BooleanOp(lhs).to_sql(schema)

        if len(lhs_sql) != len(rhs_sql):
            Log.error("lhs and rhs have different dimensionality!?")

        acc = []
        for l, r in zip(lhs_sql, rhs_sql):
            for t in "bsnj":
                if r.sql[t] == None:
                    if l.sql[t] == None:
                        pass
                    else:
                        acc.append(ConcatSQL(l.sql[t], SQL_IS_NULL))
                elif l.sql[t] == None:
                    acc.append(ConcatSQL(r.sql[t], SQL_IS_NULL))
                else:
                    acc.append(
                        ConcatSQL(sql_iso(l.sql[t]), SQL_EQ,
                                  sql_iso(r.sql[t])))
        if not acc:
            return FALSE.to_sql(schema)
        else:
            return wrap([{"name": ".", "sql": {"b": JoinSQL(SQL_OR, acc)}}])
Example #4
    def to_sql(self, schema, not_null=False, boolean=False):
        return wrap([{
            "name": ".",
            "sql": {
                "b": JoinSQL(
                    SQL_OR,
                    [
                        sql_iso(SQLang[t].to_sql(schema, boolean=True)[0].sql.b)
                        for t in self.terms
                    ],
                )
            },
        }])
Example #5
def quote_column(*path):
    if DEBUG:
        if not path:
            Log.error("expecting a name")
        for p in path:
            if not is_text(p):
                Log.error("expecting strings, not {{type}}",
                          type=p.__class__.__name__)
    try:
        output = ConcatSQL(SQL_SPACE,
                           JoinSQL(SQL_DOT, [SQL(quote(p)) for p in path]),
                           SQL_SPACE)
        return output
    except Exception as e:
        Log.error("Not expacted", cause=e)
Example #6
def sql_query(command):
    """
    VERY BASIC QUERY EXPRESSION TO SQL
    :param command: jx-expression
    :return: SQL
    """
    command = wrap(command)
    acc = [SQL_SELECT]
    if command.select:
        acc.append(
            JoinSQL(SQL_COMMA, map(quote_column, listwrap(command.select))))
    else:
        acc.append(SQL_STAR)

    acc.append(SQL_FROM)
    acc.append(quote_column(command["from"]))
    if command.where.eq:
        acc.append(SQL_WHERE)
        acc.append(sql_eq(**command.where.eq))
    if command.orderby:
        acc.append(SQL_ORDERBY)
        acc.append(
            JoinSQL(SQL_COMMA, map(quote_column, listwrap(command.orderby))))
    return ConcatSQL(*acc)
Example #7
    def where(self, filter):
        """
        WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
        :param filter:  jx_expression filter
        :return: list of objects that match
        """
        select = []
        column_names = []
        for c in self.schema.columns:
            if c.jx_type in STRUCT:
                continue
            if len(c.nested_path) != 1:
                continue
            column_names.append(c.name)
            select.append(sql_alias(quote_column(c.es_column), c.name))

        where_sql = SQLang[jx_expression(filter)].to_sql(self.schema)[0].sql.b
        result = self.db.query(ConcatSQL(
            SQL_SELECT, JoinSQL(SQL_COMMA, select),
            SQL_FROM, quote_column(self.snowflake.fact_name),
            SQL_WHERE, where_sql
        ))

        return wrap([{c: v for c, v in zip(column_names, r)} for r in result.data])
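A hypothetical call (the container variable, filter, and column name are illustrative):

# returns a list of Data objects holding only top-level, non-struct properties
rows = table.where({"eq": {"name": "example"}})
for r in rows:
    print(r.name)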
Example #8
def sql_call(func_name, *parameters):
    return ConcatSQL(SQL(func_name), sql_iso(JoinSQL(SQL_COMMA, parameters)))
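A usage sketch (names are illustrative; sql_iso is assumed to parenthesize its argument, as its other uses above suggest):

count_all = sql_call("count", SQL("1"))           # renders roughly as count(1)
max_age = sql_call("MAX", quote_column("age"))    # renders roughly as MAX(<quoted age>)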
Example #9
def quote_column(*path):
    if not path:
        Log.error("missing column_name")
    if len(path) == 1:
        return SQL("`" + path[0].replace('`', '``') + "`")
    return JoinSQL(SQL_DOT, map(quote_column, path))
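A usage sketch for this variant (hypothetical names):

quote_column("my`table")        # -> SQL("`my``table`")   backticks in the name are doubled
quote_column("db", "mytable")   # -> `db`.`mytable`       parts quoted individually, joined with SQL_DOT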
Example #10
    def clean(self):
        """
        REMOVE ANY RECORDS THAT ARE NOT NEEDED BY QUEUE OR SUBSCRIBERS
        """
        now = Date.now()

        # ANY BLOCKS TO FLUSH?
        with self.db.transaction() as t:
            result = t.query(
                SQL(
                    f"""
                    SELECT id, block_size_mb, block_start
                    FROM {QUEUE} AS q
                    JOIN {BLOCKS} AS b ON b.queue=q.id AND b.serial=q.block_start
                    WHERE b.last_used < {quote_value(now-Duration(WRITE_INTERVAL))}            
                    """
                )
            )

        for stale in rows(result):
            queue = first(q for q in self.queues if q.id == stale.id)
            queue._flush(**stale)

        # REMOVE UNREACHABLE MESSAGES
        conditions = []
        for q in self.queues:
            conditions.append(
                SQL(f"(queue = {quote_value(q.id)} AND serial IN (")
                + SQL(
                    f"""
                    SELECT m.serial
                    FROM {MESSAGES} AS m 
                    LEFT JOIN {UNCONFIRMED} as u ON u.serial = m.serial
                    LEFT JOIN {SUBSCRIBER} as s  ON s.queue = m.queue and s.id = u.subscriber 
                    LEFT JOIN {QUEUE} as q ON q.id = m.queue and m.serial >= q.block_start
                    LEFT JOIN {SUBSCRIBER} as la ON 
                        la.queue = m.queue AND 
                        la.last_confirmed_serial < m.serial AND 
                        m.serial < la.next_emit_serial+la.look_ahead_serial
                    WHERE 
                        m.queue = {q.id} AND
                        s.id IS NULL AND -- STILL UNCONFIRMED POP
                        q.id IS NULL AND -- NOT WRITTEN TO S3 YET
                        la.id IS NULL    -- NOT IN LOOK-AHEAD FOR SUBSCRIBER           
                    """
                )
                + SQL("))")
            )

        with self.db.transaction() as t:
            if DEBUG:
                result = t.query(
                    ConcatSQL(
                        SQL(f"SELECT count(1) AS `count` FROM {MESSAGES} WHERE "),
                        JoinSQL(SQL_OR, conditions),
                    )
                )
                Log.note(
                    "Delete {{num}} messages from database", num=first_row(result).count
                )

            t.execute(
                ConcatSQL(
                    SQL(f"DELETE FROM {MESSAGES} WHERE "), JoinSQL(SQL_OR, conditions)
                )
            )
Example #11
    def _gen_select(source_path, source_tops, source_flake, total_path,
                    total_tops, total_flake):
        if total_flake == source_flake and not total_tops:
            return [
                quote_column(source_path + escape_name(k))
                for k in jx.sort(total_flake.keys())
            ]

        if NESTED_TYPE in total_flake:
            # PROMOTE EVERYTHING TO REPEATED
            v = source_flake.get(NESTED_TYPE)
            t = total_flake.get(NESTED_TYPE)

            if not v:
                # CONVERT INNER OBJECT TO ARRAY OF ONE STRUCT
                inner = [
                    ConcatSQL(
                        SQL_SELECT_AS_STRUCT,
                        JoinSQL(
                            ConcatSQL(SQL_COMMA, SQL_CR),
                            _gen_select(
                                source_path,
                                Null,
                                source_flake,
                                total_path + REPEATED,
                                Null,
                                t,
                            ),
                        ),
                    )
                ]
            else:
                row_name = "row" + text(len(source_path.values))
                ord_name = "ordering" + text(len(source_path.values))
                inner = [
                    ConcatSQL(
                        SQL_SELECT_AS_STRUCT,
                        JoinSQL(
                            ConcatSQL(SQL_COMMA, SQL_CR),
                            _gen_select(ApiName(row_name), Null, v,
                                        ApiName(row_name), Null, t),
                        ),
                        SQL_FROM,
                        sql_call("UNNEST",
                                 quote_column(source_path + REPEATED)),
                        SQL_AS,
                        SQL(row_name),
                        SQL(" WITH OFFSET AS "),
                        SQL(ord_name),
                        SQL_ORDERBY,
                        SQL(ord_name),
                    )
                ]

            return [sql_alias(sql_call("ARRAY", *inner), REPEATED)]

        selection = []
        for k, t in jx.sort(total_flake.items(), 0):
            k_total_tops = total_tops if is_text(total_tops) else total_tops[k]
            k_tops = source_tops if is_text(source_tops) else source_tops[k]
            v = source_flake.get(k)
            if is_text(k_total_tops):
                # DO NOT INCLUDE TOP_LEVEL_FIELDS
                pass
            elif t == v and not k_total_tops and not k_tops:
                selection.append(
                    ConcatSQL(
                        quote_column(source_path + escape_name(k)),
                        SQL_AS,
                        quote_column(escape_name(k)),
                    ))
            elif is_data(t):
                if not v:
                    selects = _gen_select(
                        source_path + escape_name(k),
                        source_tops,
                        {},
                        total_path + escape_name(k),
                        k_total_tops,
                        t,
                    )
                elif is_data(v):
                    selects = _gen_select(
                        source_path + escape_name(k),
                        source_tops,
                        v,
                        total_path + escape_name(k),
                        k_total_tops,
                        t,
                    )
                else:
                    raise Log.error(
                        "Datatype mismatch on {{field}}: Can not merge {{type}} into {{main}}",
                        field=join_field(source_path + escape_name(k)),
                        type=v,
                        main=t,
                    )
                if selects:
                    inner = [
                        ConcatSQL(
                            SQL_SELECT_AS_STRUCT,
                            JoinSQL(ConcatSQL(SQL_COMMA, SQL_CR), selects),
                        )
                    ]
                    selection.append(
                        sql_alias(sql_call("", *inner), escape_name(k)))
            elif is_text(t):
                if is_text(k_tops):
                    # THE SOURCE HAS THIS PROPERTY AS A TOP_LEVEL_FIELD
                    selection.append(
                        ConcatSQL(SQL(k_tops), SQL_AS,
                                  quote_column(escape_name(k))))
                elif v == t:
                    selection.append(
                        ConcatSQL(
                            quote_column(total_path + escape_name(k)),
                            SQL_AS,
                            quote_column(escape_name(k)),
                        ))
                else:
                    if v:
                        Log.note(
                            "Datatype mismatch on {{field}}: Can not merge {{type}} into {{main}}",
                            field=join_field(source_path + escape_name(k)),
                            type=v,
                            main=t,
                        )
                    selection.append(
                        ConcatSQL(
                            sql_call(
                                "CAST",
                                ConcatSQL(SQL_NULL, SQL_AS,
                                          SQL(json_type_to_bq_type[t])),
                            ),
                            SQL_AS,
                            quote_column(escape_name(k)),
                        ))
            else:
                Log.error("not expected")
        return selection
Example #12
    def merge_shards(self):
        shards = []
        tables = list(self.container.client.list_tables(
            self.container.dataset))
        current_view = Null  # VIEW THAT POINTS TO PRIMARY SHARD
        primary_shard_name = None  # PRIMARY SHARD
        api_name = escape_name(self.short_name)

        for table_item in tables:
            table = table_item.reference
            table_api_name = ApiName(table.table_id)
            if text(table_api_name).startswith(text(api_name)):
                if table_api_name == api_name:
                    if table_item.table_type != "VIEW":
                        Log.error("expecting {{table}} to be a view",
                                  table=api_name)
                    current_view = self.container.client.get_table(table)
                    view_sql = current_view.view_query
                    primary_shard_name = _extract_primary_shard_name(view_sql)
                elif SUFFIX_PATTERN.match(
                        text(table_api_name)[len(text(api_name)):]):
                    try:
                        known_table = self.container.client.get_table(table)
                        shards.append(known_table)
                    except Exception as e:
                        Log.warning("could not merge table {{table}}",
                                    table=table,
                                    cause=e)

        if not current_view:
            Log.error("expecting {{table}} to be a view pointing to a table",
                      table=api_name)

        shard_flakes = [
            Snowflake.parse(
                big_query_schema=shard.schema,
                es_index=text(self.container.full_name +
                              ApiName(shard.table_id)),
                top_level_fields=self.top_level_fields,
                partition=self.partition,
            ) for shard in shards
        ]
        total_flake = snowflakes.merge(
            shard_flakes,
            es_index=text(self.full_name),
            top_level_fields=self.top_level_fields,
            partition=self.partition,
        )

        for i, s in enumerate(shards):
            if ApiName(s.table_id) == primary_shard_name:
                if total_flake == shard_flakes[i]:
                    # USE THE CURRENT PRIMARY SHARD AS A DESTINATION
                    del shards[i]
                    del shard_flakes[i]
                    break
        else:
            name = self.short_name + "_" + "".join(Random.sample(ALLOWED, 20))
            primary_shard_name = escape_name(name)
            self.container.create_table(
                table=name,
                schema=total_flake.schema,
                sharded=False,
                read_only=False,
                kwargs=self.config,
            )

        primary_full_name = self.container.full_name + primary_shard_name

        selects = []
        for flake, table in zip(shard_flakes, shards):
            q = ConcatSQL(
                SQL_SELECT,
                JoinSQL(ConcatSQL(SQL_COMMA, SQL_CR),
                        gen_select(total_flake, flake)),
                SQL_FROM,
                quote_column(ApiName(table.dataset_id, table.table_id)),
            )
            selects.append(q)

        Log.note("inserting into table {{table}}",
                 table=text(primary_shard_name))
        matched = []
        unmatched = []
        for sel, shard, flake in zip(selects, shards, shard_flakes):
            if flake == total_flake:
                matched.append((sel, shard, flake))
            else:
                unmatched.append((sel, shard, flake))

        # EVERYTHING THAT IS IDENTICAL TO PRIMARY CAN BE MERGED WITH SIMPLE UNION ALL
        if matched:
            for g, merge_chunk in jx.chunk(matched, MAX_MERGE):
                command = ConcatSQL(
                    SQL_INSERT,
                    quote_column(primary_full_name),
                    JoinSQL(
                        SQL_UNION_ALL,
                        (sql_query({
                            "from":
                            self.container.full_name + ApiName(shard.table_id)
                        }) for _, shard, _ in merge_chunk),
                    ),
                )
                DEBUG and Log.note("{{sql}}", sql=text(command))
                job = self.container.query_and_wait(command)
                Log.note("job {{id}} state = {{state}}",
                         id=job.job_id,
                         state=job.state)

                if job.errors:
                    Log.error(
                        "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                        sql=command.sql,
                        reason=job.errors,
                    )
                for _, shard, _ in merge_chunk:
                    self.container.client.delete_table(shard)

        # ALL OTHER SCHEMAS MISMATCH
        for s, shard, _ in unmatched:
            try:
                command = ConcatSQL(SQL_INSERT,
                                    quote_column(primary_full_name), s)
                DEBUG and Log.note("{{sql}}", sql=text(command))
                job = self.container.query_and_wait(command)
                Log.note(
                    "from {{shard}}, job {{id}}, state {{state}}",
                    id=job.job_id,
                    shard=shard.table_id,
                    state=job.state,
                )

                if job.errors:
                    if all(" does not have a schema." in m
                           for m in wrap(job.errors).message):
                        pass  # NOTHING TO DO
                    else:
                        Log.error(
                            "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                            sql=command.sql,
                            reason=job.errors,
                        )

                self.container.client.delete_table(shard)
            except Exception as e:
                Log.warning("failure to merge {{shard}}", shard=shard, cause=e)

        # REMOVE OLD VIEW
        view_full_name = self.container.full_name + api_name
        if current_view:
            self.container.client.delete_table(current_view)

        # CREATE NEW VIEW
        self.container.create_view(view_full_name, primary_full_name)