Пример #1
0
    def __string_col(self, col: str) -> str:
        """
        Return an expression that yields ``col`` as a string.

        The column's type is looked up in ``self.__columns``. When the
        string form of that type contains ``String`` but not ``FixedString``
        the escaped column name is returned unchanged; in every other case
        (including a column with no known type) the escaped name is wrapped
        in ``toString(...)``.
        """
        declared_type = self.__columns.get(col)
        type_name = str(declared_type) if declared_type else ''

        # Substring test on the type's string form; FixedString is excluded
        # explicitly because its name also contains "String".
        is_plain_string = 'String' in type_name and 'FixedString' not in type_name

        escaped = escape_col(col)
        return escaped if is_plain_string else 'toString({})'.format(escaped)
Пример #2
0
    def __init__(self, base_name, name, type):
        """
        Store the column parts and precompute its derived names.

        ``flattened`` is ``"<base_name>.<name>"`` when ``base_name`` is
        truthy, otherwise just ``name``. ``escaped`` is ``flattened`` passed
        through ``escape_col``.
        """
        self.base_name = base_name
        self.name = name
        self.type = type

        if self.base_name:
            self.flattened = '{}.{}'.format(self.base_name, self.name)
        else:
            self.flattened = self.name

        self.escaped = escape_col(self.flattened)
Пример #3
0
 def column_expr(
     self,
     column_name,
     query: Query,
     parsing_context: ParsingContext,
     table_alias: str = "",
 ):
     """
     Build the expression used to reference ``column_name`` in a query.

     This implementation qualifies the name with ``table_alias`` through
     ``qualified_column`` and escapes the result with ``escape_col``.
     ``query`` and ``parsing_context`` are accepted but not used here.
     NOTE(review): presumably overriding implementations use them to
     resolve special column aliases -- confirm against subclasses.
     """
     qualified_name = qualified_column(column_name, table_alias)
     return escape_col(qualified_name)
Пример #4
0
    def test_escape_col(self):
        """Check escape_col on None, on plain names and on names that need quoting."""
        # None is passed straight through rather than being stringified.
        assert escape_col(None) is None

        expectations = [
            # Empty, simple and dotted names come back unchanged.
            ('', ''),
            ('foo', 'foo'),
            ('foo.bar', 'foo.bar'),
            # A name containing a colon comes back wrapped in backticks.
            ('foo:bar', '`foo:bar`'),
            # Even though backtick characters in columns should be
            # disallowed by the query schema, make sure we don't allow
            # injection anyway.
            ("`", r"`\``"),
            ("production`; --", r"`production\`; --`"),
        ]
        for raw_name, expected in expectations:
            assert escape_col(raw_name) == expected
Пример #5
0
 def column_expr(self, column_name, body):
     """
     Build the expression used to reference ``column_name`` in a query.

     This implementation only escapes the name with ``escape_col``;
     ``body`` is accepted but not used here.
     NOTE(review): presumably overriding implementations map special
     column aliases to other expressions -- confirm against subclasses.
     """
     escaped_name = escape_col(column_name)
     return escaped_name
Пример #6
0
 def for_schema(self):
     """
     Return this column's schema fragment.

     The fragment is the escaped column name, a single space, and whatever
     ``self.type.for_schema()`` returns.
     """
     escaped_name = escape_col(self.name)
     type_schema = self.type.for_schema()
     return '{} {}'.format(escaped_name, type_schema)
Пример #7
0
def process_delete_tag(message, dataset) -> Optional[Replacement]:
    """
    Build the replacement queries that remove one tag from a project's rows.

    Reads ``tag``, ``datetime`` and ``project_id`` from ``message``. Returns
    ``None`` when the tag is empty. Otherwise returns a ``Replacement`` made
    of a count query template, an insert query template, the arguments for
    those templates, and the query-time flags ``(NEEDS_FINAL, project_id)``.

    The templates also contain ``dist_read_table_name`` and
    ``dist_write_table_name`` placeholders that are not supplied here.
    NOTE(review): presumably the consumer of the Replacement fills them in.
    """
    tag = message['tag']
    if not tag:
        return None

    assert isinstance(tag, str)
    timestamp = datetime.strptime(message['datetime'],
                                  settings.PAYLOAD_DATETIME_FORMAT)
    tag_column_name = dataset.get_tag_column_map()['tags'].get(tag, tag)
    is_promoted = tag in dataset.get_promoted_tags()['tags']

    # Rows are matched on the tag's own column when the tag is promoted,
    # otherwise on membership in the `tags.key` array.
    tag_condition = (
        "AND %(tag_column)s IS NOT NULL" if is_promoted
        else "AND has(`tags.key`, %(tag_str)s)"
    )
    where = """\
        WHERE project_id = %(project_id)s
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """ + tag_condition

    insert_query_template = """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """ + where

    count_query_template = """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """ + where

    def select_expr(column):
        # SELECT expression that re-inserts `column` with the tag removed.
        flattened = column.flattened
        if is_promoted and flattened == tag_column_name:
            # The promoted tag column itself is blanked out.
            return 'NULL'
        if flattened == 'tags.key':
            # Drop the key sitting at the deleted tag's index.
            return (
                "arrayFilter(x -> (indexOf(`tags.key`, x) != indexOf(`tags.key`, %s)), `tags.key`)"
                % escape_string(tag))
        if flattened == 'tags.value':
            # Drop the value sitting at the deleted tag's index.
            return (
                "arrayMap(x -> arrayElement(`tags.value`, x), arrayFilter(x -> x != indexOf(`tags.key`, %s), arrayEnumerate(`tags.value`)))"
                % escape_string(tag))
        # Every other column is copied through unchanged.
        return column.escaped

    all_columns = dataset.get_dataset_schemas().get_read_schema().get_columns()
    select_columns = [select_expr(column) for column in all_columns]
    all_column_names = [column.escaped for column in all_columns]

    query_args = {
        'all_columns': ', '.join(all_column_names),
        'select_columns': ', '.join(select_columns),
        'project_id': message['project_id'],
        'tag_str': escape_string(tag),
        'tag_column': escape_col(tag_column_name),
        'timestamp': timestamp.strftime(DATETIME_FORMAT),
    }

    return Replacement(
        count_query_template,
        insert_query_template,
        query_args,
        (NEEDS_FINAL, message['project_id']),
    )