Пример #1
0
def get_table_weight(table_id: int, session=None) -> int:
    """Compute the ranking weight of a table.

    The weight is used for ranking in auto completion and in the sidebar
    table search.

    Arguments:
        table_id {int} -- Id of DataTable

    Keyword Arguments:
        session -- Sqlalchemy DB session (default: {None})

    Returns:
        int -- The integer weight
    """
    sample_count = get_table_query_samples_count(table_id, session=session)
    impression_count = get_viewers_count_by_item_after_date(
        ImpressionItemType.DATA_TABLE,
        table_id,
        get_last_impressions_date(),
        session=session,
    )
    table = get_table_by_id(table_id, session=session)

    # A query sample is worth 10x as much as an impression. The +1 means a
    # table with no samples and no impressions still contributes 1 before
    # the boost multiplier is applied.
    raw_score = (impression_count + sample_count * 10 + 1) * table.boost_score

    # Take the log to flatten the score distribution (it follows a power law).
    return int(math.log2(raw_score))
Пример #2
0
def get_table(table_id,
              with_schema=True,
              with_column=True,
              with_warnings=True):
    """Fetch a table by id and return it as a dict.

    The table is looked up inside a fresh DB session, checked with
    ``api_assert`` (message "Invalid table"), and the caller's access to the
    table's schema is verified before the table is serialized.

    Arguments:
        table_id -- Id of the table to load

    Keyword Arguments:
        with_schema -- Forwarded to ``table.to_dict`` (default: {True})
        with_column -- Forwarded to ``table.to_dict`` (default: {True})
        with_warnings -- Forwarded to ``table.to_dict`` (default: {True})

    Returns:
        The value returned by ``table.to_dict``
    """
    with DBSession() as session:
        data_table = logic.get_table_by_id(table_id, session=session)
        api_assert(data_table, "Invalid table")
        verify_data_schema_permission(data_table.schema_id, session=session)
        return data_table.to_dict(with_schema, with_column, with_warnings)
Пример #3
0
def update_table(table_id, description=None, golden=None):
    """Update a table's description and/or golden flag.

    Arguments:
        table_id -- Id of the table to update

    Keyword Arguments:
        description -- New description; skipped when falsy (``None`` or an
            empty string) (default: {None})
        golden -- New golden flag; skipped when ``None``. Changing it is
            guarded by an ``api_assert`` on ``current_user.is_admin``
            (default: {None})

    Returns:
        The table as re-read by ``logic.get_table_by_id`` after the updates
    """
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)

        if description:
            logic.update_table_information(
                table_id, description=description, session=session)

        golden_requested = golden is not None
        if golden_requested:
            api_assert(current_user.is_admin,
                       "Golden table can only be updated by Admin")
            logic.update_table(table_id, golden=golden, session=session)

        refreshed_table = logic.get_table_by_id(table_id, session=session)
        return refreshed_table
Пример #4
0
def refresh_table_from_metastore(table_id):
    """Refetch table info from the metastore and return the refreshed table.

    Arguments:
        table_id -- Id of the table to refresh

    Returns:
        The table object after ``session.refresh`` has reloaded it
    """
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)

        table = logic.get_table_by_id(table_id, session=session)
        data_schema = table.data_schema

        # The loader is resolved from the metastore the table's schema
        # belongs to.
        loader = get_metastore_loader(data_schema.metastore_id,
                                      session=session)
        loader.sync_create_or_update_table(
            data_schema.name, table.name, session=session)

        # Reload the object so the caller sees whatever the sync wrote.
        session.refresh(table)
        return table
Пример #5
0
def make_samples_query(
    table_id,
    limit,
    partition=None,
    where: List[Tuple[str, str, str]] = None,
    order_by=None,
    order_by_asc=True,
    session=None,
):
    """Build the ``SELECT *`` query text used to sample rows from a table.

    Arguments:
        table_id -- Id of the table to sample
        limit -- Value placed after ``LIMIT``

    Keyword Arguments:
        partition -- Partition passed to ``_verify_or_get_partition``
            (default: {None})
        where -- List of where-clause tuples, each formatted by
            ``_format_where_clause_filter`` (default: {None})
        order_by -- Column name to order by; must be a column of the table
            (default: {None})
        order_by_asc -- Order ascending when truthy, else descending
            (default: {True})
        session -- Sqlalchemy DB session (default: {None})

    Raises:
        SamplesError -- When ``order_by`` is not a column of the table

    Returns:
        str -- The query text
    """
    table = get_table_by_id(table_id, session=session)
    column_type_by_name = {
        col.name: get_column_type_from_string(col.type)
        for col in table.columns
    }

    conditions = []

    resolved_partition = _verify_or_get_partition(table, partition)
    if resolved_partition:
        conditions.extend(
            _format_partition_filter(resolved_partition, column_type_by_name))

    if where is not None:
        conditions.extend(
            _format_where_clause_filter(clause, column_type_by_name)
            for clause in where)

    where_clause = ""
    if conditions:
        where_clause = "WHERE\n{}".format(" AND ".join(conditions))

    if order_by is None:
        order_clause = ""
    else:
        # Only known column names may be interpolated into ORDER BY.
        if order_by not in column_type_by_name:
            raise SamplesError("Invalid order by " + order_by)
        direction = "ASC" if order_by_asc else "DESC"
        order_clause = "ORDER BY {} {}".format(order_by, direction)

    full_name = "{}.{}".format(table.data_schema.name, table.name)
    template = "\nSELECT\n    *\nFROM {}\n{}\n{}\nLIMIT {}"
    return template.format(full_name, where_clause, order_clause, limit)
Пример #6
0
def update_table_by_id(table_id, session=None):
    """Sync a single table into the Elasticsearch tables index.

    If the table no longer exists in the database its ES document is
    deleted; otherwise the table is converted with ``table_to_es`` and
    upserted. Upsert failures are logged (with traceback) and swallowed, so
    this function is best-effort and never raises because of ``_update``.

    Arguments:
        table_id -- Id of the table to sync

    Keyword Arguments:
        session -- Sqlalchemy DB session (default: {None})
    """
    index_name = ES_CONFIG["tables"]["index_name"]

    table = get_table_by_id(table_id, session=session)
    if table is None:
        delete_es_table_by_id(table_id)
        return

    formatted_object = table_to_es(table, session=session)
    # ES requires this format for updates; "doc_as_upsert" asks ES to use
    # the doc as the new document when none exists yet.
    updated_body = {
        "doc": formatted_object,
        "doc_as_upsert": True,
    }
    try:
        _update(index_name, table_id, updated_body)
    except Exception:
        # Best-effort: nothing is retried or inserted here. Keep the
        # traceback in the log so the failure can be diagnosed.
        LOG.error("failed to upsert {}. Will pass.".format(table_id),
                  exc_info=True)
def make_samples_query(
    table_id,
    limit,
    partition=None,
    where: Tuple[str, str, str] = None,
    order_by=None,
    order_by_asc=True,
    session=None,
):
    """Build the ``SELECT *`` query text used to sample rows from a table.

    Arguments:
        table_id -- Id of the table to sample
        limit -- Value placed after ``LIMIT``

    Keyword Arguments:
        partition -- Partition string such as ``dt=2015-01-01/column1=val1``.
            When ``None`` the last entry of the table's ``latest_partitions``
            is used (if any); otherwise it must be one of those entries
            (default: {None})
        where -- A ``(column_name, filter_op, filter_val)`` tuple. For the
            ``=``, ``!=`` and ``LIKE`` operators ``filter_val`` is validated
            against the column type; for any other operator it is ignored
            (default: {None})
        order_by -- Column name to order by; must be a column of the table
            (default: {None})
        order_by_asc -- Order ascending when truthy, else descending
            (default: {True})
        session -- Sqlalchemy DB session (default: {None})

    Raises:
        SamplesError -- On an unknown or malformed partition, an unknown
            filter column or operator, a filter value that does not fit the
            column type, or an unknown ``order_by`` column

    Returns:
        str -- The query text
    """
    table = get_table_by_id(table_id, session=session)
    information = table.information
    column_type_by_name = {
        column.name: get_column_type_from_string(column.type)
        for column in table.columns
    }
    query_filters = []

    partitions = []
    if information:
        partitions = json.loads(
            information.to_dict().get("latest_partitions") or "[]")

    if partition is None:
        # Default to the last known partition, if there is one.
        partition = next(iter(reversed(partitions)), None)
    elif partition not in partitions:
        # Only partitions reported for this table are accepted.
        raise SamplesError("Invalid partition {}".format(partition))

    if partition:  # latest_partitions is like dt=2015-01-01/column1=val1
        for column_filter in partition.split("/"):
            # Split on the first "=" only, so a value containing "=" stays
            # intact and a segment without "=" is reported as SamplesError
            # instead of an unpacking ValueError.
            column_name, separator, column_val = column_filter.partition("=")
            if not separator:
                raise SamplesError("Invalid partition {}".format(partition))

            column_type = column_type_by_name.get(column_name, None)
            column_quote = ""
            if column_type == QuerybookColumnType.String:
                column_quote = "'"

            query_filters.append(
                f"{column_name}={column_quote}{column_val}{column_quote}")

    if where is not None:
        column_name, filter_op, filter_val = where
        if column_name not in column_type_by_name:
            raise SamplesError(
                "Invalid filter column {}".format(column_name))
        column_type = column_type_by_name[column_name]

        if filter_op not in COMPARSION_OP:
            raise SamplesError("Invalid filter op {}".format(filter_op))

        if filter_op in ("=", "!=", "LIKE"):
            # Messages are built with format() rather than "+" so that a
            # None / non-str value is reported as SamplesError rather than
            # failing with TypeError while building the message.
            value_is_str = isinstance(filter_val, str)
            if column_type == QuerybookColumnType.Number:
                if not (value_is_str and filter_val.isnumeric()):
                    raise SamplesError(
                        "Invalid numeric filter value {}".format(filter_val))
            elif column_type == QuerybookColumnType.Boolean:
                if filter_val != "true" and filter_val != "false":
                    raise SamplesError(
                        "Invalid boolean filter value {}".format(filter_val))
            else:  # column_type == QuerybookColumnType.String
                if not value_is_str:
                    # json.dumps of a non-str has no surrounding quotes, so
                    # the [1:-1] slice below would mangle the value.
                    raise SamplesError(
                        "Invalid string filter value {}".format(filter_val))
                # NOTE(review): json.dumps escapes '"' and '\\' but not "'",
                # so a value containing a single quote can terminate the SQL
                # string literal early. Escaping rules are engine-specific;
                # left unchanged here -- confirm and harden per engine.
                filter_val = "'{}'".format(json.dumps(filter_val)[1:-1])
        else:
            # Other operators take no right-hand value.
            filter_val = ""

        query_filters.append(f"{column_name} {filter_op} {filter_val}")

    query_filter_str = ("WHERE\n{}".format(" AND ".join(query_filters))
                        if query_filters else "")

    order_by_str = ""
    if order_by is not None:
        # Only known column names may be interpolated into ORDER BY.
        if order_by not in column_type_by_name:
            raise SamplesError("Invalid order by {}".format(order_by))
        order_by_str = "ORDER BY {} {}".format(
            order_by, "ASC" if order_by_asc else "DESC")

    full_name = "{}.{}".format(table.data_schema.name, table.name)
    query = """
SELECT
    *
FROM {}
{}
{}
LIMIT {}""".format(full_name, query_filter_str, order_by_str, limit)

    return query