Пример #1
0
def create_table_samples(
    table_id,
    environment_id,
    engine_id,
    partition=None,
    where=None,
    order_by=None,
    order_by_asc=True,
    limit=100,
):
    """Queue an asynchronous sample query for a table and return its task id.

    The row limit is validated first, then the environment, table and
    engine/environment permission checks run before the task is queued.
    """
    with DBSession() as session:
        api_assert(limit <= 100, "Too many rows requested")

        # Permission checks, in the same order as the request is described:
        # environment -> table -> engine belongs to environment.
        verify_environment_permission([environment_id])
        verify_data_table_permission(table_id, session=session)
        verify_query_engine_environment_permission(
            engine_id, environment_id, session=session
        )

        # Positional arguments handed to the queued sample-query task.
        sample_args = [
            table_id,
            engine_id,
            current_user.id,
            limit,
            partition,
            where,
            order_by,
            order_by_asc,
        ]
        queued = run_sample_query.apply_async(args=sample_args)
        return queued.task_id
Пример #2
0
def get_statement_execution_log(statement_execution_id):
    """Return the log of a statement execution.

    Returns:
        * a list of log strings when the log path starts with ``"stream"``;
        * the lines read from the log file otherwise, with a two-line
          truncation notice appended when the line limit is reached;
        * ``None`` when the execution reports no log.

    Aborts with ``RESOURCE_NOT_FOUND_STATUS_CODE`` if reading raises
    ``FileDoesNotExist``.
    """
    MAX_LOG_RETURN_LINES = 2000

    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)

        log_path = statement_execution.log_path
        try:
            # NOTE(review): log_path is presumably None when nothing was
            # logged; guard so that case falls through to the has_log check
            # instead of raising AttributeError.
            if log_path is not None and log_path.startswith("stream"):
                logs = logic.get_statement_execution_stream_logs(
                    statement_execution_id)
                return [log.log for log in logs]

            # The execution fetched above is still bound to the open
            # session, so there is no need to open a second session and
            # fetch it again.
            if not statement_execution.has_log:
                return None

            with GenericReader(log_path) as reader:
                result = reader.read_lines(
                    number_of_lines=MAX_LOG_RETURN_LINES)
                if len(result) == MAX_LOG_RETURN_LINES:
                    result += [
                        "---------------------------------------------------------------------------",
                        f"We are truncating results since it reached limit of {MAX_LOG_RETURN_LINES} lines.",
                    ]
                return result
        except FileDoesNotExist as e:
            abort(RESOURCE_NOT_FOUND_STATUS_CODE, str(e))
Пример #3
0
def verify_query_execution_owner(execution_id, session=None):
    """Assert that the current user owns the given query execution.

    The execution is looked up by id; if the looked-up object has no
    ``uid`` attribute (for example when it is ``None``) the owner is
    treated as ``None``.
    """
    execution = query_execution_logic.get_query_execution_by_id(
        execution_id, session=session)
    is_owner = current_user.id == getattr(execution, "uid", None)
    api_assert(is_owner, "Action can only be preformed by execution owner")
Пример #4
0
def get_table_query_examples(
    table_id,
    environment_id,
    uid=None,
    engine_id=None,
    with_table_id=None,
    limit=10,
    offset=0,
):
    """Return the query execution ids of example queries for a table.

    ``engine_id``, when given, must be one of the engines of the
    environment; the lookup is restricted to the environment's engines.
    """
    api_assert(limit < 100)

    with DBSession() as session:
        verify_environment_permission([environment_id])
        verify_data_table_permission(table_id, session=session)

        environment_engines = admin_logic.get_query_engines_by_environment(
            environment_id, session=session)
        engine_ids = [engine.id for engine in environment_engines]

        engine_is_valid = engine_id is None or engine_id in engine_ids
        api_assert(engine_is_valid, "Invalid engine id")

        example_logs = logic.get_table_query_examples(
            table_id,
            engine_ids,
            uid=uid,
            engine_id=engine_id,
            with_table_id=with_table_id,
            limit=limit,
            offset=offset,
            session=session,
        )
        return [log.query_execution_id for log in example_logs]
Пример #5
0
def download_statement_execution_result(statement_execution_id):
    """Build a download response for a statement execution's result.

    If the reader exposes a download URL, the file is fetched from that URL
    and streamed through in 10 KB chunks; otherwise the raw result is read
    and returned directly. The response is sent as a ``text/plain``
    attachment.
    """
    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)

        reader = GenericReader(statement_execution.result_path)
        if reader.has_download_url:
            # Proxy the download: stream the remote file to the client.
            upstream = requests.get(reader.get_download_url(), stream=True)
            chunk_size = 10 * 1024  # 10 KB
            response = Response(upstream.iter_content(chunk_size=chunk_size))
        else:
            # No download URL: read the raw file ourselves.
            reader.start()
            response = Response(reader.read_raw())

        disposition = 'attachment; filename="result_{}_{}.csv"'.format(
            statement_execution.query_execution_id, statement_execution_id)
        response.headers["Content-Type"] = "text/plain"
        response.headers["Content-Disposition"] = disposition
        return response
Пример #6
0
def create_datadoc_schedule(
    id,
    cron,
    kwargs,
):
    """Create a user task schedule that runs the given data doc.

    The schedule config and cron expression are validated before any
    database work; write access to the doc and access to its environment
    are then checked inside the session.
    """
    config_ok, config_error = validate_datadoc_schedule_config(kwargs)
    api_assert(config_ok, config_error)
    api_assert(validate_cron(cron), "Invalid cron expression")

    schedule_name = schedule_logic.get_data_doc_schedule_name(id)
    with DBSession() as session:
        assert_can_write(id, session=session)
        data_doc = logic.get_data_doc_by_id(id, session=session)
        verify_environment_permission([data_doc.environment_id])

        # Caller-provided kwargs first; user_id/doc_id always win.
        task_kwargs = {**kwargs, "user_id": current_user.id, "doc_id": id}
        return schedule_logic.create_task_schedule(
            schedule_name,
            "tasks.run_datadoc.run_datadoc",
            cron=cron,
            kwargs=task_kwargs,
            task_type="user",
            session=session,
        )
Пример #7
0
def create_data_doc_from_execution(
    environment_id,
    execution_id,
    engine_id,
    query_string,
    title=None,
):
    """Create a data doc from one of the current user's query executions.

    The new doc is owned by the current user, is not archived, and its
    ``public`` flag is taken from the environment's ``shareable`` value.
    Fails the request if the execution does not exist or belongs to
    another user.
    """
    with DBSession() as session:
        verify_environment_permission([environment_id])
        environment = Environment.get(id=environment_id, session=session)
        execution = get_query_execution_by_id(execution_id, session=session)
        # A missing execution would otherwise surface as an AttributeError
        # on ``execution.uid`` below.
        api_assert(execution is not None, "Invalid execution")

        uid = current_user.id
        api_assert(execution.uid == uid,
                   "You can only create from your own executions.")

        return logic.create_data_doc_from_execution(
            environment_id=environment_id,
            owner_uid=uid,
            engine_id=engine_id,
            query_string=query_string,
            execution_id=execution_id,
            public=environment.shareable,
            archived=False,
            title=title,
            meta={},
            session=session,
        )
Пример #8
0
def create_table_column_stats_by_name(metastore_name, data):
    """Batch add/update table column stats.

    Each entry of ``data`` is read for ``schema_name``, ``table_name``,
    ``column_name`` and ``stats`` (items carrying ``key`` and ``value``).
    Entries whose column cannot be found are skipped.
    """
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(
            metastore_name, session=session)
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as t_finder:
            for entry in data:
                column = t_finder.get_table_column_by_name(
                    schema_name=entry["schema_name"],
                    table_name=entry["table_name"],
                    column_name=entry["column_name"],
                    session=session,
                )
                if column is None:
                    continue

                for stat in entry["stats"]:
                    logic.upsert_table_column_stat(
                        column_id=column.id,
                        key=stat["key"],
                        value=stat["value"],
                        uid=current_user.id,
                        session=session,
                    )
Пример #9
0
def get_task_run_records_by_name(id,
                                 offset=0,
                                 limit=10,
                                 hide_successful_jobs=False):
    """Return run records of the task schedule with the given id.

    Each record is returned as a dict with an extra ``task_type`` entry
    taken from the record's task.
    """
    api_assert(limit < 100, "You are requesting too much data")

    def _serialize(record):
        # Serialize the record and attach the type of its task.
        serialized = record.to_dict()
        serialized["task_type"] = record.task.task_type
        return serialized

    with DBSession() as session:
        task = logic.get_task_schedule_by_id(id=id, session=session)
        api_assert(task, "Invalid task id")

        # The second element of the returned pair is not needed here.
        records, _ = logic.get_task_run_record_run_by_name(
            name=task.name,
            offset=offset,
            limit=limit,
            hide_successful_jobs=hide_successful_jobs,
            session=session,
        )
        return [_serialize(record) for record in records]
Пример #10
0
def search_query_execution(environment_id,
                           filters=None,
                           orderBy=None,
                           limit=100,
                           offset=0):
    """Search the current user's query executions in an environment.

    ``filters`` may contain a ``"user"`` entry, but only with the current
    user's id; when absent, the current user's id is filled in. Returns a
    list of query execution dicts without statements.
    """
    verify_environment_permission([environment_id])

    # Work on a private copy. The previous mutable ``{}`` default was
    # modified in place, so the first caller's user id leaked into every
    # later call that relied on the default, and caller-supplied dicts were
    # modified as a side effect.
    filters = dict(filters) if filters else {}

    with DBSession() as session:
        if "user" in filters:
            api_assert(
                current_user.id == filters["user"],
                "You can only search your own queries",
            )
        else:
            filters["user"] = current_user.id

        query_executions = logic.search_query_execution(
            environment_id=environment_id,
            filters=filters,
            orderBy=orderBy,
            limit=limit,
            offset=offset,
            session=session,
        )

        return [
            query_execution.to_dict(with_statement=False)
            for query_execution in query_executions
        ]
Пример #11
0
def suggest_user(name, limit=10, offset=None):
    """Return user suggestions for a (partial) name from the users index.

    The name is lower-cased before being sent as a completion query. A
    failed search is logged and yields an empty list. Each suggestion is a
    dict with ``id``, ``username`` and ``fullname``.
    """
    api_assert(limit is None or limit <= 100, "Requesting too many users")

    completion = {"field": "suggest", "size": limit}
    query = {
        "suggest": {
            "suggest": {
                "text": (name or "").lower(),
                "completion": completion,
            }
        },
    }

    index_name = ES_CONFIG["users"]["index_name"]

    result = None
    try:
        result = get_hosted_es().search(index=index_name, body=query)
    except Exception as e:
        # Best effort: a search failure results in no suggestions.
        LOG.info(e)
    if result is None:
        result = {}

    # Only the first suggestion group is used.
    suggestion_groups = result.get("suggest", {}).get("suggest", [])
    first_group = next(iter(suggestion_groups), {})

    users = []
    for option in first_group.get("options", []):
        users.append({
            "id": option.get("_source", {}).get("id"),
            "username": option.get("_source", {}).get("username"),
            "fullname": option.get("_source", {}).get("fullname"),
        })
    return users
Пример #12
0
def get_board_by_id(board_id):
    """Return a board as a dict including its docs, tables and items.

    Read access to the board and access to its environment are checked;
    a missing board fails the request with status 404.
    """
    extra_fields = ["docs", "tables", "items"]
    with DBSession() as session:
        assert_can_read(board_id, session=session)

        board = Board.get(id=board_id, session=session)
        board_exists = board is not None
        api_assert(board_exists, "Invalid board id", 404)

        verify_environment_permission([board.environment_id])
        return board.to_dict(extra_fields=extra_fields)
Пример #13
0
def download_statement_execution_result(statement_execution_id):
    """Build a download response for a statement execution's result.

    If the reader exposes a download URL the client is redirected to it;
    otherwise the raw result is read and returned as a ``text/csv``
    attachment.
    """
    with DBSession() as session:
        statement_execution = logic.get_statement_execution_by_id(
            statement_execution_id, session=session)
        api_assert(statement_execution is not None,
                   message="Invalid statement execution")
        verify_query_execution_permission(
            statement_execution.query_execution_id, session=session)

        download_file_name = f"result_{statement_execution.query_execution_id}_{statement_execution_id}.csv"
        reader = GenericReader(statement_execution.result_path)

        if reader.has_download_url:
            # Let the user fetch the file directly via redirection.
            target_url = reader.get_download_url(
                custom_name=download_file_name)
            return redirect(target_url)

        # No download URL: read the raw file and serve it ourselves.
        reader.start()
        response = Response(reader.read_raw())
        response.headers["Content-Type"] = "text/csv"
        response.headers[
            "Content-Disposition"] = f'attachment; filename="{download_file_name}"'
        return response
Пример #14
0
def get_schema(schema_id, include_metastore=False, include_table=False):
    """Return a schema as a dict after checking access to its metastore.

    ``include_metastore`` and ``include_table`` are forwarded positionally
    to the schema's ``to_dict``.
    """
    with DBSession() as session:
        schema = logic.get_schema_by_id(schema_id, session=session)
        api_assert(schema, "Invalid schema")
        verify_metastore_permission(schema.metastore_id, session=session)

        return schema.to_dict(include_metastore, include_table)
Пример #15
0
def update_tag(tag_id, meta):
    """Update the ``meta`` of a tag and return the result of the update.

    Tags whose current meta has a truthy ``"admin"`` entry can only be
    modified by admin users. An unknown tag id fails the request instead
    of raising an AttributeError.
    """
    tag = Tag.get(id=tag_id)
    # NOTE(review): assumes Tag.get returns None for an unknown id, like
    # other ``.get`` lookups in this codebase — confirm.
    api_assert(tag is not None, "Invalid tag")

    if (tag.meta or {}).get("admin", False):
        api_assert(current_user.is_admin, "Tag can only be modified by admin")

    return Tag.update(id=tag_id,
                      fields={"meta": meta},
                      skip_if_value_none=True)
Пример #16
0
def soft_delete_data_doc(id):
    """Archive a data doc owned by the current user.

    The doc must exist, the user must have access to its environment, and
    only the doc's owner may archive it.
    """
    with DBSession() as session:
        doc = logic.get_data_doc_by_id(id=id, session=session)
        api_assert(doc, "Invalid doc")
        verify_environment_permission([doc.environment_id])

        is_owner = current_user.id == doc.owner_uid
        api_assert(is_owner, "You can only delete your own data doc")

        # "Deleting" only flips the archived flag.
        logic.update_data_doc(id=id, archived=True, session=session)
Пример #17
0
def delete_board(board_id, **fields):
    """Delete a board the current user can edit.

    Boards whose ``board_type`` is ``"favorite"`` cannot be deleted.
    Extra keyword arguments are accepted but not used.
    """
    with DBSession() as session:
        assert_can_edit(board_id, session=session)

        board = Board.get(id=board_id, session=session)
        is_favorite = board.board_type == "favorite"
        api_assert(not is_favorite, "Cannot delete favorite")

        Board.delete(board.id, session=session)
Пример #18
0
def get_task_run_record_by_name(name, offset=0, limit=10, hide_successful_jobs=False):
    """Return a page of run records for the task with the given name.

    Returns a dict with ``data`` (the records) and ``count``, both as
    returned by the logic layer.
    """
    api_assert(limit < 1000, "You are requesting too much data")

    # Forward the filter flag: it was accepted by this endpoint but
    # silently dropped, so callers always got successful jobs too.
    records, count = logic.get_task_run_record_run_by_name(
        name=name,
        offset=offset,
        limit=limit,
        hide_successful_jobs=hide_successful_jobs,
    )

    return {"data": records, "count": count}
Пример #19
0
def update_schedule(id, **kwargs):
    """Update a task schedule with the allowed subset of the given fields.

    Only ``cron``, ``args``, ``kwargs``, ``enabled`` and ``options`` are
    forwarded; a supplied ``cron`` must be a valid cron expression.
    """
    allowed_fields = ("cron", "args", "kwargs", "enabled", "options")

    updates = {}
    for field, value in kwargs.items():
        if field in allowed_fields:
            updates[field] = value

    if "cron" in updates:
        api_assert(validate_cron(updates["cron"]), "Invalid cron expression")

    return logic.update_task_schedule(id=id, **updates)
Пример #20
0
def run_data_doc(id):
    """Run the scheduled task of a data doc immediately.

    Requires write access and data doc permission; fails the request if
    the doc has no schedule.
    """
    schedule_name = schedule_logic.get_data_doc_schedule_name(id)
    with DBSession() as session:
        assert_can_write(id, session=session)
        verify_data_doc_permission(id, session=session)

        doc_schedule = schedule_logic.get_task_schedule_by_name(
            schedule_name, session=session)
        api_assert(doc_schedule, "Schedule does not exist")

        run_and_log_scheduled_task(doc_schedule.id, session=session)
Пример #21
0
def assert_can_write(doc_id, session=None):
    """Assert that the current user can write to the data doc.

    Fails with ``CANNOT_WRITE_DATADOC`` (403) when writing is not allowed
    and with ``DOC_DNE`` (404) when the doc does not exist.
    """
    try:
        can_write = user_can_write(
            doc_id, uid=current_user.id, session=session)
        api_assert(can_write, "CANNOT_WRITE_DATADOC", 403)
    except DocDoesNotExist:
        api_assert(False, "DOC_DNE", 404)
Пример #22
0
def assert_can_read(board_id, session=None):
    """Assert that the current user can read the board.

    Fails with ``CANNOT_READ_BOARD`` (403) when reading is not allowed and
    with ``BOARD_DNE`` (404) when the board does not exist.
    """
    try:
        can_read = user_can_read(
            board_id, uid=current_user.id, session=session)
        api_assert(can_read, "CANNOT_READ_BOARD", 403)
    except BoardDoesNotExist:
        api_assert(False, "BOARD_DNE", 404)
Пример #23
0
def get_table_query_examples_users(table_id, environment_id, limit=5):
    """Return users of a table's example queries with their counts.

    Each row from the logic layer is mapped to ``{"uid": row[0],
    "count": row[1]}``. The lookup is restricted to the environment's
    query engines.
    """
    api_assert(limit <= 10)
    verify_environment_permission([environment_id])
    verify_data_table_permission(table_id)

    engine_ids = [
        engine.id
        for engine in admin_logic.get_query_engines_by_environment(
            environment_id)
    ]
    user_rows = logic.get_query_example_users(
        table_id, engine_ids, limit=limit)

    return [{"uid": row[0], "count": row[1]} for row in user_rows]
Пример #24
0
def verify_query_engine_environment_permission(query_engine_id,
                                               environment_id,
                                               session=None):
    """Assert that the query engine is attached to the environment.

    Fails with ``ACCESS_RESTRICTED_STATUS_CODE`` when no
    ``QueryEngineEnvironment`` row links the two.
    """
    link = session.query(QueryEngineEnvironment).filter_by(
        query_engine_id=query_engine_id,
        environment_id=environment_id,
    ).first()
    api_assert(
        link is not None,
        message="Engine is not in Environment",
        status_code=ACCESS_RESTRICTED_STATUS_CODE,
    )
Пример #25
0
def delete_board_item(board_id, item_type, item_id):
    """Remove a data doc or table item from a board the user can edit."""
    is_known_type = item_type in ("data_doc", "table")
    api_assert(is_known_type, "Invalid item type")

    with DBSession() as session:
        assert_can_edit(board_id, session=session)

        board = Board.get(id=board_id, session=session)
        logic.remove_item_from_board(
            board.id, item_id, item_type, session=session)
Пример #26
0
def get_query_engine_status(engine_id):
    """Return the status of a query engine as reported by its checker.

    The checker class is selected by the engine's ``status_checker``
    feature param, defaulting to ``"NullChecker"``.
    """
    engine_checker = None

    # Security check and checker lookup happen inside the session; the
    # actual status check runs after the session is closed.
    with DBSession() as session:
        verify_query_engine_permission(engine_id, session=session)
        engine = admin_logic.get_query_engine_by_id(engine_id, session=session)
        feature_params = engine.get_feature_params()
        checker_name = feature_params.get("status_checker", "NullChecker")
        engine_checker = get_engine_checker_class(checker_name)

    api_assert(engine_checker is not None, "Invalid engine checker")
    return engine_checker.check(engine_id=engine_id, uid=current_user.id)
Пример #27
0
def get_table(table_id,
              with_schema=True,
              with_column=True,
              with_warnings=True):
    """Return a table as a dict after checking access to its schema.

    The three ``with_*`` flags are forwarded positionally to the table's
    ``to_dict``.
    """
    with DBSession() as session:
        table = logic.get_table_by_id(table_id, session=session)
        api_assert(table, "Invalid table")
        verify_data_schema_permission(table.schema_id, session=session)

        return table.to_dict(with_schema, with_column, with_warnings)
Пример #28
0
def update_board_item_fields(board_item_id, **fields):
    """Update fields of a board item on a board the user can edit.

    The item must exist; edit access is checked against the item's parent
    board. Returns the result of the logic-layer update.
    """
    with DBSession() as session:
        board_item = BoardItem.get(id=board_item_id, session=session)
        api_assert(board_item, "List item does not exist")

        parent_board_id = board_item.parent_board_id
        assert_can_edit(parent_board_id, session=session)

        return logic.update_board_item(
            id=board_item_id, **fields, session=session)
Пример #29
0
def assert_is_owner(doc_id, session=None):
    """Assert that the current user owns the data doc.

    Fails with ``NOT_DATADOC_OWNER`` (403) for a non-owner and with
    ``DOC_DNE`` (404) when no doc with that id exists.
    """
    try:
        matching_docs = session.query(DataDoc).filter(DataDoc.id == doc_id)
        doc = matching_docs.first()
        if doc is None:
            raise DocDoesNotExist

        is_owner = doc.owner_uid == current_user.id
        api_assert(is_owner, "NOT_DATADOC_OWNER", 403)
    except DocDoesNotExist:
        api_assert(False, "DOC_DNE", 404)
Пример #30
0
def assert_is_owner(board_id, session=None):
    """Assert that the current user owns the board.

    Fails with ``NOT_BOARD_OWNER`` (403) for a non-owner and with
    ``BOARD_DNE`` (404) when no board with that id exists.
    """
    try:
        matching_boards = session.query(Board).filter(Board.id == board_id)
        board = matching_boards.first()
        if board is None:
            raise BoardDoesNotExist

        is_owner = board.owner_uid == current_user.id
        api_assert(is_owner, "NOT_BOARD_OWNER", 403)
    except BoardDoesNotExist:
        api_assert(False, "BOARD_DNE", 404)