def datadocs_to_es(datadoc, session=None):
    """Serialize a data doc into a flat dict for the search index.

    Args:
        datadoc: Data doc object; its ``cells``, ``title``, ``public`` flag
            and identifying attributes are read.
        session: Optional database session, forwarded to the editor lookup.

    Returns:
        dict with the keys ``id``, ``environment_id``, ``owner_uid``,
        ``created_at``, ``cells``, ``title``, ``public`` and
        ``readable_user_ids``.
    """

    def render_cell(cell):
        # Text cells hold rich text and are flattened to plain text.
        if cell.cell_type == DataCellType.text:
            return richtext_to_plaintext(cell.context)
        # Query cells contribute their raw context, preceded by the
        # cell title on its own line when a title is set.
        if cell.cell_type == DataCellType.query:
            heading = cell.meta.get("title", "")
            if heading:
                return f"{heading}\n{cell.context}"
            return cell.context
        # Any other cell type is represented by a fixed placeholder.
        return "[... additional unparsable content ...]"

    doc_title = datadoc.title
    escaped_body = escape("\n".join([render_cell(cell) for cell in datadoc.cells]))

    # There is no need to compute the list of editors
    # for public datadoc since everyone is able to see it
    if datadoc.public:
        readable_uids = []
    else:
        doc_editors = get_data_doc_editors_by_doc_id(
            data_doc_id=datadoc.id, session=session
        )
        readable_uids = [doc_editor.uid for doc_editor in doc_editors]

    return {
        "id": datadoc.id,
        "environment_id": datadoc.environment_id,
        "owner_uid": datadoc.owner_uid,
        "created_at": DATETIME_TO_UTC(datadoc.created_at),
        "cells": escaped_body,
        "title": doc_title,
        "public": datadoc.public,
        "readable_user_ids": readable_uids,
    }
def table_to_es(table, session=None):
    """Serialize a table into a flat dict for the search index.

    Args:
        table: Table object; its schema, columns, tags, optional
            ``information`` record and identifying attributes are read.
        session: Optional database session, forwarded to
            ``get_table_weight``.

    Returns:
        dict describing the table, including a ``completion_name`` entry
        whose ``input`` holds both the qualified and the bare table name and
        whose ``contexts`` is keyed by the schema's metastore id.
    """
    data_schema = table.data_schema
    columns = [column.name for column in table.columns]
    schema_name = data_schema.name
    table_name = table.name

    # Tables without an information record get an empty description.
    description = ""
    if table.information:
        description = richtext_to_plaintext(
            table.information.description, escape=True
        )

    qualified_name = f"{schema_name}.{table_name}"
    # The same weight feeds both the completion entry and importance_score.
    table_weight = get_table_weight(table.id, session=session)

    completion = {
        "input": [qualified_name, table_name],
        "weight": table_weight,
        "contexts": {"metastore_id": data_schema.metastore_id},
    }

    return {
        "id": table.id,
        "metastore_id": data_schema.metastore_id,
        "schema": schema_name,
        "name": table_name,
        "full_name": qualified_name,
        "full_name_ngram": qualified_name,
        "completion_name": completion,
        "description": description,
        "created_at": DATETIME_TO_UTC(table.created_at),
        "columns": columns,
        "golden": table.golden,
        "importance_score": table_weight,
        "tags": [tag.tag_name for tag in table.tags],
    }
def board_to_es(board, fields=None, session=None):
    """Serialize a board into a dict for the search index.

    Cheap attributes are stored as plain values, while derived fields are
    stored as zero-argument callables; the mapping is then handed to
    ``_get_dict_by_field`` together with ``fields``.
    NOTE(review): presumably ``_get_dict_by_field`` only invokes the callables
    for the requested fields -- confirm against its definition.

    Args:
        board: Board object; its docs, tables, editors and identifying
            attributes are read.
        fields: Forwarded unchanged to ``_get_dict_by_field``.
        session: Accepted but not used inside this function.

    Returns:
        Whatever ``_get_dict_by_field`` returns for the built mapping.
    """

    def doc_titles():
        # Docs with an empty/missing title are skipped.
        titles = []
        for doc in board.docs:
            if doc.title:
                titles.append(doc.title)
        return titles

    def qualified_table_names():
        return [
            f"{table.data_schema.name}.{table.name}" for table in board.tables
        ]

    def readable_user_ids():
        # The owner always comes first, followed by every editor.
        return [board.owner_uid, *(editor.uid for editor in board.editors)]

    def plain_description():
        return richtext_to_plaintext(board.description)

    getters = {
        "id": board.id,
        "title": board.name,
        "environment_id": board.environment_id,
        "description": plain_description,
        "public": board.public,
        "owner_uid": board.owner_uid,
        "full_table_name": qualified_table_names,
        "doc_name": doc_titles,
        "readable_user_ids": readable_user_ids,
    }
    return _get_dict_by_field(getters, fields=fields)
def get_joined_cells(datadoc):
    """Return the text of every cell in ``datadoc`` as one escaped string.

    Each cell is rendered to a piece of text, the pieces are joined with
    newlines, and the result is passed through ``escape``.

    Args:
        datadoc: Data doc object whose ``cells`` are iterated in order.

    Returns:
        The value returned by ``escape`` for the newline-joined cell text.
    """

    def as_text(cell):
        kind = cell.cell_type
        if kind == DataCellType.text:
            # Rich text is flattened to plain text.
            return richtext_to_plaintext(cell.context)
        if kind != DataCellType.query:
            # Neither text nor query: use a fixed placeholder.
            return "[... additional unparsable content ...]"
        # Query cell: prefix the context with its title when one is set.
        query_title = cell.meta.get("title", "")
        if not query_title:
            return cell.context
        return f"{query_title}\n{cell.context}"

    pieces = [as_text(cell) for cell in datadoc.cells]
    return escape("\n".join(pieces))
def get_table_description():
    """Return the plain-text, escaped description of ``table``.

    NOTE(review): ``table`` is not a parameter; it is resolved from an
    enclosing or module-level scope that is not visible here -- confirm
    where it is bound.

    Returns:
        The description converted by ``richtext_to_plaintext`` with
        ``escape=True``, or an empty string when ``table.information`` is
        falsy.
    """
    if not table.information:
        return ""
    return richtext_to_plaintext(table.information.description, escape=True)