def format_labels(node: pipelines.Node):
    """Return html markup listing the labels of a node, separated by commas.

    Args:
        node: The node whose `labels` mapping (label -> value) is rendered

    Returns:
        One `_.span[...]` fragment per label (label, a colon, and the value inside
        a `_.tt` element styled with `white-space:nowrap`), joined with `', '`.
        An empty string when the node has no labels.
    """
    fragments = []
    for name, content in node.labels.items():
        # keep each value on a single line
        value_markup = _.tt(style='white-space:nowrap')[content]
        fragments.append(str(_.span[name, ': ', value_markup]))

    return ', '.join(fragments) or ''
def oauth2_export_to_google_sheet():
    """Start the Google OAuth 2.0 authorization flow for exporting a query to a Google sheet.

    Reads the form fields `query` (a json document), `decimal-mark` and `array-format`
    from the current request and keeps them, together with the OAuth state, in the flask
    session so that the OAuth callback view can pick them up again.

    Returns:
        - a redirect to the authorization url built by the OAuth flow,
        - a 403 response with the inline permission denied message when
          `current_user_has_permission` rejects the query,
        - a 500 response when the Google client packages can not be imported.
    """
    try:
        import google_auth_oauthlib.flow
        # imported only to verify that the package is installed
        import googleapiclient.discovery
    except ImportError:
        install_hint = _.tt(style='color:red')[
            "Please install the packages 'google_auth_oauthlib' and 'google-api-python-client'"]
        return flask.make_response(str(install_hint), 500)

    # NOTE(review): presumably this lets oauthlib accept non-https urls. It is set for the
    # whole process on every request -- confirm that this is intended outside of development.
    os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'

    from .query import Query

    query = Query.from_dict(json.loads(flask.request.form['query']))
    if not current_user_has_permission(query):
        return flask.make_response(acl.inline_permission_denied_message(), 403)

    # The query is parsed a second time so that the session gets its own, untouched copy
    flask.session['query_for_google_sheet_callback'] = json.loads(flask.request.form['query'])

    # Authorization
    flow = google_auth_oauthlib.flow.Flow.from_client_config(
        config.google_sheet_oauth2_client_config(), scopes=SCOPES)

    # Where the API server redirects the user once the authorization flow is completed. Required.
    flow.redirect_uri = flask.url_for('mara_data_explorer.google_sheet_oauth2callback',
                                      _external=True)

    # Url for the request to Google's OAuth 2.0 server:
    # - access_type='offline' allows refreshing an access token without re-prompting the
    #   user for permission (recommended for web server apps)
    # - include_granted_scopes='true' enables incremental authorization (best practice)
    authorization_url, state = flow.authorization_url(access_type='offline',
                                                      include_granted_scopes='true')

    # The callback needs the state for verifying the response of the auth server ...
    flask.session['state'] = state

    # ... and the formatting parameters for filling the sheet
    flask.session['decimal_mark'] = flask.request.form['decimal-mark']
    flask.session['array_format'] = flask.request.form['array-format']

    return flask.redirect(authorization_url)
def google_sheet_oauth2callback():
    """Finish the OAuth 2.0 flow, create a Google sheet and fill it with the query result.

    Expects the session entries `query_for_google_sheet_callback`, `state`, `decimal_mark`
    and `array_format` to be present. The latter two are removed from the session.

    The rows returned by `query.as_rows_for_google_sheet` (called with `limit=100000`) are
    written with one `values().batchUpdate` request per 10,000 rows.
    Api limits: https://developers.google.com/sheets/api/limits

    Returns:
        A redirect to the created spreadsheet, or a 500 response when the Google client
        packages can not be imported.
    """
    try:
        import google_auth_oauthlib.flow
        import googleapiclient.discovery
    except ImportError:
        install_hint = _.tt(style='color:red')[
            "Please install the packages 'google_auth_oauthlib' and 'google-api-python-client'"]
        return flask.make_response(str(install_hint), 500)

    from .query import Query

    query = Query.from_dict(flask.session['query_for_google_sheet_callback'])

    # The flow is created with the state from the session so that it can be
    # verified against the response of the authorization server.
    flow = google_auth_oauthlib.flow.Flow.from_client_config(
        config.google_sheet_oauth2_client_config(),
        scopes=SCOPES,
        state=flask.session['state'])
    flow.redirect_uri = flask.url_for('mara_data_explorer.google_sheet_oauth2callback',
                                      _external=True)

    # Fetch the OAuth 2.0 tokens using the response of the authorization server
    flow.fetch_token(authorization_response=flask.request.url)

    # <data set id>[-<query id>]-<today>
    title_parts = [query.data_set_id]
    if query.query_id:
        title_parts.append(query.query_id)
    title_parts.append(datetime.date.today().isoformat())
    spreadsheet_title = '-'.join(title_parts)

    decimal_mark = flask.session.pop('decimal_mark')
    array_format = flask.session.pop('array_format')

    service = googleapiclient.discovery.build('sheets', 'v4', credentials=flow.credentials)

    # The decimal mark is determined through the locale of the Google sheet
    locale = 'de_DE' if decimal_mark == ',' else 'en_US'
    create_request = service.spreadsheets().create(
        body={'properties': {'title': spreadsheet_title, 'locale': locale}},
        fields='spreadsheetId')
    spreadsheet_id = create_request.execute().get('spreadsheetId')

    def upload(values, first_row):
        """Writes `values` into the sheet, starting in column A of row `first_row`"""
        body = {
            "data": [
                {
                    "values": values,
                    "range": "A" + str(first_row),
                    "majorDimension": "ROWS"
                }
            ],
            # USER_ENTERED: The values will be parsed as if the user typed them into the UI.
            # Numbers will stay as numbers, but strings may be converted to numbers, dates, etc.
            # RAW: The values the user has entered will not be parsed and will be stored as-is.
            "valueInputOption": "USER_ENTERED"
        }
        service.spreadsheets().values().batchUpdate(spreadsheetId=spreadsheet_id,
                                                    body=body).execute()

    batch_size = 10000
    rows = query.as_rows_for_google_sheet(
        array_format=array_format,
        limit=100000,
        include_personal_data=acl.current_user_has_permission(personal_data_acl_resource))

    pending = []  # rows collected for the next request
    uploaded = 0  # number of rows that were already sent
    for row in rows:
        pending.append(row)
        if len(pending) == batch_size:
            upload(pending, uploaded + 1)
            uploaded += batch_size
            pending = []

    if uploaded + len(pending) < batch_size:
        # Less than one full batch in total (possibly no rows at all): a single request
        upload(pending, 1)
    elif pending:
        # Rows left over after the last full batch
        upload(pending, uploaded + 1)

    return flask.redirect('https://docs.google.com/spreadsheets/d/' + str(spreadsheet_id))
def dependency_graph(nodes: {str: pipelines.Node},
                     current_node: pipelines.Node = None) -> str:
    """
    Renders pipeline nodes and the dependencies between them with graphviz

    Edges between two rendered nodes are drawn solid, or dashed in a lighter color when the
    upstream can also be reached through another upstream of the node (the edge is redundant).
    Upstreams and downstreams that are not part of `nodes` are shown as dotted edges from / to
    an invisible placeholder node, with the id of the missing node as edge tooltip.

    Args:
        nodes: The nodes to render, by node id
        current_node: If not null, then this node is highlighted (and not linked)

    Returns:
        An svg representation of the graph, or the html markup of an error message
        when the graphviz executable is not found
    """
    import graphviz

    graph = graphviz.Digraph(graph_attr={'rankdir': 'TD', 'ranksep': '0.25', 'nodesep': '0.1'})

    placeholder_attributes = {'style': 'invis', 'label': '', 'height': '0.1', 'fixedsize': 'true'}

    # for finding redundant edges
    @functools.lru_cache(maxsize=None)
    def indirect_upstream_ids(start: pipelines.Node, is_start: bool = True):
        """The ids of all transitive upstreams of `start`, without its direct upstreams
        unless these are also reachable through another upstream"""
        found = set()
        for parent in start.upstreams:
            if not is_start:
                found.add(parent.id)
            found |= indirect_upstream_ids(parent, False)
        return found

    for node in nodes.values():
        attributes = {
            'fontname': ' ',  # use website default
            'fontsize': '10.5px'  # fontsize unfortunately must be set
        }

        if node != current_node:
            attributes['href'] = views.node_url(node)
            attributes['fontcolor'] = '#0275d8'
            attributes['tooltip'] = node.description
            attributes['color'] = 'transparent'
        else:
            attributes['color'] = '#888888'
            attributes['style'] = 'dotted'

        if isinstance(node, pipelines.Pipeline):
            attributes['shape'] = 'rectangle'
            attributes['style'] = 'dotted'
            attributes['color'] = '#888888'
        elif isinstance(node, pipelines.ParallelTask):
            attributes['shape'] = 'ellipse'
            attributes['style'] = 'dotted'
            attributes['color'] = '#888888'
        else:
            attributes['shape'] = 'rectangle'

        graph.node(name=node.id, label=node.id.replace('_', '\n'), _attributes=attributes)

        for upstream in node.upstreams:
            if upstream.id in nodes:
                if upstream.id in indirect_upstream_ids(node):
                    edge_style, edge_color = 'dashed', '#cccccc'
                else:
                    edge_style, edge_color = 'solid', '#888888'

                graph.edge(upstream.id, node.id,
                           _attributes={'color': edge_color,
                                        'arrowsize': '0.7',
                                        'style': edge_style})
            elif (not current_node) or node in current_node.upstreams:
                placeholder_id = f'{upstream.id}_{node.id}'
                graph.node(name=placeholder_id, _attributes=dict(placeholder_attributes))
                graph.edge(placeholder_id, node.id,
                           _attributes={'color': '#888888',
                                        'arrowsize': '0.7',
                                        'edgetooltip': upstream.id,
                                        'style': 'dotted'})

        for downstream in node.downstreams:
            if downstream.id in nodes:
                continue
            if not current_node or node in current_node.downstreams:
                placeholder_id = f'{downstream.id}_{node.id}'
                graph.node(name=placeholder_id, _attributes=dict(placeholder_attributes))
                graph.edge(node.id, placeholder_id,
                           _attributes={'color': '#888888',
                                        'arrowsize': '0.7',
                                        'edgetooltip': downstream.id,
                                        'style': 'dotted'})

    try:
        svg = graph.pipe('svg')
    except graphviz.backend.ExecutableNotFound as e:
        return str(_.tt(style='color:red')[str(e)])
    return svg.decode('utf-8')
def draw_schema(db_alias: str, schemas: str):
    """Draws a chart of the tables and FK relationships in a given database and schema list

    Request parameters:
        hide-columns: When set to a non-empty value, only the table names are shown
        engine: The graphviz layout engine, defaults to `neato`

    Args:
        db_alias: The alias of the database, must be one of `config.databases()` (404 otherwise)
        schemas: The names of the schemas to draw, separated by `/`

    Returns:
        A response that offers the svg as download `<today>-<db_alias>.svg`, or the html
        markup of an error message when the graphviz executable is not found
    """
    if db_alias not in config.databases():
        flask.abort(404, f'unkown database {db_alias}')

    schema_names = schemas.split('/')
    hide_columns = flask.request.args.get('hide-columns')
    engine = flask.request.args.get('engine', 'neato')

    tables, fk_constraints = extract_schema(db_alias, schema_names)

    import graphviz.backend

    graph = graphviz.Digraph(engine=engine,
                             graph_attr={'splines': 'True', 'overlap': 'ortho'})

    # Background colors for the tables, assigned to schemas in order of first appearance
    palette = ['#ffffcc', '#bbffcc', '#cceeff', '#eedd99', '#ddee99', '#99ddff', '#dddddd']
    schema_colors = {}

    fk_pattern = re.compile(config.schema_ui_foreign_key_column_regex())

    table_attributes = {
        'fontname': 'Helvetica, Arial, sans-serif',
        'fontsize': '10',
        'fontcolor': '#555555',
        'shape': 'none'
    }

    for schema_name, table_name in sorted(tables):
        if schema_name not in schema_colors:
            schema_colors[schema_name] = palette[len(schema_colors) % len(palette)]

        # The pieces of the html-like graphviz label of the table
        parts = ['< <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="1" BGCOLOR="',
                 schema_colors[schema_name],
                 '"><TR>']

        if hide_columns:
            # name only, wrapped at underscores
            parts += ['<TD ALIGN="LEFT"> ', table_name.replace('_', '<BR/>'), ' </TD></TR>']
        else:
            parts += ['<TD ALIGN="LEFT"><U><B> ', table_name, ' </B></U></TD></TR>']

            table = tables[(schema_name, table_name)]
            for column in table['columns']:
                parts.append('<TR><TD ALIGN="LEFT" > ')
                # Highlight columns that match the foreign key pattern
                # but are not among the constrained columns of the table
                if fk_pattern.match(column) and column not in table['constrained-columns']:
                    parts += ['<B><I><FONT COLOR="#dd55dd"> ', column, ' </FONT></I></B>']
                else:
                    parts.append(column)
                parts.append(' </TD></TR>')

        parts.append('</TABLE> >')

        graph.node(name=schema_name + '.' + table_name,
                   label=''.join(parts),
                   _attributes=dict(table_attributes))

    for (schema_name, table_name), (referenced_schema_name, referenced_table_name) \
            in fk_constraints:
        source = schema_name + '.' + table_name
        target = referenced_schema_name + '.' + referenced_table_name
        graph.edge(source, target, _attributes={'color': '#888888'})

    try:
        svg = graph.pipe('svg').decode('utf-8')
    except graphviz.backend.ExecutableNotFound as e:
        return str(_.tt(style='color:red')[str(e)])

    today = datetime.date.today().isoformat()
    response = flask.Response(svg)
    response.headers['Content-Disposition'] = f'attachment; filename="{today}-{db_alias}.svg"'
    return response