Пример #1
0
def pipeline_children_table(path: str):
    """Render an html table with one row per direct child node of a pipeline.

    Args:
        path: The path of the pipeline, node ids separated by `/`

    Returns:
        A string consisting of a script that creates `runButtons`, the
        table itself and a script that calls `floatMaraTableHeaders()`
    """
    pipeline, __ = pipelines.find_node(path.split('/'))
    assert (isinstance(pipeline, pipelines.Pipeline))

    cost_stats = node_cost.node_durations_and_run_times(pipeline.path())

    def render_row(child):
        """Builds the table row for a single child node"""
        # children without statistics are shown with empty duration cells
        avg_duration, avg_run_time = cost_stats.get(tuple(child.path()), ['', ''])

        # allow the browser to wrap long ids after each underscore
        link_cell = _.td[_.a(href=views.node_url(child))[child.id.replace('_', '_<wbr>')]]
        description_cell = _.td[child.description]
        labels_cell = _.td[views.format_labels(child)]
        duration_cell = _.td[node_cost.format_duration(avg_duration)]

        # the run time is greyed out when it equals the duration
        run_time_style = 'color:#bbb' if avg_duration == avg_run_time else ''
        run_time_cell = _.td(style=run_time_style)[node_cost.format_duration(avg_run_time)]

        cost_cell = _.td[node_cost.format_duration(node_cost.compute_cost(child, cost_stats))]

        if config.allow_run_from_web_ui():
            checkbox = _.input(class_='pipeline-node-checkbox', type='checkbox',
                               value=child.id, name='ids[]', onchange='runButtons.update()')
        else:
            checkbox = ''
        checkbox_cell = _.td[checkbox]

        return _.tr[link_cell, description_cell, labels_cell, duration_cell,
                    run_time_cell, cost_cell, checkbox_cell]

    rows = [render_row(child) for child in pipeline.nodes.values()]

    header = ['ID', 'Description', '', 'Avg duration', 'Avg run time', 'Cost', '']
    return ''.join([
        str(_.script['var runButtons = new PipelineRunButtons();']),
        str(bootstrap.table(header, rows)),
        str(_.script['floatMaraTableHeaders();']),
    ])
Пример #2
0
def run(path, nodes, with_upstreams):
    """Run a pipeline, optionally restricted to some of its nodes.

    Args:
        path: The ids of the nodes leading to the pipeline, separated by `,`
        nodes: The ids of the child nodes to run, separated by `,`.
            When falsy, an empty selection is passed to `run_pipeline`
        with_upstreams: Passed through to `run_pipeline`

    Prints a message to stderr and calls `sys.exit(-1)` when the pipeline or
    one of the nodes can not be found, or when the path points to something
    that is not a pipeline. Also calls `sys.exit(-1)` when `run_pipeline`
    returns a falsy value.
    """

    def fail(message):
        """Reports a problem on stderr and terminates the process"""
        print(message, file=sys.stderr)
        sys.exit(-1)

    # resolve the pipeline to run
    path = path.split(',')
    pipeline, found = pipelines.find_node(path)
    if not found:
        fail(f'Pipeline {path} not found')
    if not isinstance(pipeline, pipelines.Pipeline):
        fail(f'Node {path} is not a pipeline, but a {pipeline.__class__.__name__}')

    # resolve the nodes that should be run selectively
    requested_ids = nodes.split(',') if nodes else []
    selected = set()
    for node_id in requested_ids:
        node = pipeline.nodes.get(node_id)
        if not node:
            fail(f'Node "{node_id}" not found in pipeline {path}')
        selected.add(node)

    succeeded = run_pipeline(pipeline, selected, with_upstreams)
    if not succeeded:
        sys.exit(-1)
Пример #3
0
def node_page(path: str):
    """Build the page that visualizes a single node.

    Args:
        path: The path of the node, node ids separated by `/`

    Returns:
        A redirect to the url of the returned node when `find_node` reports
        the path as not found but still returns a node, otherwise a
        `response.Response` with title, action buttons and content cards.
        Aborts with 404 when no node is returned at all.
    """
    node, found = pipelines.find_node(path.split('/'))
    if not node:
        flask.abort(404, f'Node "{path}" not found')
    if not found:
        return flask.redirect(views.node_url(node), 302)

    def static(filename):
        """Returns the url of a static file of the data_integration blueprint"""
        return flask.url_for('data_integration.static', filename=filename)

    if node.parent:
        # links to the ancestors, without the first and the last entry of `parents()`
        breadcrumbs = [[_.a(href=views.node_url(parent))[parent.id], ' / ']
                       for parent in node.parents()[1:-1]]
        title = [node.__class__.__name__, ' ', breadcrumbs, node.id]
    else:
        title = 'Data Integration'

    buttons = action_buttons(node) if config.allow_run_from_web_ui() else []

    base_url = flask.url_for('data_integration.node_page', path='')
    init_script = _.script['''
var nodePage = null;
document.addEventListener('DOMContentLoaded', function() {
     nodePage = NodePage("''' + base_url + '''");
});''']

    content = [init_script,
               dependency_graph.card(node),
               run_time_chart.card(node),
               node_content(node),
               last_runs.card(node)]

    local_scripts = ['node-page.js', 'utils.js', 'run-time-chart.js',
                     'system-stats-chart.js', 'timeline-chart.js', 'kolorwheel.js']
    js_files = ['https://www.gstatic.com/charts/loader.js'] + [static(name) for name in local_scripts]
    css_files = [static(name) for name in ['common.css', 'node-page.css', 'timeline-chart.css']]

    return response.Response(
        title=title,
        action_buttons=buttons,
        html=content,
        js_files=js_files,
        css_files=css_files)
Пример #4
0
def run_time_chart(path: str):
    """Render the run time chart of a node as html.

    Args:
        path: The path of the node, node ids separated by `/`

    Returns:
        A div with the chart container and a script that calls
        `drawRunTimeChart`, or an italic 'Not enough data' note when fewer
        than two rows are available. Aborts with 404 for an unknown node.
    """
    node, found = pipelines.find_node(path.split('/'))
    if not found:
        flask.abort(404, f'Node "{path}" not found')

    # presumably this file defines pg_temp.node_run_times, which is queried below
    sql_file = pathlib.Path(__file__).parent / 'run_time_chart.sql'
    query = sql_file.read_text()

    with mara_db.postgresql.postgres_cursor_context(
            'mara') as cursor:  # type: psycopg2.extensions.cursor
        cursor.execute(query)
        cursor.execute('SELECT row_to_json(t) FROM pg_temp.node_run_times(%s) t',
                       (node.path(), ))
        rows = [record[0] for record in cursor.fetchall()]

        if len(rows) <= 1:
            return str(_.i(style='color:#888')['Not enough data'])

        # the chart grows with the number of child runs of the first row
        child_runs = rows[0]['child_runs']
        number_of_child_runs = len(child_runs) if child_runs else 0

        chart_container = _.div(id='run-time-chart',
                                class_='google-chart',
                                style=f'height:{100+15*number_of_child_runs}px')[' ']
        chart_script = _.script[f'''
drawRunTimeChart('run-time-chart', '{path}', {json.dumps(rows)});
    ''']
        return str(_.div[chart_container, chart_script])
Пример #5
0
def reset_incremental_processing(path):
    """Reset the incremental processing status of a node.

    Args:
        path: The ids of the nodes leading to the node, separated by `,`.
            A falsy value results in an empty path

    Prints a message to stderr and calls `sys.exit(-1)` when the node can
    not be found.
    """
    node_path = path.split(',') if path else []

    # only the `found` flag is needed, the node itself is not used
    __, found = pipelines.find_node(node_path)
    if found:
        reset.reset_incremental_processing(node_path)
        return

    print(f'Node {node_path} not found', file=sys.stderr)
    sys.exit(-1)
Пример #6
0
def do_run(path: str, with_upstreams: bool, ids: str):
    """Run a pipeline and stream the resulting events to the client.

    Args:
        path: The path of the pipeline, node ids separated by `/`
        with_upstreams: Passed through to `execution.run_pipeline`
        ids: The ids of the child nodes to run, separated by `/`.
            When falsy, an empty selection is passed to `execution.run_pipeline`

    Returns:
        A `text/event-stream` response with one message per event, using the
        class name of the event as event type and its json as data.
        Aborts with 403 when running from the web ui is disabled and with 404
        when the pipeline or one of the requested nodes does not exist.
    """
    if not config.allow_run_from_web_ui():
        flask.abort(
            403,
            'Running piplelines from web ui is disabled for this instance')
    pipeline, found = pipelines.find_node(path.split('/'))
    if not found:
        flask.abort(404, f'Pipeline "{path}" not found')

    # Resolve the ids before streaming starts, so that an unknown id is
    # answered with a 404 (as in `run_page`) instead of an unhandled KeyError
    nodes = set()
    for node_id in (ids.split('/') if ids else []):
        node = pipeline.nodes.get(node_id)
        if not node:
            flask.abort(404, f'Node "{node_id}" not found in pipeline "{path}"')
        nodes.add(node)

    def process_events():
        """Formats each event of the run as a server-sent event message"""
        for event in execution.run_pipeline(pipeline, nodes, with_upstreams):
            yield f'event: {event.__class__.__name__}\ndata: ' + event.to_json(
            ) + '\n\n'

    return flask.Response(process_events(), mimetype="text/event-stream")
Пример #7
0
def timeline_chart(path: str, run_id: int):
    """Render the script that draws the timeline chart of a node run.

    Args:
        path: The path of the node, node ids separated by `/`
        run_id: The id of the run to show. When falsy, the result of
            `_latest_run_id` for the node is used

    Returns:
        A script element calling `drawTimelineChart` with one entry per node
        run below the node, or an empty string when there is no run or no
        such entry
    """
    node, __ = pipelines.find_node(path.split('/'))

    run_id = run_id or _latest_run_id(node.path())
    if not run_id:
        return ''

    # selects the runs of all nodes whose path starts with the path of
    # `node` and is longer than it; spelled out line by line so that the
    # statement text stays exactly as it was
    query = ('\n'
             'SELECT node_path, start_time, end_time, succeeded, is_pipeline\n'
             'FROM data_integration_node_run\n'
             'WHERE node_path [1 :%(level)s] = %(node_path)s\n'
             '      AND array_length(node_path, 1) > %(level)s  \n'
             '      AND run_id = %(run_id)s;')

    with mara_db.postgresql.postgres_cursor_context(
            'mara') as cursor:  # type: psycopg2.extensions.cursor
        parameters = {
            'level': len(node.path()),
            'node_path': node.path(),
            'run_id': run_id,
        }
        cursor.execute(query, parameters)

        nodes = []
        for node_path, start_time, end_time, succeeded, is_pipeline in cursor.fetchall():
            # the label is the part of the path below `node`
            relative_path = node_path[len(node.path()):]
            nodes.append({
                'label': ' / '.join(relative_path),
                'status': 'succeeded' if succeeded else 'failed',
                'type': 'pipeline' if is_pipeline else 'task',
                'url': flask.url_for('data_integration.node_page',
                                     path='/'.join(node_path)),
                'start': start_time.isoformat(),
                'end': end_time.isoformat(),
            })

        if not nodes:
            return ''

        return str(_.script[
            f"drawTimelineChart('timeline-chart', {json.dumps(nodes)})"])
Пример #8
0
def run_output(path: str, run_id: int, limit: bool):
    """
    Returns the output of a node and its children as html

    Args:
        path: The path of the node
        run_id: The id of the run to return. If None, then the latest run is returned
        limit: When true, at most `line_limit` (1000) output lines are returned

    Returns:
        A script element that calls `nodePage.showOutput` with the output rows,
        the path and a flag that tells whether the output was cut off,
        or an empty string when there is no run
    """
    node, __ = pipelines.find_node(path.split('/'))

    run_id = run_id or _latest_run_id(node.path())

    if not run_id:
        return ''

    def js_literal(value):
        """Serializes a value so that it can be embedded in an inline script"""
        # JSON is valid JavaScript. Additionally escape "</" (which can only
        # occur inside JSON strings) so that a value can not terminate the
        # surrounding <script> element
        return json.dumps(value).replace('</', '<\\/')

    line_limit = 1000

    # one row more than the limit is requested in order to detect whether
    # there is more output than what is returned
    query = ('\n'
             'SELECT node_path, message, format, is_error\n'
             'FROM data_integration_node_run\n'
             '  JOIN data_integration_node_output USING (node_run_id)\n'
             'WHERE node_path [1:%s] = %s \n'
             '      AND run_id = %s\n'
             'ORDER BY timestamp \n')
    if limit:
        query += 'LIMIT ' + str(line_limit + 1)

    with mara_db.postgresql.postgres_cursor_context(
            'mara') as cursor:  # type: psycopg2.extensions.cursor
        cursor.execute(query, (len(node.path()), node.path(), run_id))

        rows = cursor.fetchall()

        # Output can only have been cut off when a limit was applied. Without
        # this check, an unlimited result of exactly `line_limit + 1` rows
        # would wrongly be reported as cut off
        truncated = bool(limit) and len(rows) > line_limit

        # `path` comes from the request, so it is serialized rather than
        # pasted between quotes
        return str(_.script[f"""
nodePage.showOutput({js_literal(rows[:line_limit] if limit else rows)},
               {js_literal(path)}, 
               {'true' if truncated else 'false'});
"""])
Пример #9
0
def last_runs_selector(path: str):
    """
    Builds the html select element for choosing among the runs of a node

    Args:
        path: The path of the node

    Returns:
        A `<select..><option ../><option ../></select>` element as a string,
        with one option per run of the node, highest run id first
    """
    node, __ = pipelines.find_node(path.split('/'))

    # spelled out line by line so that the statement text stays exactly as it was
    query = ('\n'
             'SELECT\n'
             '  run_id,\n'
             "  to_char(start_time, 'Mon DD HH24:MI') AS start_time,\n"
             '  extract(EPOCH FROM (end_time - start_time)) AS duration,\n'
             '  succeeded\n'
             'FROM data_integration_node_run\n'
             'WHERE node_path = %s\n'
             'ORDER BY run_id DESC;')

    with mara_db.postgresql.postgres_cursor_context(
            'mara') as cursor:  # type: psycopg2.extensions.cursor
        cursor.execute(query, (node.path(), ))

        selector = _.select(id='last-runs-selector',
                            class_='custom-select',
                            style="border:none",
                            onchange=f"nodePage.switchRun(this.value, '{path}')")

        options = []
        for run_id, start_time, duration, succeeded in cursor.fetchall():
            outcome = 'succeeded' if succeeded else 'failed'
            label = f'{start_time}  ({node_cost.format_duration(duration)}, {outcome})'
            options.append(_.option(value=str(run_id))[label])

        return str(selector[options])
Пример #10
0
def system_stats(path: str, run_id: int):
    """Render the system statistics chart for a run of a node.

    Args:
        path: The path of the node, node ids separated by `/`
        run_id: The id of the run to show. When falsy, the result of
            `_latest_run_id` for the node is used

    Returns:
        A chart container followed by a script calling
        `nodePage.showSystemStats`, or an empty string when there is no run
        or when fewer than 15 measurements were found
    """
    node, __ = pipelines.find_node(path.split('/'))

    run_id = run_id or _latest_run_id(node.path())
    if not run_id:
        return ''

    # spelled out line by line so that the statement text stays exactly as it was
    query = ('\n'
             'SELECT data_integration_system_statistics.*\n'
             'FROM data_integration_system_statistics\n'
             '  JOIN data_integration_node_run ON timestamp BETWEEN start_time AND end_time\n'
             'WHERE run_id = %s AND node_path = %s;')

    with mara_db.postgresql.postgres_cursor_context(
            'mara') as cursor:  # type: psycopg2.extensions.cursor
        cursor.execute(query, (run_id, node.path()))

        data = []
        for row in cursor.fetchall():
            # the first column is converted to an iso string so that the row
            # can be passed to json.dumps
            data.append([row[0].isoformat(), *row[1:]])

        if len(data) < 15:
            return ''

        container = _.div(id='system-stats-chart', class_='google-chart')[' ']
        script = _.script[f'nodePage.showSystemStats({json.dumps(data)});']
        return str(container) + str(script)
Пример #11
0
def dependency_graph(path: str):
    """Resolve a path to a node and return `dependency_graph(node)` for it.

    Args:
        path: The path of the node, node ids separated by `/`

    Aborts with 404 when the node can not be found.

    NOTE(review): the call below looks up the name `dependency_graph` at call
    time. Unless that name is rebound elsewhere in the module (not visible
    here) to something that accepts a node, this function calls itself with a
    node instead of a string -- confirm.
    """
    target, exists = pipelines.find_node(path.split('/'))
    if exists:
        return dependency_graph(target)

    flask.abort(404, f'Node "{path}" not found')
Пример #12
0
def run_page(path: str, with_upstreams: bool, ids: str):
    """Build the page that starts a pipeline run and displays its progress.

    Args:
        path: The path of the pipeline, node ids separated by `/`
        with_upstreams: Forwarded to the `data_integration.do_run` url
        ids: The ids of the child nodes to run, separated by `/`, or a falsy
            value for no selection

    Returns:
        A `response.Response` with a script that calls `processRunEvents`,
        the output area, the timeline card and the task containers.
        Aborts with 403 when running from the web ui is disabled and with 404
        when the pipeline or one of the requested nodes does not exist.
    """
    if not config.allow_run_from_web_ui():
        flask.abort(
            403,
            'Running piplelines from web ui is disabled for this instance')

    # resolve the pipeline to run
    pipeline, found = pipelines.find_node(path.split('/'))
    if not found:
        flask.abort(404, f'Pipeline "{path}" not found')
    assert (isinstance(pipeline, pipelines.Pipeline))

    # resolve the nodes that should be run selectively
    selected_nodes = []
    for node_id in (ids.split('/') if ids else []):
        node = pipeline.nodes.get(node_id)
        if not node:
            flask.abort(404, f'Node "{node_id}" not found in pipeline "{path}"')
        selected_nodes.append(node)

    stream_url = flask.url_for('data_integration.do_run',
                               path=path,
                               with_upstreams=with_upstreams,
                               ids=ids)

    def link(target):
        """Returns the html of a link to the page of a node"""
        return str(_.a(href=views.node_url(target))[target.id])

    def static(filename):
        """Returns the url of a static file of the data_integration blueprint"""
        return flask.url_for('data_integration.static', filename=filename)

    title = ['Run ',
             'with upstreams ' if with_upstreams else '',
             ' / '.join([link(parent) for parent in pipeline.parents()[1:]])]
    if selected_nodes:
        title += [' / [', ', '.join([link(node) for node in selected_nodes]), ']']

    # the arguments of the processRunEvents call, each one json encoded
    script_arguments = ', '.join([
        json.dumps(flask.url_for('data_integration.node_page', path='')),
        json.dumps(stream_url),
        json.dumps(pipeline.path()),
    ])
    start_script = _.script['''
document.addEventListener('DOMContentLoaded', function() {
     processRunEvents(''' + script_arguments + ''');
});''']

    # hide reload button until run finishes
    hide_buttons_style = _.style['span.action-buttons > * {display:none}']

    output_column = _.div(class_='col-lg-7')[
        bootstrap.card(body=_.div(id='main-output-area', class_='run-output')[''])]

    timeline_card = bootstrap.card(
        header_left='Timeline',
        body=[_.div(id='system-stats-chart', class_='google-chart')[' '],
              _.div(id='timeline-chart')[' ']])
    card_template = bootstrap.card(id='card-template',
                                   header_left=' ',
                                   header_right=' ',
                                   body=[_.div(class_='run-output')['']])
    side_column = _.div(class_='col-lg-5 scroll-container')[
        timeline_card,
        _.div(id='failed-tasks-container')[''],
        _.div(id='running-tasks-container')[''],
        _.div(id='succeeded-tasks-container')[''],
        card_template]

    content = [start_script,
               hide_buttons_style,
               _.div(class_='row')[output_column, side_column]]

    js_files = ['https://www.gstatic.com/charts/loader.js',
                static('timeline-chart.js'),
                static('system-stats-chart.js'),
                static('utils.js'),
                static('run-page.js')]
    css_files = [static('timeline-chart.css'),
                 static('run-page.css'),
                 static('common.css')]

    reload_button = response.ActionButton(
        action='javascript:location.reload()',
        label='Run again',
        icon='play',
        title='Run pipeline again with same parameters as before')

    return response.Response(
        html=content,
        js_files=js_files,
        css_files=css_files,
        action_buttons=[reload_button],
        title=title,
    )