示例#1
0
 def retstart(self, purge_data=False):
     """Kill the current worker process and start a replacement.

     Args:
         purge_data: When true, the datasource schema belonging to
             ``self.runtime_id`` is purged through a short-lived
             ``DatabaseConnection`` before the new process is started.

     Raises:
         Whatever the database calls raise; the temporary connection is
         disconnected first.
     """
     self._process.kill()
     if purge_data:
         dbc = DatabaseConnection(self.runtime_id)
         dbc.connect_to_database()
         try:
             dbc.purge_datasource_schema()
         finally:
             # Release the connection even when the purge itself fails.
             dbc.disconnect_from_database()
     # Every restart gets a fresh duplex pipe: one end is kept on this
     # object, the other is handed to the new process.
     child_pipe, parent_pipe = Pipe(duplex=True)
     self._child_pipe = child_pipe
     self._start_process(
         name=self.name,
         runtime_id=self.runtime_id,
         module_path=self.module_path,
         parent_pipe=parent_pipe,
     )
示例#2
0
# from app import app
# from flask import url_for, redirect, render_template, flash, g, session

import logging
from datetime import datetime
from database_connection import DatabaseConnection

# Called without a name, getLogger() returns the root logger.
log = logging.getLogger()

# Module-wide connection shared by the handlers below; it is opened as a
# side effect of importing this module.
db_connection = DatabaseConnection()
db_connection.connect_to_database()

# Naive UTC timestamp taken at import time; heartbeat() subtracts it from
# another naive utcnow() value, so the two must stay the same kind.
boot_time = datetime.utcnow()


def heartbeat():
    """Report when this module was loaded and how long ago that was.

    Returns:
        A ``(body, status)`` tuple: ``body`` holds ``bootTime`` (the
        module-level ``boot_time``) and ``uptime`` in seconds, and
        ``status`` is always 200.
    """
    elapsed = datetime.utcnow() - boot_time
    body = {
        "bootTime": boot_time,
        "uptime": elapsed.total_seconds(),
    }
    return body, 200


def get_available_data_sources():
    """Return the datasources reported by the shared database connection.

    Returns:
        A ``(body, status)`` tuple whose body is the result of
        ``db_connection.get_available_datasources()`` and whose status is
        always 200.
    """
    datasources = db_connection.get_available_datasources()
    return datasources, 200


def get_latest_data_for_source(datasource_id):
    """Look up the most recent run for ``datasource_id``.

    Returns a ``(body, 404)`` tuple carrying an error message when the
    lookup yields ``None``.

    NOTE(review): no return for the found case is visible in this excerpt,
    so as written the function falls through and returns ``None`` -- confirm
    whether the rest of the body was cut off.
    """
    res = db_connection.get_most_recent_run_for_datasource(datasource_id)
    if res is None:
        return ({"error": f"can't find datasource {datasource_id}"}, 404)
示例#3
0
    def do_run(name, runtime_id, module_path, parent_pipe):
        """Run the worker loop for one data-source plugin; never returns.

        The plugin module is loaded from ``module_path`` and must expose
        ``init``, ``schedule``, ``get_fields``, ``fetch_data`` and
        ``clean_data``, plus ``ingest_data`` when ``schedule()`` returns
        ``None``.

        Roughly once per second the loop drains ``parent_pipe`` and then
        decides whether a run is due:

        * with a schedule trigger, a run is due when no next trigger time
          is known yet or that time has been reached;
        * without one, a run is due when at least one ``"msg_ingest"``
          message arrived during this iteration.

        A due run while paused is reported as ``"paused"`` and skipped.

        Messages handled from the parent are ``(type, body)`` tuples with
        type ``"msg_ingest"``, ``"pause"`` or ``"unpause"``; anything else
        is only logged. Messages sent to the parent are
        ``("update_trigger_time", time)`` and ``("update_status", status)``
        with status ``"running"``, ``"paused"`` or ``"sleeping"``.

        Args:
            name: Plugin name; used in the imported module's name and
                passed to the plugin's ``init``.
            runtime_id: Passed to the plugin's ``init`` and to
                ``DatabaseConnection``.
            module_path: Filesystem path of the plugin source file.
            parent_pipe: Connection object used to exchange messages with
                the parent.
        """
        spec = importlib.util.spec_from_file_location(f"plugin_{name}",
                                                      module_path)
        plugin_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(plugin_module)
        init = plugin_module.init
        schedule = plugin_module.schedule
        # Only plugins without a schedule are driven by pushed messages, so
        # only they are required to provide ingest_data.
        if schedule() is None:
            ingest_data = plugin_module.ingest_data

        data_source_fields = plugin_module.get_fields()
        fetch_data = plugin_module.fetch_data
        clean_data = plugin_module.clean_data

        init(runtime_id, name)

        is_paused = False
        next_trigger_time = None
        # NOTE(review): never reassigned, so get_next_fire_time() always
        # receives None as the previous fire time -- confirm this is intended.
        last_trigger_time = None
        schedule_trigger = schedule()

        def message_parent(topic, msg):
            parent_pipe.send((topic, msg))

        def tell_parent_status(status):
            parent_pipe.send(("update_status", status))

        db_connection = DatabaseConnection(runtime_id)
        db_connection.connect_to_database()

        if not db_connection.check_if_schema_exists():
            db_connection.schema_setup(data_source_fields)

        def execute_run(ingests):
            # One fetch/ingest -> clean -> archive cycle, recorded as a run.
            run_id = uuid4()
            run_start_time = datetime.now(timezone.utc)
            run_succeeded = False
            tell_parent_status("running")
            db_connection.begin_run(run_id)
            db_connection.empty_current_raw_table()
            db_connection.empty_current_clean_table()
            try:
                db_connection.log(
                    time=datetime.now(timezone.utc),
                    severity="info",
                    message="started run",
                    run_id=run_id,
                )
                db_connection.update_run(run_id, "fetching")
                if schedule_trigger is None:
                    raw_data = ingest_data(db_connection, run_id, ingests)
                else:
                    raw_data = fetch_data(db_connection, run_id)
                db_connection.insert_data_current_raw(run_id, raw_data)

                db_connection.update_run(run_id, "cleaning")
                cleaned_data = clean_data(db_connection, run_id, raw_data)
                db_connection.insert_data_current_clean(run_id, cleaned_data)

                db_connection.archive_raw()
                db_connection.archive_clean()
                db_connection.log(
                    time=datetime.now(timezone.utc),
                    severity="info",
                    message="finished run",
                    run_id=run_id,
                )
                run_succeeded = True
            except Exception:
                # A failing plugin must not take the worker down: record the
                # traceback against the run and keep looping.
                trace = traceback.format_exc()
                db_connection.log(
                    time=datetime.now(timezone.utc),
                    severity="error",
                    message=trace,
                    run_id=run_id,
                )
                log.error(trace)
            finally:
                run_duration = datetime.now(timezone.utc) - run_start_time
                outcome = "succeeded" if run_succeeded else "failed"
                log.error(f"Run #{run_id} {outcome} in #{run_duration}")
                db_connection.end_run(run_id, outcome)

        while True:
            # if we have messages from the main process, handle them
            ingests = []
            while parent_pipe.poll():
                msg = parent_pipe.recv()
                msg_type, msg_body = msg
                if msg_type == "msg_ingest":
                    log.error(f"INGEST: {pformat(msg)}")
                    ingests.append(msg_body)
                elif msg_type == "pause":
                    log.error("PAUSED")
                    is_paused = True
                elif msg_type == "unpause":
                    log.error("UNPAUSED")
                    is_paused = False
                else:
                    log.error(f"MSG RECV: {pformat(msg)}")

            # after we handle pending events, if schedule says so we do a run.
            now = datetime.now(timezone.utc)
            if schedule_trigger is None:
                run_is_due = bool(ingests)
            else:
                run_is_due = (next_trigger_time is None
                              or now >= next_trigger_time)

            if run_is_due:
                if schedule_trigger is not None:
                    next_trigger_time = schedule_trigger.get_next_fire_time(
                        last_trigger_time, now)
                    message_parent("update_trigger_time", next_trigger_time)

                if is_paused:
                    # Skip the run but still fall through to the sleep
                    # below, so a paused worker cannot spin on the pipe.
                    tell_parent_status("paused")
                else:
                    execute_run(ingests)
                    tell_parent_status("sleeping")
            sleep(1)