Example #1
def create_app(config_class=None):
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Create the database if it does not exist yet. Roughly equal to
    # a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])

    db.init_app(app)
    # necessary for migration
    Migrate().init_app(app, db)

    with app.app_context():
        # Upgrade to the latest revision. This also takes care of
        # bringing an "empty" db (no tables) on par.
        upgrade()

    register_views(app)

    return app
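
A minimal sketch of how a factory like this is typically consumed,
assuming a hypothetical DevelopmentConfig class and a wsgi.py entry
point (neither appears in the example above):

# wsgi.py -- hypothetical entry point for the factory above.
from app import create_app


class DevelopmentConfig:
    SQLALCHEMY_DATABASE_URI = "sqlite:////tmp/dev.db"
    SQLALCHEMY_TRACK_MODIFICATIONS = False


app = create_app(config_class=DevelopmentConfig)

if __name__ == "__main__":
    app.run(debug=True)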
Example #2
def create_app(config_class=None):
    app = Flask(__name__)
    app.config.from_object(config_class)

    # Initialize the database and create the database file.
    db.init_app(app)
    with app.app_context():
        db.create_all()

    register_views(app)

    return app
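
Note that db.create_all() only creates tables for models that have been
imported and that do not exist yet; it never alters existing tables,
which is why the other examples use Flask-Migrate. A self-contained
sketch of the pattern, assuming a hypothetical User model and SQLite
path:

from flask import Flask
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()


class User(db.Model):
    # Hypothetical model, purely for illustration.
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(80), nullable=False)


def create_app():
    app = Flask(__name__)
    app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:////tmp/example.db"
    db.init_app(app)
    with app.app_context():
        # Creates missing tables only; existing tables are untouched.
        db.create_all()
    return app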
Example #3
def create_app(config_class=None):
    app = Flask(__name__)
    app.config.from_object(config_class)

    # Cross-origin resource sharing. Allow API to be requested from the
    # different microservices such as the webserver.
    CORS(app, resources={r'/*': {'origins': '*'}})

    # Initialize the database and create the database file.
    db.init_app(app)
    with app.app_context():
        db.create_all()

    # Register blueprints.
    app.register_blueprint(api, url_prefix='/api')

    return app
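
The wildcard origin is convenient for internal microservice traffic,
but a browser-facing deployment would normally pin the allowed origins.
A sketch of a stricter flask-cors setup (the origin list is an
assumption):

from flask import Flask
from flask_cors import CORS

app = Flask(__name__)
# Stricter CORS: only API routes, only known frontend origins.
CORS(
    app,
    resources={
        r"/api/*": {
            "origins": ["http://localhost:3000", "https://example.com"],
        }
    },
)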
Example #4
def create_app(config_class=None, to_migrate_db=False):
    """Create the Flask app and return it.

    Args:
        to_migrate_db: If True, then only initialize the db.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    if not to_migrate_db:
        orchest_config = requests.get(
            f"http://{CONFIG_CLASS.ORCHEST_API_ADDRESS}/api/ctl/orchest-settings"
        ).json()
        app.config.update(orchest_config)

    init_logging()

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Create the database if it does not exist yet. Roughly equal to
    # a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])

    db.init_app(app)
    # necessary for migration
    Migrate().init_app(app, db)

    # NOTE: In this case we want to return ASAP as otherwise the DB
    # might be called (inside this function) before it is migrated.
    if to_migrate_db:
        return app

    register_views(app)

    return app
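
The early return exists so that a dedicated migration entry point can
build the app without hitting the orchest-api or registering views. A
sketch of such an entry point, assuming Flask-Migrate's upgrade() and a
hypothetical migrate.py module:

# migrate.py -- apply DB migrations against an app built with
# to_migrate_db=True, before anything else queries the DB.
from flask_migrate import upgrade

from app import create_app


def main():
    app = create_app(to_migrate_db=True)
    with app.app_context():
        # Bring the schema to the latest Alembic revision.
        upgrade()


if __name__ == "__main__":
    main()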
Example #5
def create_app(
    config_class=None,
    use_db=True,
    be_scheduler=False,
    to_migrate_db=False,
    register_api=True,
):
    """Create the Flask app and return it.

    Args:
        config_class: Configuration class. See orchest-api/app/config.
        use_db: If true, associate a database to the Flask app instance,
            which implies connecting to a given database and possibly
            creating such database and/or tables if they do not exist
            already. This is configurable because the celery worker does
            not need to connect to the db that "belongs" to the
            orchest-api.
        be_scheduler: If true, a background thread will act as a job
            scheduler, according to the logic in core/scheduler. While
            Orchest runs, only a single process should be acting as
            scheduler.
        to_migrate_db: If True, then only initialize the DB so that the
            DB can be migrated.
        register_api: If api endpoints should be registered.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    # In development we want more verbose logging of every request.
    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Cross-origin resource sharing. Allow API to be requested from the
    # different microservices such as the webserver.
    CORS(app, resources={r"/*": {"origins": "*"}})

    if use_db:
        # Create the database if it does not exist yet. Roughly equal to
        # a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
        if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
            create_database(app.config["SQLALCHEMY_DATABASE_URI"])

        db.init_app(app)

        # Necessary for db migrations.
        Migrate().init_app(app, db)

        # NOTE: In this case we want to return ASAP as otherwise the DB
        # might be called (inside this function) before it is migrated.
        if to_migrate_db:
            return app

        with app.app_context():
            settings = utils.OrchestSettings()
            settings.save()
            app.config.update(settings.as_dict())

    # Create a background scheduler (in a daemon thread) for every
    # gunicorn worker. The individual schedulers do not cause duplicate
    # execution because all jobs of all the schedulers read state
    # from the same DB and lock rows they are handling (using a
    # `with_for_update`).
    # In case of Flask development mode, every child process will get
    # its own scheduler.
    if be_scheduler:
        # Create a scheduler and have the scheduling logic running
        # periodically.
        app.logger.info("Creating a backgroundscheduler in a daemon thread.")
        scheduler = BackgroundScheduler(
            job_defaults={
                # Infinite amount of grace time, so that if a task
                # cannot be instantly executed (e.g. if the webserver is
                # busy) then it will eventually be.
                "misfire_grace_time": 2 ** 31,
                "coalesce": False,
                # So that the same job can be in the queue an infinite
                # amount of times, e.g. for concurrent requests issuing
                # the same tasks.
                "max_instances": 2 ** 31,
            },
        )

        app.config["SCHEDULER"] = scheduler
        add_recurring_jobs_to_scheduler(scheduler, app, run_on_add=True)
        scheduler.start()

        if not _utils.is_running_from_reloader():
            with app.app_context():
                trigger_conditional_jupyter_image_build(app)

    app.logger.info("Creating required directories for Orchest services.")
    create_required_directories()

    if register_api:
        # Register blueprints at the end to avoid issues when migrating
        # the DB. When registering a blueprint the DB schema is also
        # registered, so the DB migration has to happen first.
        app.register_blueprint(api, url_prefix="/api")

    return app
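
The row-locking comment is what makes one scheduler per gunicorn worker
safe. A minimal sketch of that claim pattern in SQLAlchemy, assuming a
hypothetical Job model with uuid and status columns (skip_locked is one
common variant of the lock):

def claim_job(session, job_uuid):
    # SELECT ... FOR UPDATE SKIP LOCKED: exactly one scheduler process
    # claims the row; the others skip it instead of blocking.
    job = (
        session.query(Job)
        .filter(Job.uuid == job_uuid, Job.status == "PENDING")
        .with_for_update(skip_locked=True)
        .one_or_none()
    )
    if job is None:
        return None  # Already claimed or finished by another worker.
    job.status = "STARTED"
    session.commit()
    return job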
Example #6
def create_app():

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    socketio = SocketIO(app, cors_allowed_origins="*")
    logging.getLogger("engineio").setLevel(logging.ERROR)

    # read directory mount based config into Flask config
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception as e:
        logging.warning("Failed to load config.json: %s" % e)

    logging.info("Flask CONFIG: %s" % app.config)

    db.init_app(app)

    # According to SQLAlchemy, this will only create tables if they do not exist
    with app.app_context():
        db.create_all()

        initialize_default_images(db)
        initialize_default_datasources(db, app)

        logging.info("Initializing kernels")
        populate_kernels(app, db)

    # static file serving
    @app.route("/public/<path:path>")
    def send_files(path):
        return send_from_directory("../static", path)

    register_views(app, db)
    register_build_views(app, db, socketio)
    register_socketio_broadcast(db, socketio)

    if ("TELEMETRY_DISABLED" not in app.config
            and os.environ.get("FLASK_ENV") != "development"):
        # create thread for analytics
        scheduler = BackgroundScheduler()

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )
        scheduler.start()

    processes = []

    if process_start_gate():

        file_dir = os.path.dirname(os.path.realpath(__file__))

        # TODO: reconsider file permission approach
        # file_permission_watcher process
        permission_process = Popen(
            [
                "python3",
                "-m",
                "scripts.file_permission_watcher",
                app.config["USER_DIR"],
            ],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )
        logging.info("Started file_permission_watcher.py")
        processes.append(permission_process)

        # docker_builder process
        docker_builder_process = Popen(
            ["python3", "-m", "scripts.docker_builder"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )
        logging.info("Started docker_builder.py")
        processes.append(docker_builder_process)

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        logging.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
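
Because the factory hands back live Popen handles, the caller is
responsible for serving the SocketIO app and reaping the children. A
sketch of such an entry point (the module name, host and port are
assumptions):

# main.py -- run the SocketIO dev server and terminate the background
# processes when the interpreter exits.
import atexit

from app import create_app

app, socketio, processes = create_app()


@atexit.register
def _terminate_children():
    for proc in processes:
        proc.terminate()


if __name__ == "__main__":
    socketio.run(app, host="0.0.0.0", port=80)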
Example #7
def create_app(config_class=None, use_db=True):
    """Create the Flask app and return it.

    Args:
        config_class: Configuration class. See orchest-api/app/config.
        use_db: If true, associate a database to the Flask app instance,
            which implies connecting to a given database and possibly
            creating such database and/or tables if they do not exist
            already. This is configurable because the celery worker does
            not need to connect to the db that "belongs" to the
            orchest-api.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    # Cross-origin resource sharing. Allow API to be requested from the
    # different microservices such as the webserver.
    CORS(app, resources={r"/*": {"origins": "*"}})

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    if use_db:
        # Create the database if it does not exist yet. Roughly equal to
        # a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
        if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
            create_database(app.config["SQLALCHEMY_DATABASE_URI"])

        db.init_app(app)
        # necessary for migration
        Migrate().init_app(app, db)

        with app.app_context():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) on par.
            upgrade()

            # In case of an ungraceful shutdown, these entities could be
            # in an invalid state, so they are deleted, since for sure
            # they are not running anymore.
            # To avoid the issue of entities being deleted because of a
            # flask app reload triggered by a dev mode code change, we
            # attempt to create a directory first. Since this is an
            # atomic operation that will result in an error if the
            # directory is already there, this cleanup operation will
            # run only once per container.
            try:
                os.mkdir("/tmp/interactive_cleanup_done")
                InteractiveSession.query.delete()
                InteractivePipelineRun.query.filter(
                    InteractivePipelineRun.status.in_(
                        ["PENDING",
                         "STARTED"])).delete(synchronize_session="fetch")
                db.session.commit()
            except FileExistsError:
                pass

    # Register blueprints.
    app.register_blueprint(api, url_prefix="/api")

    return app
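
The cleanup guard relies on os.mkdir being atomic: the first process in
the container creates the directory and runs the cleanup, and every
later reload hits FileExistsError and skips it. The same idea as a
reusable helper (a sketch; the marker path is arbitrary):

import os


def run_once_per_container(marker_dir, fn):
    """Run fn() only in the first process to create marker_dir."""
    try:
        os.mkdir(marker_dir)  # Atomic: exactly one caller succeeds.
    except FileExistsError:
        return False
    fn()
    return True


# e.g. run_once_per_container("/tmp/interactive_cleanup_done", cleanup)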
Example #8
def create_app():
    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    init_logging()

    socketio = SocketIO(app, cors_allowed_origins="*")

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # read directory mount based config into Flask config
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception:
        app.logger.warning("Failed to load config.json")

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # create thread for non-cpu bound background tasks, e.g. requests
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot be
            # instantly executed (e.g. if the webserver is busy) then it
            # will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing the
            # same tasks.
            "max_instances": 2**31,
        })
    app.config["SCHEDULER"] = scheduler
    scheduler.start()

    app.logger.info("Flask CONFIG: %s" % app.config)

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)
    # necessary for migration
    Migrate().init_app(app, db)

    with app.app_context():

        # Alembic does not support calling upgrade() concurrently
        if not is_werkzeug_parent():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) on par.
            try:
                upgrade()
            except Exception as e:
                logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            # On startup all kernels are refreshed. This is because
            # updating Orchest might make the kernels in the
            # userdir/.orchest/kernels directory invalid.
            projs = Project.query.all()
            for proj in projs:
                try:
                    populate_kernels(app, db, proj.uuid)
                except Exception as e:
                    logging.error(
                        "Failed to populate kernels on startup for project %s: %s [%s]"
                        % (proj.uuid, e, type(e)))

        # To avoid multiple removals in case of a flask --reload, so
        # that this code runs once per container.
        try:
            os.mkdir("/tmp/jupyter_lock_removed")
            lock_path = os.path.join("/userdir", _config.JUPYTER_USER_CONFIG,
                                     "lab", ".bootlock")
            if os.path.exists(lock_path):
                app.logger.info("Removing dangling jupyter boot lock.")
                os.rmdir(lock_path)

        except FileExistsError:
            app.logger.info("/tmp/jupyter_lock_removed exists. "
                            " Not removing the lock again.")

    # Telemetry
    if not app.config["TELEMETRY_DISABLED"]:
        # initialize posthog
        posthog.api_key = base64.b64decode(
            app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )

    # static file serving
    @app.route("/", defaults={"path": ""}, methods=["GET"])
    @app.route("/<path:path>", methods=["GET"])
    def index(path):
        # in Debug mode proxy to CLIENT_DEV_SERVER_URL
        if os.environ.get("FLASK_ENV") == "development":
            return _proxy(request, app.config["CLIENT_DEV_SERVER_URL"] + "/")
        else:
            file_path = os.path.join(app.config["STATIC_DIR"], path)
            if os.path.isfile(file_path):
                return send_from_directory(app.config["STATIC_DIR"], path)
            else:
                return send_from_directory(app.config["STATIC_DIR"],
                                           "index.html")

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(socketio)
    register_analytics_views(app, db)

    processes = []

    if not is_werkzeug_parent():

        file_dir = os.path.dirname(os.path.realpath(__file__))

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
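
is_werkzeug_parent() is project code, but checks like it usually hinge
on WERKZEUG_RUN_MAIN, which the Werkzeug reloader sets to "true" only
in the reloaded child process. One plausible implementation (an
assumption; the project's actual helper may differ):

import os


def is_werkzeug_parent():
    # True in the watching parent process of the dev-mode reloader,
    # which should not run migrations or spawn background processes.
    return (
        os.environ.get("FLASK_ENV") == "development"
        and os.environ.get("WERKZEUG_RUN_MAIN") != "true"
    )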
Example #9
def create_app():

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    socketio = SocketIO(app, cors_allowed_origins="*")

    # read directory mount based config into Flask config
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception as e:
        logging.warning("Failed to load config.json: %s" % e)

    logging.info("Flask CONFIG: %s" % app.config)

    db.init_app(app)

    # According to SQLAlchemy, this will only create tables if they do not exist
    with app.app_context():
        db.create_all()

        initialize_default_images(db)

        logging.info("Initializing kernels")
        populate_kernels(app, db)

    # static file serving
    @app.route('/public/<path:path>')
    def send_files(path):
        return send_from_directory("../static", path)

    register_views(app, db)
    register_build_views(app, db, socketio)

    if "TELEMETRY_DISABLED" not in app.config:
        # create thread for analytics
        scheduler = BackgroundScheduler()

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(analytics_ping,
                          'interval',
                          minutes=app.config["TELEMETRY_INTERVAL"],
                          args=[app])
        scheduler.start()

    # Start threaded file_permission_watcher
    # TODO: reconsider file permission approach
    # Note: the process is never cleaned up; this is permissible because
    # it only runs inside a container.

    watcher_file = "/tmp/file_permission_watcher_active"

    # guarantee no two python file permission watchers are started
    if not os.path.isfile(watcher_file):
        with open(watcher_file, "w") as file:
            file.write("1")

        file_dir = os.path.dirname(os.path.realpath(__file__))
        permission_process = Popen([
            os.path.join(file_dir, "scripts", "file_permission_watcher.py"),
            app.config["USER_DIR"]
        ])

        logging.info("Started file_permission_watcher.py")

    return app, socketio
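
Unlike the atomic os.mkdir guard used in other examples, the
check-then-write on watcher_file here is racy: two processes can both
pass os.path.isfile() before either writes. Opening with mode "x"
closes that race; a sketch (start_watcher stands in for the Popen call
above):

try:
    # open(..., "x") fails atomically if the file already exists, so
    # at most one process starts the watcher.
    with open(watcher_file, "x") as f:
        f.write("1")
except FileExistsError:
    pass  # Another process already started the watcher.
else:
    start_watcher()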
Example #10
def create_app():

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    socketio = SocketIO(app, cors_allowed_origins="*")
    logging.getLogger("engineio").setLevel(logging.ERROR)

    # read directory mount based config into Flask config
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception as e:
        logging.warning("Failed to load config.json: %s" % e)

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # create thread for non-cpu bound background tasks, e.g.
    # requests
    scheduler = BackgroundScheduler()
    app.config["SCHEDULER"] = scheduler

    logging.info("Flask CONFIG: %s" % app.config)

    db.init_app(app)
    ma.init_app(app)

    # According to SQLAlchemy, this will only create tables if they do not exist
    with app.app_context():
        db.create_all()
        # This class is only serialized on disk (see the Environment
        # model). To avoid subtle errors, such as querying the
        # Environments table and finding no environments while they do
        # actually exist, the table is dropped so that any such query
        # fails loudly.
        Environment.__table__.drop(db.engine)
        initialize_default_datasources(db, app)

    # Telemetry
    if not app.config["TELEMETRY_DISABLED"]:
        # initialize posthog
        posthog.api_key = base64.b64decode(app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )
        scheduler.start()

    # static file serving
    @app.route("/public/<path:path>")
    def send_files(path):
        return send_from_directory("../static", path)

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(db, socketio)
    register_analytics_views(app, db)

    processes = []

    if process_start_gate():

        file_dir = os.path.dirname(os.path.realpath(__file__))

        # TODO: reconsider file permission approach
        # file_permission_watcher process
        permission_process = Popen(
            [
                "python3",
                "-m",
                "scripts.file_permission_watcher",
                app.config["USER_DIR"],
            ],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )
        logging.info("Started file_permission_watcher.py")
        processes.append(permission_process)

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        logging.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
Example #11
File: __init__.py  Project: puqiu/orchest
def create_app(config_class=None, use_db=True, be_scheduler=False):
    """Create the Flask app and return it.

    Args:
        config_class: Configuration class. See orchest-api/app/config.
        use_db: If true, associate a database to the Flask app instance,
            which implies connecting to a given database and possibly
            creating such database and/or tables if they do not exist
            already. This is configurable because the celery worker does
            not need to connect to the db that "belongs" to the
            orchest-api.
        be_scheduler: If true, a background thread will act as a job
            scheduler, according to the logic in core/scheduler. While
            Orchest runs, only a single process should be acting as
            scheduler.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    # Cross-origin resource sharing. Allow API to be requested from the
    # different microservices such as the webserver.
    CORS(app, resources={r"/*": {"origins": "*"}})

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    if use_db:
        # Create the database if it does not exist yet. Roughly equal to
        # a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
        if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
            create_database(app.config["SQLALCHEMY_DATABASE_URI"])

        db.init_app(app)
        # necessary for migration
        Migrate().init_app(app, db)

        with app.app_context():

            # Alembic does not support calling upgrade() concurrently
            if not is_werkzeug_parent():
                # Upgrade to the latest revision. This also takes
                # care of bringing an "empty" db (no tables) on par.
                try:
                    upgrade()
                except Exception as e:
                    logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            # In case of an ungraceful shutdown, these entities could be
            # in an invalid state, so they are deleted, since for sure
            # they are not running anymore.
            # To avoid the issue of entities being deleted because of a
            # flask app reload triggered by a --dev code change, we
            # attempt to create a directory first. Since this is an
            # atomic operation that will result in an error if the
            # directory is already there, this cleanup operation will
            # run only once per container.
            try:
                os.mkdir("/tmp/cleanup_done")
                InteractiveSession.query.delete()

                # Delete old JupyterBuilds on start to avoid
                # accumulation in the DB. Leave the latest such that the
                # user can see details about the last executed build
                # after restarting Orchest.
                jupyter_builds = (
                    JupyterBuild.query.order_by(JupyterBuild.requested_time.desc())
                    .offset(1)
                    .all()
                )

                # Can't use offset and .delete in conjunction in
                # sqlalchemy unfortunately.
                for jupyter_build in jupyter_builds:
                    db.session.delete(jupyter_build)

                db.session.commit()

                # Fix interactive runs.
                runs = InteractivePipelineRun.query.filter(
                    InteractivePipelineRun.status.in_(["PENDING", "STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for run in runs:
                        AbortPipelineRun(tpe).transaction(run.uuid)

                # Fix one off jobs (and their pipeline runs).
                jobs = Job.query.filter_by(schedule=None, status="STARTED").all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for job in jobs:
                        AbortJob(tpe).transaction(job.uuid)

                # This is to fix the state of cron jobs pipeline runs.
                runs = NonInteractivePipelineRun.query.filter(
                    NonInteractivePipelineRun.status.in_(["STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for run in runs:
                        AbortPipelineRun(tpe).transaction(run.uuid)

                # Fix env builds.
                builds = EnvironmentBuild.query.filter(
                    EnvironmentBuild.status.in_(["PENDING", "STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for build in builds:
                        AbortEnvironmentBuild(tpe).transaction(build.uuid)

                # Fix jupyter builds.
                builds = JupyterBuild.query.filter(
                    JupyterBuild.status.in_(["PENDING", "STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for build in builds:
                        AbortJupyterBuild(tpe).transaction(build.uuid)

                # Trigger a build of JupyterLab if no JupyterLab image
                # is found for this version and JupyterLab setup_script
                # is non-empty.
                trigger_conditional_jupyter_build(app)

            except FileExistsError:
                app.logger.info("/tmp/cleanup_done exists. Skipping cleanup.")
            except Exception as e:
                app.logger.error("Cleanup failed")
                app.logger.error(e)

    if be_scheduler and not is_werkzeug_parent():
        # Create a scheduler and have the scheduling logic running
        # periodically.
        scheduler = BackgroundScheduler(
            job_defaults={
                # Infinite amount of grace time, so that if a task
                # cannot be instantly executed (e.g. if the webserver is
                # busy) then it will eventually be.
                "misfire_grace_time": 2 ** 31,
                "coalesce": False,
                # So that the same job can be in the queue an infinite
                # amount of times, e.g. for concurrent requests issuing
                # the same tasks.
                "max_instances": 2 ** 31,
            }
        )
        app.config["SCHEDULER"] = scheduler
        scheduler.start()
        scheduler.add_job(
            Scheduler.check_for_jobs_to_be_scheduled,
            "interval",
            seconds=app.config["SCHEDULER_INTERVAL"],
            args=[app],
        )

    # Register blueprints.
    app.register_blueprint(api, url_prefix="/api")

    return app
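
TwoPhaseExecutor and the Abort* classes are Orchest internals; the
recovery idea underneath is that anything still PENDING or STARTED at
boot cannot really be running, so it is marked aborted in one
transaction. A minimal sketch with a hypothetical SQLAlchemy Run model
(the real code additionally performs per-entity side effects):

def abort_stale_runs(session):
    stale = session.query(Run).filter(
        Run.status.in_(["PENDING", "STARTED"])
    )
    # Bulk UPDATE: these rows predate this boot, so nothing is
    # actually running anymore.
    stale.update({"status": "ABORTED"}, synchronize_session="fetch")
    session.commit()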
Example #12
def create_app():
    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    init_logging()

    socketio = SocketIO(app, cors_allowed_origins="*")

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # read directory mount based config into Flask config
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception:
        app.logger.warning("Failed to load config.json")

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # create thread for non-cpu bound background tasks, e.g. requests
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot be
            # instantly executed (e.g. if the webserver is busy) then it
            # will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing the
            # same tasks.
            "max_instances": 2**31,
        })
    app.config["SCHEDULER"] = scheduler
    scheduler.start()

    app.logger.info("Flask CONFIG: %s" % app.config)

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)
    # necessary for migration
    Migrate().init_app(app, db)

    with app.app_context():

        # Alembic does not support calling upgrade() concurrently
        if not is_werkzeug_parent():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) on par.
            try:
                upgrade()
            except Exception as e:
                logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            initialize_default_datasources(db, app)

    # Telemetry
    if not app.config["TELEMETRY_DISABLED"]:
        # initialize posthog
        posthog.api_key = base64.b64decode(
            app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )

    # static file serving
    @app.route("/public/<path:path>")
    def send_files(path):
        return send_from_directory("../static", path)

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(db, socketio)
    register_analytics_views(app, db)

    processes = []

    if not is_werkzeug_parent():

        file_dir = os.path.dirname(os.path.realpath(__file__))

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
Example #13
def create_app(config_class=None, use_db=True, be_scheduler=False):
    """Create the Flask app and return it.

    Args:
        config_class: Configuration class. See orchest-api/app/config.
        use_db: If true, associate a database to the Flask app instance,
            which implies connecting to a given database and possibly
            creating such database and/or tables if they do not exist
            already. This is configurable because the celery worker does
            not need to connect to the db that "belongs" to the
            orchest-api.
        be_scheduler: If true, a background thread will act as a job
            scheduler, according to the logic in core/scheduler. While
            Orchest runs, only a single process should be acting as
            scheduler.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    # Cross-origin resource sharing. Allow API to be requested from the
    # different microservices such as the webserver.
    CORS(app, resources={r"/*": {"origins": "*"}})

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    if use_db:
        # Create the database if it does not exist yet. Roughly equal to
        # a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
        if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
            create_database(app.config["SQLALCHEMY_DATABASE_URI"])

        db.init_app(app)
        # necessary for migration
        Migrate().init_app(app, db)

        with app.app_context():

            # Alembic does not support calling upgrade() concurrently
            if not is_werkzeug_parent():
                # Upgrade to the latest revision. This also takes
                # care of bringing an "empty" db (no tables) on par.
                try:
                    upgrade()
                except Exception as e:
                    logging.error("Failed to run upgrade() %s [%s]" %
                                  (e, type(e)))

            # In case of an ungraceful shutdown, these entities could be
            # in an invalid state, so they are deleted, since for sure
            # they are not running anymore.
            # To avoid the issue of entities being deleted because of a
            # flask app reload triggered by a dev mode code change, we
            # attempt to create a directory first. Since this is an
            # atomic operation that will result in an error if the
            # directory is already there, this cleanup operation will
            # run only once per container.
            try:
                os.mkdir("/tmp/interactive_cleanup_done")
                InteractiveSession.query.delete()
                InteractivePipelineRun.query.filter(
                    InteractivePipelineRun.status.in_(
                        ["PENDING",
                         "STARTED"])).delete(synchronize_session="fetch")
                db.session.commit()
            except FileExistsError:
                pass

    if be_scheduler:
        # Create a scheduler and have the scheduling logic running
        # periodically.
        scheduler = BackgroundScheduler(
            job_defaults={
                # Infinite amount of grace time, so that if a task
                # cannot be instantly executed (e.g. if the webserver is
                # busy) then it will eventually be.
                "misfire_grace_time": 2**31,
                "coalesce": False,
                # So that the same job can be in the queue an infinite
                # amount of times, e.g. for concurrent requests issuing
                # the same tasks.
                "max_instances": 2**31,
            })
        app.config["SCHEDULER"] = scheduler
        scheduler.start()
        scheduler.add_job(
            Scheduler.check_for_jobs_to_be_scheduled,
            "interval",
            seconds=app.config["SCHEDULER_INTERVAL"],
            args=[app],
        )

    # Register blueprints.
    app.register_blueprint(api, url_prefix="/api")

    return app
Example #14
def create_app(to_migrate_db=False):
    """Create the Flask app and return it.

    Args:
        to_migrate_db: If True, then only initialize the db.

    Returns:
        Flask.app
    """
    signal.signal(signal.SIGTERM, lambda *args, **kwargs: sys.exit(0))
    app = Flask(__name__)
    app.config.from_object(config.CONFIG_CLASS)

    init_logging()

    # In development we want more verbose logging of every request.
    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    socketio = SocketIO(app, cors_allowed_origins="*")

    if not to_migrate_db:
        orchest_config = requests.get(
            f"http://{config.CONFIG_CLASS.ORCHEST_API_ADDRESS}/api/ctl/orchest-settings"
        ).json()
        app.config.update(orchest_config)

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)
    # Necessary for DB migrations.
    Migrate().init_app(app, db)

    # NOTE: In this case we want to return ASAP as otherwise the DB
    # might be called (inside this function) before it is migrated.
    if to_migrate_db:
        return app, None, None

    # Keep this below the `to_migrate_db` check, otherwise it would get
    # logged twice, since we first migrate before the app starts.
    app.logger.info("Flask CONFIG: %s" % app.config)

    # Initialize posthog ASAP, at least before setting up the scheduler
    # but after `to_migrate_db`.
    if not app.config["TELEMETRY_DISABLED"]:
        posthog.api_key = base64.b64decode(
            app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

    if not _utils.is_running_from_reloader():
        with app.app_context():
            try:
                if app.config.get("TESTING", False):
                    # Do nothing.
                    # In case of tests we always want to run cleanup.
                    # Because every test will get a clean app, the same
                    # code should run for all tests.
                    pass
                else:
                    app.logger.debug(
                        "Trying to create /tmp/webserver_init_lock")
                    os.mkdir("/tmp/webserver_init_lock")
                    app.logger.info(
                        "/tmp/webserver_init_lock successfully created.")
            except FileExistsError:
                app.logger.info("/tmp/webserver_init_lock already exists.")
            else:
                jupyter_boot_lock_path = os.path.join(
                    "/userdir", _config.JUPYTER_USER_CONFIG, "lab",
                    ".bootlock")
                if os.path.exists(jupyter_boot_lock_path):
                    app.logger.info("Removing dangling jupyter boot lock.")
                    os.rmdir(jupyter_boot_lock_path)

                # On startup all kernels are refreshed. This is because
                # updating Orchest might make the kernels in the
                # userdir/.orchest/kernels directory invalid.
                projs = Project.query.all()
                for proj in projs:
                    try:
                        populate_kernels(app, db, proj.uuid)
                    except Exception as e:
                        logging.error("Failed to populate kernels on startup"
                                      " for project %s: %s [%s]" %
                                      (proj.uuid, e, type(e)))

    # create thread for non-cpu bound background tasks, e.g. requests
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot be
            # instantly executed (e.g. if the webserver is busy) then it
            # will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing the
            # same tasks.
            "max_instances": 2**31,
        })
    app.config["SCHEDULER"] = scheduler
    add_recurring_jobs_to_scheduler(scheduler, app, run_on_add=True)
    scheduler.start()

    # static file serving
    @app.route("/", defaults={"path": ""}, methods=["GET"])
    @app.route("/<path:path>", methods=["GET"])
    def index(path):
        file_path = safe_join(app.config["STATIC_DIR"], path)
        if os.path.isfile(file_path):
            return send_from_directory(app.config["STATIC_DIR"], path)
        else:
            return send_from_directory(app.config["STATIC_DIR"],
                                       "index.html",
                                       cache_timeout=0)

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(socketio)
    register_analytics_views(app, db)

    processes = []

    if (os.environ.get("FLASK_ENV") != "development"
            or _utils.is_running_from_reloader()):
        file_dir = os.path.dirname(os.path.realpath(__file__))

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
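
One subtlety in the catch-all route: some versions of safe_join return
None when path escapes STATIC_DIR (others raise NotFound), and
os.path.isfile(None) raises TypeError. A defensive variant of the route
(a sketch, not a drop-in addition to the app above):

@app.route("/", defaults={"path": ""}, methods=["GET"])
@app.route("/<path:path>", methods=["GET"])
def index(path):
    file_path = safe_join(app.config["STATIC_DIR"], path)
    if file_path is not None and os.path.isfile(file_path):
        return send_from_directory(app.config["STATIC_DIR"], path)
    # SPA fallback: let the client-side router handle the path.
    return send_from_directory(app.config["STATIC_DIR"], "index.html")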