Example #1
def create_app():
    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    init_logging()

    socketio = SocketIO(app, cors_allowed_origins="*")

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Read the directory-mount-based config into the Flask config.
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception:
        app.logger.warning("Failed to load config.json")

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # Create a thread for non-CPU-bound background tasks, e.g. requests.
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot be
            # instantly executed (e.g. if the webserver is busy) then it
            # will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing the
            # same tasks.
            "max_instances": 2**31,
        })
    app.config["SCHEDULER"] = scheduler
    scheduler.start()

    app.logger.info("Flask CONFIG: %s" % app.config)

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)
    # necessary for migration
    Migrate().init_app(app, db)

    with app.app_context():

        # Alembic does not support calling upgrade() concurrently
        if not is_werkzeug_parent():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) up to date.
            try:
                upgrade()
            except Exception as e:
                logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            initialize_default_datasources(db, app)

    # Telemetry
    if not app.config["TELEMETRY_DISABLED"]:
        # initialize posthog
        posthog.api_key = base64.b64decode(
            app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )

    # static file serving
    @app.route("/public/<path:path>")
    def send_files(path):
        return send_from_directory("../static", path)

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(db, socketio)
    register_analytics_views(app, db)

    processes = []

    if not is_werkzeug_parent():

        file_dir = os.path.dirname(os.path.realpath(__file__))

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
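
The function returns the app together with its SocketIO wrapper and the list of spawned child processes, so the caller is responsible both for serving and for cleaning up. A minimal sketch of an entry point consuming that triple (the host/port values are assumptions, not taken from the example above):

if __name__ == "__main__":
    app, socketio, processes = create_app()
    try:
        # flask_socketio's run() wraps app.run() with websocket support.
        socketio.run(app, host="0.0.0.0", port=80)
    finally:
        # Do not let the log_streamer child process outlive the server.
        for proc in processes:
            proc.terminate()
            proc.wait()
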
Example #2
def create_app():
    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    init_logging()

    socketio = SocketIO(app, cors_allowed_origins="*")

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Read the directory-mount-based config into the Flask config.
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception:
        app.logger.warning("Failed to load config.json")

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # Create a thread for non-CPU-bound background tasks, e.g. requests.
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot be
            # instantly executed (e.g. if the webserver is busy) then it
            # will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing the
            # same tasks.
            "max_instances": 2**31,
        })
    app.config["SCHEDULER"] = scheduler
    scheduler.start()

    app.logger.info("Flask CONFIG: %s" % app.config)

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)
    # necessary for migration
    Migrate().init_app(app, db)

    with app.app_context():

        # Alembic does not support calling upgrade() concurrently
        if not is_werkzeug_parent():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) up to date.
            try:
                upgrade()
            except Exception as e:
                logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            # On startup all kernels are refreshed. This is because
            # updating Orchest might make the kernels in the
            # userdir/.orchest/kernels directory invalid.
            projs = Project.query.all()
            for proj in projs:
                try:
                    populate_kernels(app, db, proj.uuid)
                except Exception as e:
                    logging.error(
                        "Failed to populate kernels on startup for project %s: %s [%s]"
                        % (proj.uuid, e, type(e)))

        # Guard against multiple removals in case of a flask --reload, so
        # that this code runs once per container.
        try:
            os.mkdir("/tmp/jupyter_lock_removed")
            lock_path = os.path.join("/userdir", _config.JUPYTER_USER_CONFIG,
                                     "lab", ".bootlock")
            if os.path.exists(lock_path):
                app.logger.info("Removing dangling jupyter boot lock.")
                os.rmdir(lock_path)

        except FileExistsError:
            app.logger.info("/tmp/jupyter_lock_removed exists. "
                            " Not removing the lock again.")

    # Telemetry
    if not app.config["TELEMETRY_DISABLED"]:
        # initialize posthog
        posthog.api_key = base64.b64decode(
            app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # send a ping now
        analytics_ping(app)

        # and every 15 minutes
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )

    # static file serving
    @app.route("/", defaults={"path": ""}, methods=["GET"])
    @app.route("/<path:path>", methods=["GET"])
    def index(path):
        # In development mode, proxy requests to CLIENT_DEV_SERVER_URL.
        if os.environ.get("FLASK_ENV") == "development":
            return _proxy(request, app.config["CLIENT_DEV_SERVER_URL"] + "/")
        else:
            file_path = os.path.join(app.config["STATIC_DIR"], path)
            if os.path.isfile(file_path):
                return send_from_directory(app.config["STATIC_DIR"], path)
            else:
                return send_from_directory(app.config["STATIC_DIR"],
                                           "index.html")

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(socketio)
    register_analytics_views(app, db)

    processes = []

    if not is_werkzeug_parent():

        file_dir = os.path.dirname(os.path.realpath(__file__))

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
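
Example #2 swaps the bare /public static route for a catch-all index() that either serves the built client or, in development, forwards requests to the client dev server through an Orchest-internal _proxy helper whose implementation is not shown here. A minimal sketch of what such a proxy could look like, built on requests (an assumption, not the project's actual code; the query string is omitted for brevity):

import requests as http
from flask import Response

def _proxy(request, target_base):
    # Forward the incoming request, minus the Host header.
    resp = http.request(
        method=request.method,
        url=target_base + request.path.lstrip("/"),
        headers={k: v for k, v in request.headers if k.lower() != "host"},
        data=request.get_data(),
        allow_redirects=False,
    )
    # Drop hop-by-hop headers before handing the body back to Flask.
    excluded = {"content-encoding", "content-length",
                "transfer-encoding", "connection"}
    headers = [(k, v) for k, v in resp.headers.items()
               if k.lower() not in excluded]
    return Response(resp.content, resp.status_code, headers)
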
Example #3
def create_app(to_migrate_db=False):
    """Create the Flask app and return it.

    Args:
        to_migrate_db: If True, then only initialize the db.

    Returns:
        Flask.app
    """
    signal.signal(signal.SIGTERM, lambda *args, **kwargs: sys.exit(0))
    app = Flask(__name__)
    app.config.from_object(config.CONFIG_CLASS)

    init_logging()

    # In development we want more verbose logging of every request.
    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    socketio = SocketIO(app, cors_allowed_origins="*")

    if not to_migrate_db:
        orchest_config = requests.get(
            f"http://{config.CONFIG_CLASS.ORCHEST_API_ADDRESS}/api/ctl/orchest-settings"
        ).json()
        app.config.update(orchest_config)

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)
    # Necessary for DB migrations.
    Migrate().init_app(app, db)

    # NOTE: In this case we want to return ASAP as otherwise the DB
    # might be called (inside this function) before it is migrated.
    if to_migrate_db:
        return app, None, None

    # Keep this below the `to_migrate_db` check, otherwise it would get
    # logged twice, because before the app starts we first migrate.
    app.logger.info("Flask CONFIG: %s" % app.config)

    # Initialize posthog ASAP, at least before setting up the scheduler
    # but after `to_migrate_db`.
    if not app.config["TELEMETRY_DISABLED"]:
        posthog.api_key = base64.b64decode(
            app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

    if not _utils.is_running_from_reloader():
        with app.app_context():
            try:
                if app.config.get("TESTING", False):
                    # Do nothing, so that the `else` branch below runs.
                    # In tests we always want to run the cleanup, because
                    # every test gets a clean app and the same code
                    # should run for all tests.
                    pass
                else:
                    app.logger.debug(
                        "Trying to create /tmp/webserver_init_lock")
                    os.mkdir("/tmp/webserver_init_lock")
                    app.logger.info(
                        "/tmp/webserver_init_lock successfully created.")
            except FileExistsError:
                app.logger.info("/tmp/webserver_init_lock already exists.")
            else:
                jupyter_boot_lock_path = os.path.join(
                    "/userdir", _config.JUPYTER_USER_CONFIG, "lab",
                    ".bootlock")
                if os.path.exists(jupyter_boot_lock_path):
                    app.logger.info("Removing dangling jupyter boot lock.")
                    os.rmdir(jupyter_boot_lock_path)

                # On startup all kernels are refreshed. This is because
                # updating Orchest might make the kernels in the
                # userdir/.orchest/kernels directory invalid.
                projs = Project.query.all()
                for proj in projs:
                    try:
                        populate_kernels(app, db, proj.uuid)
                    except Exception as e:
                        logging.error("Failed to populate kernels on startup"
                                      " for project %s: %s [%s]" %
                                      (proj.uuid, e, type(e)))

    # Create a thread for non-CPU-bound background tasks, e.g. requests.
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot be
            # instantly executed (e.g. if the webserver is busy) then it
            # will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing the
            # same tasks.
            "max_instances": 2**31,
        })
    app.config["SCHEDULER"] = scheduler
    add_recurring_jobs_to_scheduler(scheduler, app, run_on_add=True)
    scheduler.start()

    # static file serving
    @app.route("/", defaults={"path": ""}, methods=["GET"])
    @app.route("/<path:path>", methods=["GET"])
    def index(path):
        file_path = safe_join(app.config["STATIC_DIR"], path)
        # NOTE: safe_join returns None (or raises, depending on the
        # implementation) when `path` would escape STATIC_DIR.
        if file_path is not None and os.path.isfile(file_path):
            return send_from_directory(app.config["STATIC_DIR"], path)
        else:
            return send_from_directory(app.config["STATIC_DIR"],
                                       "index.html",
                                       cache_timeout=0)

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(socketio)
    register_analytics_views(app, db)

    processes = []

    if (os.environ.get("FLASK_ENV") != "development"
            or _utils.is_running_from_reloader()):
        file_dir = os.path.dirname(os.path.realpath(__file__))

        # log_streamer process
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )

        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
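
Example #3 adds the to_migrate_db escape hatch: a stripped-down app is built first so the schema can be migrated before the fully wired app ever queries the database. A sketch of that two-phase startup (hypothetical invocation, presumably using the same flask_migrate upgrade() helper the earlier examples call):

from flask_migrate import upgrade

# Phase 1: minimal app with only the DB extensions initialized.
app, _, _ = create_app(to_migrate_db=True)
with app.app_context():
    upgrade()  # bring the schema to the latest Alembic revision

# Phase 2: the fully wired app that actually serves traffic.
app, socketio, processes = create_app()
socketio.run(app, host="0.0.0.0", port=80)
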