Exemplo n.º 1
0
    def build(self):
        """Run one supervisor pass by executing every stage in a fixed order.

        ``self.auxiliary`` is reset to a dict holding the current time, then
        the stages listed below are invoked one after another; the last one
        writes the auxiliary data out.

        ``ObjectDeletedError`` is silently ignored. Any other exception is
        logged with its traceback and is not re-raised. When
        ``Session.sqlalchemy_error`` reports the exception as a database
        error, the ``'SupervisorBuilder'`` session is cleaned up and
        re-created and the logger is rebuilt on top of the new session
        before the error is logged.
        """
        session_key = 'SupervisorBuilder'
        stages = (
            'create_base',
            'process_stop_tasks',
            'process_start_dags',
            'process_parent_tasks',
            'load_tasks',
            'load_computers',
            'process_tasks',
            'write_auxiliary',
        )

        try:
            # NOTE: an early exit based on self.fast_check() used to sit
            # here; it is currently disabled.
            self.auxiliary = {'time': now()}

            # Each stage is looked up right before it is called, exactly as
            # a sequence of plain ``self.<stage>()`` calls would do.
            for stage in stages:
                getattr(self, stage)()

        except ObjectDeletedError:
            pass
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(key=session_key)
                self.session = Session.create_session(key=session_key)
                self.logger = create_logger(self.session, session_key)

            self.logger.error(traceback.format_exc(), ComponentType.Supervisor)
Exemplo n.º 2
0
def stop(logger, session: Session, task: Task, dag: Dag):
    """Stop ``task`` and record the resulting status.

    A task whose status is already greater than ``TaskStatus.InProgress`` is
    left untouched and its current status is returned immediately.

    Otherwise a task that has not run yet is marked ``Skipped``; any other
    task is revoked through ``app.control.revoke(..., terminate=True)`` and
    marked ``Stopped``.

    Whatever happens during the revoke, when the task has a pid a ``kill``
    job is queued for it and for every pid listed under ``child_processes``
    in the task's additional info, and the status is then stored through
    ``TaskProvider.change_status``.

    Exceptions for which ``Session.sqlalchemy_error`` is truthy are logged
    on a best-effort basis and re-raised; all other exceptions are logged
    and suppressed.

    :return: ``task.status`` as it is after the status change
    """
    provider = TaskProvider(session)
    if task.status > TaskStatus.InProgress.value:
        return task.status

    def send_kill(pid, queue_name):
        # Queue a kill job for a single process id.
        kill.apply_async((pid, ), queue=queue_name, retry=False)

    status = TaskStatus.Stopped
    try:
        if task.status == TaskStatus.NotRan.value:
            status = TaskStatus.Skipped
        else:
            app.control.revoke(task.celery_id, terminate=True)
    except Exception as e:
        if not Session.sqlalchemy_error(e):
            logger.error(traceback.format_exc(), ComponentType.API)
        else:
            # Logging may itself fail here, so it must not mask the
            # original error.
            try:
                logger.error(traceback.format_exc(), ComponentType.API)
            except Exception:
                pass
            raise
    finally:
        if task.pid:
            host = task.computer_assigned
            image = dag.docker_img or "default"
            queue = f'{host}_{image}_supervisor'

            send_kill(task.pid, queue)

            info = yaml_load(task.additional_info)
            for child_pid in info.get('child_processes', []):
                send_kill(child_pid, queue)
        provider.change_status(task, status)

    return task.status
Exemplo n.º 3
0
    def build(self):
        """Run the task execution pipeline and always finalise the worker.

        The stages run in this order: ``create_base``, ``check_status``,
        ``change_status``, ``download``, ``create_executor``, ``execute``.

        On any exception:

        * if ``Session.sqlalchemy_error`` reports a database error, the
          ``'ExecuteBuilder'`` session is cleaned up and re-created and the
          logger is rebuilt on top of the new session;
        * the traceback is reported through ``self.error`` together with the
          id of the executor's current step (``None`` when there is no
          executor or no step);
        * the task is marked ``TaskStatus.Failed``;
        * the exception is re-raised.

        In every case, if ``app.current_task`` is set its state is updated
        to ``states.SUCCESS`` and ``app`` is closed. When ``self.exit`` is
        truthy the process is then terminated with ``os._exit(0)``, which
        happens before a re-raised exception can propagate to the caller.

        :raises Exception: whatever a stage raised, when ``self.exit`` is
            falsy
        """
        try:
            self.create_base()

            self.check_status()

            self.change_status()

            self.download()

            self.create_executor()

            self.execute()

        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(key='ExecuteBuilder')
                self.session = Session.create_session(key='ExecuteBuilder')
                # Replace the logger itself. Assigning the new logger to
                # ``self.logger.session`` left the builder logging through
                # the logger that was created for the discarded session.
                self.logger = create_logger(self.session, 'ExecuteBuilder')
                # NOTE(review): self.provider is not rebuilt here; if it
                # holds the old session the status change below may fail
                # too - confirm.

            step = self.executor.step.id if \
                (self.executor and self.executor.step) else None

            self.error(traceback.format_exc(), step)
            self.provider.change_status(self.task, TaskStatus.Failed)
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            if app.current_task:
                app.current_task.update_state(state=states.SUCCESS)
                app.close()

            if self.exit:
                # noinspection PyProtectedMember
                os._exit(0)
Exemplo n.º 4
0
 def decorated(*args, **kwargs):
     """Call the wrapped function and recover the module session on DB errors.

     All positional and keyword arguments are forwarded to ``f`` and its
     result is returned to the caller (it was previously discarded, so every
     decorated callable returned ``None``).

     When ``f`` raises and ``Session.sqlalchemy_error`` reports a database
     error, the session registered under this module's name is cleaned up
     and the module-level ``_session`` is replaced with a fresh one. The
     exception is re-raised in every case.
     """
     global _session
     try:
         return f(*args, **kwargs)
     except Exception as e:
         if Session.sqlalchemy_error(e):
             Session.cleanup(key=__name__)
             _session = Session.create_session(key=__name__)
         # Bare raise keeps the original traceback intact.
         raise
Exemplo n.º 5
0
    def sync(self):
        """Pull files for this host from the computers it still has to sync.

        When ``FILE_SYNC_INTERVAL`` is 0 the method only sleeps for one
        second. Otherwise every ``(computer, project, tasks)`` entry
        returned by ``TaskSyncedProvider.for_computer`` for this host is
        processed:

        * entries whose computer was not active within the last 10 seconds
          are logged as offline and skipped;
        * entries whose computer has ``syncing_computer`` set are skipped;
        * for the rest, this host's ``syncing_computer`` is set to the
          source computer's name and stored, ``sync_directed`` is called
          with the project's ``data`` and ``models`` folders (the project's
          ignore list applies to ``data`` only), a ``TaskSynced`` row is
          added per task, and the method sleeps ``FILE_SYNC_INTERVAL``
          seconds.

        Finally ``last_synced`` is set to the time the pass started and
        ``syncing_computer`` is cleared.

        Exceptions are logged and not re-raised; on a database error (as
        reported by ``Session.sqlalchemy_error``) the ``'FileSync'`` session
        and the logger are re-created first.
        """
        hostname = socket.gethostname()
        try:
            provider = ComputerProvider(self.session)
            task_synced_provider = TaskSyncedProvider(self.session)

            computer = provider.by_name(hostname)
            sync_start = now()

            if FILE_SYNC_INTERVAL != 0:
                # Names of computers seen within the last 10 seconds.
                online_names = {
                    candidate.name
                    for candidate in provider.all_with_last_activtiy()
                    if (now() - candidate.last_activity).total_seconds() < 10
                }

                pending = task_synced_provider.for_computer(computer.name)
                for source, project, tasks in pending:
                    if source.name not in online_names:
                        self.logger.info(
                            f'Computer = {source.name} '
                            f'is offline. Can not sync',
                            ComponentType.WorkerSupervisor, hostname)
                        continue

                    if source.syncing_computer:
                        continue

                    ignored = [
                        str(folder)
                        for folder in yaml_load(project.ignore_folders)
                    ]
                    folders_excluded = [
                        [join('data', project.name), ignored],
                        [join('models', project.name), []],
                    ]

                    # Store which computer is being synced from before the
                    # transfer starts.
                    computer.syncing_computer = source.name
                    provider.update()
                    sync_directed(
                        self.session, source, computer, folders_excluded)

                    for task in tasks:
                        synced = TaskSynced(
                            computer=computer.name, task=task.id)
                        task_synced_provider.add(synced)

                    time.sleep(FILE_SYNC_INTERVAL)
            else:
                time.sleep(1)

            computer.last_synced = sync_start
            computer.syncing_computer = None
            provider.update()
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup('FileSync')
                self.session = Session.create_session(key='FileSync')
                self.logger = create_logger(self.session, 'FileSync')

            self.logger.error(traceback.format_exc(),
                              ComponentType.WorkerSupervisor, hostname)
Exemplo n.º 6
0
    def process_error(self, e: Exception):
        """Log the exception currently being handled, recovering the session.

        When ``Session.sqlalchemy_error`` reports ``e`` as a database error,
        the ``'FileSync'`` session is cleaned up and re-created and the
        logger is rebuilt on top of it. The active traceback is then logged
        together with this machine's host name.

        :param e: the exception that triggered the handler
        """
        session_key = 'FileSync'
        if Session.sqlalchemy_error(e):
            Session.cleanup(session_key)
            self.session = Session.create_session(key=session_key)
            self.logger = create_logger(self.session, session_key)

        host = socket.gethostname()
        trace = traceback.format_exc()
        self.logger.error(trace, ComponentType.WorkerSupervisor, host)
Exemplo n.º 7
0
    def wrapper():
        """Call ``f`` with the shared session and logger, logging failures.

        The session and logger are read from ``wrapper_vars``. Exceptions
        raised by ``f`` are logged and not re-raised. When
        ``Session.sqlalchemy_error`` reports a database error, the session
        registered under ``name`` is cleaned up and both entries of
        ``wrapper_vars`` are replaced with fresh objects before logging.
        """
        try:
            f(wrapper_vars['session'], wrapper_vars['logger'])
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(name)

                fresh_session = Session.create_session(key=name)
                wrapper_vars['session'] = fresh_session
                wrapper_vars['logger'] = create_logger(fresh_session, name)

            active_logger = wrapper_vars['logger']
            active_logger.error(
                traceback.format_exc(),
                ComponentType.WorkerSupervisor,
                hostname,
            )
Exemplo n.º 8
0
    def decorated(*args, **kwargs):
        """Call the wrapped handler and wrap its result in a JSON response.

        On success a falsy result is replaced by an empty dict. A
        ``Response`` is returned unchanged; anything else gets ``success``
        set to ``True`` and ``error`` set to ``''`` and is serialised with
        status 200.

        On an exception the request path and traceback are logged and a
        status 500 response carrying ``success: False`` and the traceback
        in ``error`` is returned. When ``Session.sqlalchemy_error`` reports
        a database error, the ``'server.read'`` and ``'server.write'``
        sessions and the module logger are re-created before logging.
        """
        global _read_session, _write_session, logger

        try:
            res = f(*args, **kwargs)
        except Exception as e:
            if Session.sqlalchemy_error(e):
                for stale_key in ('server.read', 'server.write'):
                    Session.cleanup(stale_key)

                _read_session = Session.create_session(key='server.read')
                _write_session = Session.create_session(key='server.write')

                logger = create_logger(_write_session, __name__)

            trace = traceback.format_exc()
            logger.error(
                f'Requested Url: {request.path}\n\n{trace}',
                ComponentType.API
            )

            failure = {'success': False, 'error': trace}
            return Response(json.dumps(failure), status=500)

        payload = res or {}
        if isinstance(payload, Response):
            return payload

        payload['success'] = True
        payload['error'] = ''

        return Response(json.dumps(payload), status=200)