示例#1
0
    def build(self):
        """Run one supervisor pass; failures are logged, never raised.

        ``self.auxiliary`` is reset to a dict holding the current time, then
        the processing stages run in a fixed order. ``ObjectDeletedError`` is
        ignored. Any other exception is logged with its traceback; if it is
        a SQLAlchemy error the session and logger are recreated first.
        """
        try:
            # if self.fast_check():
            #     return

            self.auxiliary = {'time': now()}

            # The stages depend on one another, so the order is significant.
            stages = (
                self.create_base,
                self.process_stop_tasks,
                self.process_start_dags,
                self.process_parent_tasks,
                self.load_tasks,
                self.load_computers,
                self.process_tasks,
                self.write_auxiliary,
            )
            for stage in stages:
                stage()

        except ObjectDeletedError:
            # Deliberately ignored: the pass simply ends here.
            pass
        except Exception as exc:
            if Session.sqlalchemy_error(exc):
                # Drop the broken session and rebuild session + logger.
                key = 'SupervisorBuilder'
                Session.cleanup(key=key)
                self.session = Session.create_session(key=key)
                self.logger = create_logger(self.session, key)

            self.logger.error(traceback.format_exc(), ComponentType.Supervisor)
示例#2
0
    def build(self):
        """Run the task pipeline; on failure mark the task failed and re-raise.

        Stages run in order: ``create_base``, ``check_status``,
        ``change_status``, ``download``, ``create_executor``, ``execute``.

        On any exception the traceback is reported through ``self.error``
        (with the current executor step id when one exists), the task status
        is set to ``TaskStatus.Failed`` and the exception is re-raised. If
        the exception is a SQLAlchemy error, the session and the logger are
        recreated first.

        Raises:
            Exception: whatever a pipeline stage raised.
        """
        try:
            self.create_base()

            self.check_status()

            self.change_status()

            self.download()

            self.create_executor()

            self.execute()

        except Exception:
            # sys.exc_info() is used instead of binding a name so the bare
            # ``raise`` below re-raises with the original traceback.
            import sys
            e = sys.exc_info()[1]
            if Session.sqlalchemy_error(e):
                Session.cleanup(key='ExecuteBuilder')
                self.session = Session.create_session(key='ExecuteBuilder')
                # Rebind the logger itself. Assigning the new logger to
                # ``self.logger.session`` would leave the old logger in use.
                self.logger = create_logger(self.session, 'ExecuteBuilder')

            step = self.executor.step.id if \
                (self.executor and self.executor.step) else None

            self.error(traceback.format_exc(), step)
            self.provider.change_status(self.task, TaskStatus.Failed)
            raise
        finally:
            # NOTE(review): runs on the failure path too, so the celery state
            # is set to SUCCESS even when the task failed - confirm intended.
            if app.current_task:
                app.current_task.update_state(state=states.SUCCESS)
                app.close()

            if self.exit:
                # Hard-exit the process without normal interpreter cleanup.
                # noinspection PyProtectedMember
                os._exit(0)
示例#3
0
文件: signals.py 项目: xyuan/mlcomp
 def decorated(*args, **kwargs):
     """Call ``f``; on a SQLAlchemy error reset ``_session``, then re-raise.

     The return value of ``f`` is discarded. Every exception propagates to
     the caller after the optional session reset.
     """
     global _session
     try:
         f(*args, **kwargs)
     except Exception as exc:
         session_broken = Session.sqlalchemy_error(exc)
         if session_broken:
             # Replace the module-level session keyed by this module's name.
             module_key = __name__
             Session.cleanup(key=module_key)
             _session = Session.create_session(key=module_key)
         raise exc
示例#4
0
文件: sync.py 项目: ASRlytics/mlcomp
    def sync(self):
        """Run one file-sync pass for this host; errors are logged, not raised.

        When ``FILE_SYNC_INTERVAL`` is 0 the pass only sleeps for one second.
        Otherwise every ``(computer, project, tasks)`` triple returned by
        ``TaskSyncedProvider.for_computer`` is handled: computers not active
        within the last 10 seconds, or already syncing, are skipped; for the
        rest ``sync_directed`` is called and each task is recorded as synced.

        At the end ``last_synced`` is set to the pass start time and
        ``syncing_computer`` is cleared. On a SQLAlchemy error the session
        and logger are recreated before the traceback is logged.
        """
        hostname = socket.gethostname()
        try:
            computer_provider = ComputerProvider(self.session)
            synced_provider = TaskSyncedProvider(self.session)

            this_computer = computer_provider.by_name(hostname)
            started_at = now()

            if FILE_SYNC_INTERVAL != 0:
                # Names of the computers seen within the last 10 seconds.
                online_names = {
                    other.name
                    for other in computer_provider.all_with_last_activtiy()
                    if (now() - other.last_activity).total_seconds() < 10
                }

                pending = synced_provider.for_computer(this_computer.name)
                for peer, project, tasks in pending:
                    if peer.name not in online_names:
                        self.logger.info(
                            f'Computer = {peer.name} '
                            f'is offline. Can not sync',
                            ComponentType.WorkerSupervisor, hostname)
                        continue

                    if peer.syncing_computer:
                        continue

                    data_excluded = [
                        str(folder)
                        for folder in yaml_load(project.ignore_folders)
                    ]
                    folders_excluded = [
                        [join('data', project.name), data_excluded],
                        [join('models', project.name), []],
                    ]

                    # Persist the "currently syncing" marker before the
                    # transfer starts.
                    this_computer.syncing_computer = peer.name
                    computer_provider.update()
                    sync_directed(self.session, peer, this_computer,
                                  folders_excluded)

                    for task in tasks:
                        record = TaskSynced(computer=this_computer.name,
                                            task=task.id)
                        synced_provider.add(record)

                    time.sleep(FILE_SYNC_INTERVAL)
            else:
                time.sleep(1)

            this_computer.last_synced = started_at
            this_computer.syncing_computer = None
            computer_provider.update()
        except Exception as exc:
            if Session.sqlalchemy_error(exc):
                Session.cleanup('FileSync')
                self.session = Session.create_session(key='FileSync')
                self.logger = create_logger(self.session, 'FileSync')

            self.logger.error(traceback.format_exc(),
                              ComponentType.WorkerSupervisor, hostname)
示例#5
0
    def process_error(self, e: Exception):
        """Log the traceback of the exception currently being handled.

        If ``e`` is a SQLAlchemy error, the 'FileSync' session is cleaned up
        and both ``self.session`` and ``self.logger`` are recreated first.
        The logged text comes from ``traceback.format_exc()``, so call this
        from inside an ``except`` block.

        :param e: the exception used to decide whether the session is reset
        """
        session_broken = Session.sqlalchemy_error(e)
        if session_broken:
            key = 'FileSync'
            Session.cleanup(key)
            self.session = Session.create_session(key=key)
            self.logger = create_logger(self.session, key)

        self.logger.error(
            traceback.format_exc(),
            ComponentType.WorkerSupervisor,
            socket.gethostname(),
        )
示例#6
0
    def wrapper():
        """Call ``f`` with the shared session and logger; log any failure.

        Exceptions never propagate. On a SQLAlchemy error the session and
        logger stored in ``wrapper_vars`` are replaced before logging, so the
        next call uses the new pair.
        """
        try:
            f(wrapper_vars['session'], wrapper_vars['logger'])
        except Exception as exc:
            if Session.sqlalchemy_error(exc):
                Session.cleanup(name)

                fresh_session = Session.create_session(key=name)
                wrapper_vars['session'] = fresh_session
                wrapper_vars['logger'] = create_logger(fresh_session, name)

            active_logger = wrapper_vars['logger']
            active_logger.error(
                traceback.format_exc(),
                ComponentType.WorkerSupervisor,
                hostname,
            )
示例#7
0
def stop(logger, session: Session, task: Task, dag: Dag):
    """Stop (or skip) ``task`` and return its resulting status.

    A task whose status is already past ``InProgress`` is returned untouched.
    A ``NotRan`` task is marked ``Skipped``; any other task has its celery
    job revoked with ``terminate=True`` and is marked ``Stopped``.

    Whatever happens while revoking, the ``finally`` section enqueues
    ``kill`` for ``task.pid`` and for every entry under 'child_processes' in
    ``task.additional_info`` (only when ``task.pid`` is set), then writes the
    status through ``TaskProvider.change_status``.

    :raises Exception: re-raised when the revoke failure is a SQLAlchemy
        error; other failures are only logged
    """
    provider = TaskProvider(session)
    if task.status > TaskStatus.InProgress.value:
        return task.status

    status = TaskStatus.Stopped
    try:
        if task.status == TaskStatus.NotRan.value:
            status = TaskStatus.Skipped
        else:
            app.control.revoke(task.celery_id, terminate=True)
    except Exception as exc:
        if not Session.sqlalchemy_error(exc):
            logger.error(traceback.format_exc(), ComponentType.API)
        else:
            # Logging may itself fail here; that failure is swallowed so the
            # original error is the one that propagates.
            try:
                logger.error(traceback.format_exc(), ComponentType.API)
            except Exception:
                pass
            raise
    finally:
        if task.pid:
            supervisor_queue = f'{task.computer_assigned}_' \
                               f'{dag.docker_img or "default"}_supervisor'
            kill.apply_async((task.pid, ), queue=supervisor_queue,
                             retry=False)

            info = yaml_load(task.additional_info)
            for child_pid in info.get('child_processes', []):
                kill.apply_async((child_pid, ), queue=supervisor_queue,
                                 retry=False)
        provider.change_status(task, status)

    return task.status
示例#8
0
文件: base.py 项目: xyuan/mlcomp
 def __init__(self, session: Session = None):
     """Store the session and build the serializer.

     :param session: session to use; a new one is created when ``None``
     """
     self._session = Session.create_session() if session is None else session
     self.serializer = Serializer(
         date_format=self.date_format,
         datetime_format=self.datetime_format,
         time_format=self.time_format,
     )
示例#9
0
文件: tests.py 项目: xyuan/mlcomp
def session():
    """Yield a session on a freshly migrated database.

    When ``ROOT_FOLDER`` is set, that folder is removed and ``mlcomp`` is
    reloaded before the migrations run.
    """
    if ROOT_FOLDER:
        shutil.rmtree(ROOT_FOLDER)
        reload(mlcomp)

    migrate()
    yield Session.create_session()
示例#10
0
 def __init__(self):
     """Create the 'SupervisorBuilder' session and logger; reset all state."""
     key = 'SupervisorBuilder'
     self.session = Session.create_session(key=key)
     self.logger = create_logger(self.session, key)

     # Providers are not created by this constructor.
     self.provider = self.computer_provider = None
     self.docker_provider = self.auxiliary_provider = None
     self.dag_provider = None

     # Working state, empty until something fills it in.
     self.queues = self.not_ran_tasks = None
     self.dep_status = self.computers = None
     self.auxiliary = {}
示例#11
0
def find_imports(path: str,
                 files: List[str] = None,
                 exclude_patterns: List[str] = None,
                 encoding='utf-8'):
    """Return ``(distribution, version)`` pairs for modules imported in files.

    Every ``.py`` file is parsed with ``ast``. The top-level package of each
    imported module is mapped through ``_mapping`` when present there and
    looked up with ``pkg_resources``. Names with no resolvable distribution
    are skipped. The result keeps one entry per import statement, so it may
    contain duplicates.

    :param path: root folder; file paths are made relative to it
    :param files: files to scan; defaults to every ``*.py`` under ``path``
    :param exclude_patterns: git-wildmatch patterns, matched against the
        relative file path, of files to skip
    :param encoding: encoding used to read the files
    :return: list of ``(name, version)`` tuples
    :raises Exception: re-raised, after logging the file name, when a file
        cannot be parsed
    """
    res = []
    raw_imports = []
    if files is None:
        files = glob(os.path.join(path, '**', '*.py'), recursive=True)
    if exclude_patterns is None:
        exclude_patterns = []

    spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern,
                                        exclude_patterns)

    for file in files:
        if not file.endswith('.py'):
            continue
        file_rel = os.path.relpath(file, path)
        if spec.match_file(file_rel):
            continue

        # Read first, then parse, so the file is closed while parsing.
        with open(file, 'r', encoding=encoding) as f:
            content = f.read()

        try:
            tree = ast.parse(content)
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for subnode in node.names:
                        raw_imports.append((subnode.name, file_rel))
                elif isinstance(node, ast.ImportFrom):
                    # ``from . import x`` has module None. It names no
                    # package, and None would crash ``lib.split`` below.
                    if node.module is not None:
                        raw_imports.append((node.module, file_rel))
        except Exception:
            logger = create_logger(Session.create_session(), __name__)
            logger.error('Failed on file: %s' % file_rel)
            raise

    for lib, file in raw_imports:
        name = lib.split('.')[0]
        try:
            if name in _mapping:
                name = _mapping[name]

            version = pkg_resources.get_distribution(name).version
            res.append((name, version))
        except Exception:
            # Best effort: stdlib and project-local modules have no
            # installed distribution and are left out.
            continue

    return res
示例#12
0
文件: tasks.py 项目: kiminh/mlcomp
    def __init__(self, id: int, repeat_count: int = 1, exit=True):
        """Store the arguments and create the 'ExecuteBuilder' session/logger.

        :param id: stored as ``self.id``
        :param repeat_count: stored as ``self.repeat_count``
        :param exit: stored as ``self.exit``
        """
        key = 'ExecuteBuilder'
        self.session = Session.create_session(key=key)
        self.logger = create_logger(self.session, key)

        self.id = id
        self.repeat_count = repeat_count
        self.exit = exit

        # Providers and storage are not created by this constructor.
        self.provider = self.library_provider = None
        self.storage = None

        # Task context, unset at construction time.
        self.task = self.dag = self.executor = None
        self.hostname = self.docker_img = None
        self.worker_index = self.queue_personal = None
        self.config = self.executor_type = None
示例#13
0
文件: 002_data.py 项目: xyuan/mlcomp
def upgrade(migrate_engine):
    """Insert a ``ReportLayout`` row for every bundled layout file.

    Each ``*.yml`` file under ``002/report_layout`` next to this module
    becomes one row. The row name is the file name up to its first dot and
    the content is the file text. All rows are committed together; on any
    error the provider is rolled back and the error re-raised.

    :param migrate_engine: engine whose ``url`` is used to open the session
    """
    folder = os.path.dirname(__file__)
    session = Session.create_session(connection_string=migrate_engine.url)
    provider = ReportLayoutProvider(session)

    try:
        files = os.path.join(folder, '002', 'report_layout', '*.yml')
        for path in glob(files):
            name = str(os.path.basename(path).split('.')[0])
            # Close each file promptly rather than leaking the handle.
            with open(path) as layout_file:
                text = layout_file.read()
            provider.add(ReportLayout(name=name,
                                      content=text,
                                      last_modified=now()),
                         commit=False)

        provider.commit()
    except Exception:
        provider.rollback()
        raise
示例#14
0
def error_handler(f):
    """Wrap ``f(session, logger)`` in a no-argument, never-raising callable.

    A session and logger keyed by ``f.__name__`` are created once, at
    decoration time, and passed to ``f`` on every call. Any exception is
    logged with its traceback. On a SQLAlchemy error the session and logger
    are replaced first, so later calls use the new pair.

    :param f: callable taking ``(session, logger)``
    :return: wrapper taking no arguments; the result of ``f`` is discarded
    """
    name = f.__name__
    session = Session.create_session(key=name)
    logger = create_logger(session, name)

    hostname = socket.gethostname()

    def wrapper():
        nonlocal session, logger
        try:
            f(session, logger)
        except Exception as exc:
            if Session.sqlalchemy_error(exc):
                Session.cleanup(name)

                session = Session.create_session(key=name)
                logger = create_logger(session, name)

            logger.error(
                traceback.format_exc(),
                ComponentType.WorkerSupervisor,
                hostname,
            )

    return wrapper
示例#15
0
    def decorated(*args, **kwargs):
        """Call ``f`` and wrap its result in a JSON ``Response``.

        A ``Response`` returned by ``f`` is passed through unchanged. Any
        other result (a falsy one becomes ``{}``) gets 'success' and 'error'
        keys and is serialised with status 200. When ``f`` raises, the
        traceback is logged and returned in 'error' with status 500. On a
        SQLAlchemy error the module read/write sessions and the logger are
        recreated first.
        """
        global _read_session, _write_session, logger

        try:
            res = f(*args, **kwargs)
        except Exception as exc:
            if Session.sqlalchemy_error(exc):
                for key in ('server.read', 'server.write'):
                    Session.cleanup(key)

                _read_session = Session.create_session(key='server.read')
                _write_session = Session.create_session(key='server.write')

                logger = create_logger(_write_session, __name__)

            error = traceback.format_exc()
            logger.error(
                f'Requested Url: {request.path}\n\n{error}',
                ComponentType.API
            )

            success, status, res = False, 500, None
        else:
            success, status, error = True, 200, ''

        res = res or {}
        if isinstance(res, Response):
            return res

        res['success'] = success
        res['error'] = error

        payload = json.dumps(res)
        return Response(payload, status=status)
示例#16
0
文件: sync.py 项目: shlemph/mlcomp
class FileSync:
    """File synchronisation between this host and other computers.

    ``session`` and ``logger`` are class attributes created when the class
    body runs. ``sync`` rebinds them on the instance after a SQLAlchemy
    error.
    """
    # Created once, when the class body executes, under the key 'FileSync'.
    session = Session.create_session(key='FileSync')
    logger = create_logger(session, 'FileSync')

    def sync_manual(self, computer: Computer, provider: ComputerProvider):
        """
        Handle a manual sync request stored in ``computer.meta``.

        Does nothing unless ``computer.meta`` is set and its parsed YAML has
        a 'manual_sync' key. Otherwise ``sync_directed`` is called once per
        online docker whose computer differs from ``computer``, with that
        docker's computer as ``source`` and ``computer`` as ``target``.
        Afterwards the 'manual_sync' key is removed from the meta and the
        change is saved through ``provider.update()``.

        :param computer: the computer whose meta is inspected and updated
        :param provider: used to look up source computers and to save
        """
        if not computer.meta:
            return

        meta = yaml_load(computer.meta)
        if 'manual_sync' not in meta:
            return

        manual_sync = meta['manual_sync']

        project_provider = ProjectProvider(self.session)
        docker_provider = DockerProvider(self.session)

        dockers = docker_provider.get_online()
        project = project_provider.by_id(manual_sync['project'])

        for docker in dockers:
            # Never sync a computer with itself.
            if docker.computer == computer.name:
                continue

            source = provider.by_name(docker.computer)
            ignore_folders = [
                [join('models', project.name), []]
            ]
            sync_directed(self.session, target=computer, source=source,
                          ignore_folders=ignore_folders)

        # The request is consumed: drop the key and save the meta.
        del meta['manual_sync']
        computer.meta = yaml_dump(meta)
        provider.update()

    def sync(self):
        """
        Run one sync pass for this host; errors are logged, not raised.

        When ``FILE_SYNC_INTERVAL`` is 0 the pass only sleeps one second.
        Otherwise ``sync_manual`` runs first, then every
        ``(computer, project, tasks)`` triple from
        ``TaskSyncedProvider.for_computer`` is processed. At the end
        ``last_synced`` is set to the pass start time and
        ``syncing_computer`` is cleared.
        """
        hostname = socket.gethostname()
        try:
            provider = ComputerProvider(self.session)
            task_synced_provider = TaskSyncedProvider(self.session)

            computer = provider.by_name(hostname)
            sync_start = now()

            if FILE_SYNC_INTERVAL == 0:
                time.sleep(1)
            else:
                self.sync_manual(computer, provider)

                # Keep only computers active within the last 10 seconds.
                computers = provider.all_with_last_activtiy()
                computers = [
                    c for c in computers
                    if (now() - c.last_activity).total_seconds() < 10
                ]
                computers_names = {c.name for c in computers}

                for c, project, tasks in task_synced_provider.for_computer(
                        computer.name):
                    if c.sync_with_this_computer:
                        if c.name not in computers_names:
                            self.logger.info(f'Computer = {c.name} '
                                             f'is offline. Can not sync',
                                             ComponentType.WorkerSupervisor,
                                             hostname)
                            continue

                        if c.syncing_computer:
                            continue

                        ignore_folders = [
                            [join('models', project.name), []]
                        ]

                        # Save the "currently syncing" marker before the
                        # transfer starts.
                        computer.syncing_computer = c.name
                        provider.update()

                        sync_directed(self.session, c, computer,
                                      ignore_folders)

                    # Reached also when ``c.sync_with_this_computer`` is
                    # falsy: the tasks are recorded as synced either way.
                    for t in tasks:
                        task_synced_provider.add(
                            TaskSynced(computer=computer.name, task=t.id)
                        )

                    time.sleep(FILE_SYNC_INTERVAL)

            computer.last_synced = sync_start
            computer.syncing_computer = None
            provider.update()
        except Exception as e:
            if Session.sqlalchemy_error(e):
                # Replace the broken session; the new session and logger
                # are set on the instance, shadowing the class attributes.
                Session.cleanup('FileSync')
                self.session = Session.create_session(key='FileSync')
                self.logger = create_logger(self.session, 'FileSync')

            self.logger.error(
                traceback.format_exc(), ComponentType.WorkerSupervisor,
                hostname
            )
示例#17
0
    DOCKER_IMG, DOCKER_MAIN, IP, PORT, WORKER_USAGE_INTERVAL, \
    SYNC_WITH_THIS_COMPUTER, CAN_PROCESS_TASKS
from mlcomp.db.core import Session
from mlcomp.db.enums import ComponentType, TaskStatus
from mlcomp.utils.logging import create_logger
from mlcomp.db.providers import DockerProvider, TaskProvider
from mlcomp.utils.schedule import start_schedule
from mlcomp.utils.misc import dict_func, now, disk, get_username, \
    kill_child_processes
from mlcomp.worker.app import app
from mlcomp.db.providers import ComputerProvider
from mlcomp.db.models import ComputerUsage, Computer, Docker
from mlcomp.utils.misc import memory
from mlcomp.worker.sync import FileSync

# Module-level session, created at import time under the key 'worker'.
_session = Session.create_session(key='worker')


# Root click command group; the body is intentionally empty.
# Documented with a comment because click would show a docstring as the
# group's help text.
@click.group()
def main():
    pass


def error_handler(f):
    name = f.__name__
    wrapper_vars = {'session': Session.create_session(key=name)}
    wrapper_vars['logger'] = create_logger(wrapper_vars['session'], name)

    hostname = socket.gethostname()

    def wrapper():
示例#18
0
from mlcomp.db.providers import \
    ComputerProvider, \
    TaskProvider, \
    StepProvider, \
    ProjectProvider, DockerProvider
from mlcomp.report import create_report, check_statuses
from mlcomp.utils.config import merge_dicts_smart, dict_from_list_str
from mlcomp.utils.logging import create_logger
from mlcomp.worker.executors.kaggle import Submit
from mlcomp.worker.sync import sync_directed, correct_folders
from mlcomp.worker.tasks import execute_by_id
from mlcomp.utils.misc import memory, disk, get_username, \
    get_default_network_interface, now
from mlcomp.server.back.create_dags import dag_standard, dag_pipe

# Module-level session, created at import time and keyed by the module name.
_session = Session.create_session(key=__name__)


def _dag(config: str,
         debug: bool = False,
         control_reqs=True,
         params: Tuple[str] = ()):
    logger = create_logger(_session, name='_dag')
    logger.info('started', ComponentType.Client)

    config_text = open(config, 'r').read()
    config_parsed = yaml_load(config_text)
    params = dict_from_list_str(params)
    config_parsed = merge_dicts_smart(config_parsed, params)
    config_text = yaml_dump(config_parsed)
示例#19
0
文件: kaggle.py 项目: shlemph/mlcomp
import socket

from kaggle.models import DatasetNewRequest

from mlcomp.db.core import Session
from mlcomp.db.enums import ComponentType
from mlcomp.db.providers import ModelProvider
from mlcomp.worker.executors.base.equation import Equation
from mlcomp.worker.executors.base.executor import Executor
from mlcomp.utils.logging import create_logger
from mlcomp.utils.config import Config

# Import the kaggle API defensively. An OSError here is reported as a
# missing kaggle.json and downgraded to a logged warning, so the module still
# imports. ``api`` stays undefined in that case.
try:
    from kaggle import api
except OSError:
    logger = create_logger(Session.create_session(), __name__)
    logger.warning(
        'Could not find kaggle.json. '
        'Kaggle executors can not be used', ComponentType.Worker,
        socket.gethostname())


class DownloadType(Enum):
    """The download kinds: ``Kaggle`` (0) and ``Link`` (1)."""

    Kaggle = 0
    Link = 1


@Executor.register
class Download(Executor):
    def __init__(self,
                 output: str,
示例#20
0
文件: 002_data.py 项目: xyuan/mlcomp
def downgrade(migrate_engine):
    """Delete every ``ReportLayout`` row and commit.

    :param migrate_engine: engine whose ``url`` is used to open the session
    """
    provider = ReportLayoutProvider(
        Session.create_session(connection_string=migrate_engine.url)
    )
    # Bulk delete issued through the query, with session synchronisation off.
    layouts = provider.session.query(ReportLayout)
    layouts.delete(synchronize_session=False)
    provider.session.commit()
示例#21
0
文件: app.py 项目: xang1234/mlcomp
    DagProvider, DagStorageProvider, TaskProvider, LogProvider, StepProvider, \
    FileProvider, AuxiliaryProvider
from mlcomp.db.report_info import ReportLayoutInfo
from mlcomp.server.back.supervisor import register_supervisor
from mlcomp.utils.logging import create_logger
from mlcomp.utils.io import from_module_path, zip_folder
from mlcomp.server.back.create_dags import dag_model_add, dag_model_start
from mlcomp.utils.misc import to_snake, now
from mlcomp.db.models import Model, Report, ReportLayout, Task
from mlcomp.utils.io import yaml_load, yaml_dump
from mlcomp.worker.storage import Storage

# Flask application object; CORS(app) is applied to it right away.
app = Flask(__name__)
CORS(app)

# Two module-level sessions created at import time under separate keys.
_read_session = Session.create_session(key='server.read')
_write_session = Session.create_session(key='server.write')

# Module logger, created on the write session.
logger = create_logger(_write_session, __name__)


@app.route('/', defaults={'path': ''}, methods=['GET'])
@app.route('/<path:path>', methods=['GET'])
def send_static(path):
    """Serve a file from the built front-end directory.

    A path containing a dot is served as that file. Any other path,
    including the empty one, is answered with ``index.html``.

    :param path: URL path captured by the route
    """
    file = path if '.' in path else 'index.html'
    module_path = from_module_path(__file__, '../front/dist/mlcomp/')
    return send_from_directory(module_path, file)