示例#1
0
def logs(statuses, folder: str = None):
    """Gather log files and recent database log records.

    When ``folder`` is given, every path matched by ``LOG_FOLDER/*`` is
    copied into it. Unless ``statuses`` contains a row whose ``status`` is
    ``"ERROR"``, the latest error-level records and the latest records of
    the service components are then read through ``LogProvider`` and
    combined into a table.

    :param statuses: object exposing a pandas-style ``query`` method over a
        ``status`` column (presumably a DataFrame; verify against caller)
    :param folder: destination directory; ``None`` disables all file output
    :return: ``None`` when an ``ERROR`` status is present, otherwise a
        DataFrame with the columns ``status``, ``component``, ``time`` and
        ``message``
    """
    if folder is not None:
        for file in glob(join(LOG_FOLDER, '*')):
            shutil.copy(file, join(folder, basename(file)))
        print('logs formed')

    if statuses.query('status == "ERROR"').shape[0] > 0:
        return

    log_provider = LogProvider()
    errors = log_provider.last(count=1000, levels=[LogStatus.Error.value])
    service_components = [
        ComponentType.Supervisor.value, ComponentType.API.value,
        ComponentType.WorkerSupervisor.value
    ]
    services = log_provider.last(count=1000, components=service_components)

    # ``last`` yields pairs; only the first element (the log record) is used.
    rows = [
        {
            'status': to_snake(LogStatus(record.level).name),
            'component': to_snake(ComponentType(record.component).name),
            'time': record.time,
            'message': record.message,
        }
        for record, _ in errors + services
    ]
    df = pd.DataFrame(rows)
    # ``folder`` is optional: joining a file name onto ``None`` would raise
    # TypeError, so the CSV is written only when a destination exists.
    if folder is not None:
        df.to_csv(join(folder, 'logs_db.csv'), index=False)
    return df
示例#2
0
文件: dag.py 项目: xyuan/mlcomp
    def graph(self, id: int):
        """Describe the tasks of a DAG as graph nodes and edges.

        Only tasks of the DAG whose type does not exceed
        ``TaskType.Train`` are included, together with the dependency
        records whose ``task_id`` is one of those tasks.

        :param id: identifier of the DAG
        :return: ``{'nodes': [...], 'edges': [...]}``; each node carries
            ``id``, ``label``, ``name`` and ``status``, each edge carries
            ``from``, ``to`` and the status of the task it starts from
        """
        dag_tasks = (
            self.query(Task)
            .filter(Task.dag == id)
            .filter(Task.type <= TaskType.Train.value)
            .all()
        )

        dependencies = (
            self.query(TaskDependence)
            .filter(
                TaskDependence.task_id.in_([task.id for task in dag_tasks]))
            .all()
        )
        tasks_by_id = {task.id: task for task in dag_tasks}

        def label(task: Task):
            # The executor name always comes first; progress details are
            # appended once the task has reached the in-progress status.
            lines = [task.executor]
            if task.status >= TaskStatus.InProgress.value:
                lines.append(self.duration(task))
                lines.append(f'{task.current_step or ""}/{task.steps or ""}')
            return '\n'.join(lines)

        nodes = []
        for task in dag_tasks:
            nodes.append({
                'id': task.id,
                'label': label(task),
                'name': task.name,
                'status': to_snake(TaskStatus(task.status).name),
            })

        edges = []
        for link in dependencies:
            source = tasks_by_id[link.depend_id]
            edges.append({
                'from': link.depend_id,
                'to': link.task_id,
                'status': to_snake(TaskStatus(source.status).name),
            })

        return {'nodes': nodes, 'edges': edges}
示例#3
0
文件: describe.py 项目: xyuan/mlcomp
def describe_logs(dag: int,
                  axis,
                  max_log_text: int = None,
                  log_count: int = 5,
                  col_withds: List[float] = None):
    """Draw the latest log records of a DAG as a table on ``axis``.

    :param dag: DAG identifier passed to ``LogProvider.last``
    :param axis: plotting axis; its ``table``, ``set_xticks``, ``axis`` and
        ``set_title`` methods are used
    :param max_log_text: when truthy, messages are cut to this many
        characters
    :param log_count: number of records requested from the provider
    :param col_withds: column widths of the table; a built-in default is
        used when omitted
    :return: list of the log records whose level resolved to ``error``
    """
    header = ['Component', 'Level', 'Task', 'Time', 'Text']
    colour_by_level = {
        'info': 'lightblue',
        'warning': 'lightyellow',
        'error': 'red',
    }

    error_logs = []
    table_rows = []
    table_colours = []

    for entry, task_id in LogProvider().last(log_count, dag=dag):
        component = to_snake(ComponentType(entry.component).name)

        # Numeric level -> name; anything that is not 10/20/30 is an error.
        if entry.level == 10:
            level = 'debug'
        elif entry.level == 20:
            level = 'info'
        elif entry.level == 30:
            level = 'warning'
        else:
            level = 'error'

        text = entry.message
        if max_log_text:
            text = text[:max_log_text]

        table_rows.append([
            component,
            level,
            str(task_id),
            entry.time.strftime('%m.%d %H:%M:%S'),
            text,
        ])

        # Only the level cell is coloured; the rest of the row stays white.
        level_colour = colour_by_level.get(level, 'white')
        table_colours.append(
            ['white', level_colour, 'white', 'white', 'white'])

        if level == 'error':
            error_logs.append(entry)

    if not col_withds:
        col_withds = [0.2, 0.1, 0.25, 0.2, 0.45]

    if table_rows:
        table = axis.table(cellText=table_rows,
                           colLabels=header,
                           cellColours=table_colours,
                           cellLoc='center',
                           colWidths=col_withds,
                           bbox=[0, 0, 1, 1.0],
                           loc='center')

        table.auto_set_font_size(False)
        table.set_fontsize(14)

    axis.set_xticks([])
    axis.axis('off')
    axis.set_title('Logs')

    return error_logs
示例#4
0
        def is_valid_class(cls: pyclbr.Class):
            """Tell whether ``cls`` is the executor class being looked for.

            The class must list ``'Executor'`` among the names returned by
            ``get_super_names`` and its name must equal ``executor``
            verbatim, lower-cased, or converted with ``to_snake``.
            """
            if 'Executor' in get_super_names(cls):
                class_name = cls.name
                if class_name == executor:
                    return True
                if class_name.lower() == executor:
                    return True
                return to_snake(class_name) == executor

            return False
示例#5
0
文件: log.py 项目: shlemph/mlcomp
    def get(self, filter: dict, options: PaginatorOptions):
        """Return one page of log records matching ``filter``.

        Logs are outer-joined with their step and task, so records without
        either are still returned unless a step/task based filter is set.

        Recognised ``filter`` keys (all optional, ignored when falsy):

        * ``message`` - substring of the log message
        * ``dag`` - DAG the log's task belongs to
        * ``task`` - task id; logs of its direct child tasks are included
        * ``components`` / ``levels`` - lists of accepted values
        * ``computer`` - value of the log's ``computer`` column
        * ``task_name`` / ``step_name`` - substring of the task/step name
        * ``step`` - step id

        :param filter: dictionary with the keys described above
        :param options: pagination options forwarded to ``self.paginator``
        :return: ``{'total': <matching row count>, 'data': [<log dict>]}``
        """
        query = self.query(Log, Step, Task). \
            join(Step, Step.id == Log.step, isouter=True). \
            join(Task, Task.id == Log.task, isouter=True)

        if filter.get('message'):
            query = query.filter(Log.message.contains(filter['message']))

        if filter.get('dag'):
            query = query.filter(Task.dag == filter['dag'])

        if filter.get('task'):
            child_rows = self.query(Task.id). \
                filter(Task.parent == filter['task']). \
                all()
            task_ids = [row[0] for row in child_rows]
            task_ids.append(filter['task'])

            query = query.filter(Task.id.in_(task_ids))

        # Truthiness also covers an explicit ``None`` value, on which
        # ``len()`` would raise.
        if filter.get('components'):
            query = query.filter(Log.component.in_(filter['components']))

        if filter.get('computer'):
            # Filter on the log's own column. ``Computer`` is not part of
            # this query, so comparing ``Computer.name`` would add an
            # unjoined table (cross join) instead of restricting the logs.
            query = query.filter(Log.computer == filter['computer'])

        if filter.get('levels'):
            query = query.filter(Log.level.in_(filter['levels']))

        if filter.get('task_name'):
            query = query.filter(Task.name.like(f'%{filter["task_name"]}%'))

        if filter.get('step_name'):
            query = query.filter(Step.name.like(f'%{filter["step_name"]}%'))

        if filter.get('step'):
            query = query.filter(Step.id == filter['step'])

        # Count before pagination so ``total`` reflects every matching row.
        total = query.count()
        data = []
        for log, step, task in self.paginator(query, options):
            data.append({
                'id': log.id,
                'message': log.message.split('\n'),
                'module': log.module,
                'line': log.line,
                'time': self.serializer.serialize_datetime(log.time),
                'level': log_name(log.level),
                'component': to_snake(ComponentType(log.component).name),
                'computer': log.computer,
                # step/task are None when the outer join found no match
                'step': self.to_dict(step) if step else None,
                'task': self.to_dict(task, rules=('-additional_info', ))
                if task else None
            })

        return {'total': total, 'data': data}
示例#6
0
文件: app.py 项目: xang1234/mlcomp
def task_stop():
    """Stop the task named in the request and then its child tasks.

    The request payload must contain the task ``id``. The returned status
    is the one produced by stopping the requested task itself; results of
    stopping the children are not reported.

    :return: ``{'status': <snake-case TaskStatus name>}``
    """
    payload = request_data()
    task_provider = TaskProvider(_write_session)
    target = task_provider.by_id(
        payload['id'], joinedload(Task.dag_rel, innerjoin=True))
    parent_dag = target.dag_rel

    result = celery_tasks.stop(logger, _write_session, target, parent_dag)

    # Children are stopped with the DAG of the requested task.
    for child in task_provider.children(target.id):
        celery_tasks.stop(logger, _write_session, child, parent_dag)

    status_name = to_snake(TaskStatus(result).name)
    return {'status': status_name}
示例#7
0
文件: step.py 项目: xyuan/mlcomp
 def step_info(self, step):
     """Convert a step row into a plain dictionary.

     :param step: sequence whose first item is the step object and whose
         remaining items are counts paired, in order, with the members of
         ``LogStatus``
     :return: dictionary with ``id``, ``name``, ``level``, ``duration``
         (seconds) and ``log_statuses``
     """
     step, *log_status = step
     # An unfinished step is measured up to the current moment.
     ended = step.finished or now()
     elapsed = ended - step.started

     info = {
         'id': step.id,
         'name': step.name,
         'level': step.level,
         'duration': elapsed.total_seconds(),
     }
     info['log_statuses'] = [
         {'name': to_snake(status.name), 'count': count}
         for status, count in zip(LogStatus, log_status)
     ]
     return info
示例#8
0
 def register(cls):
     """Record ``cls`` in ``Executor._child`` and return it unchanged.

     The class is stored under three keys: its name as written, the
     lower-cased name, and the name converted with ``to_snake``.
     """
     class_name = cls.__name__
     for alias in (class_name, class_name.lower(), to_snake(class_name)):
         Executor._child[alias] = cls
     return cls
示例#9
0
    def get(self, filter: dict, options: PaginatorOptions):
        """Return one page of tasks plus lookup lists for the task view.

        :param filter: filter dictionary forwarded to ``self._get_filter``;
            when it has a truthy ``report`` key every returned task also
            gets a ``report_full`` flag telling whether the task belongs to
            that report
        :param options: pagination options forwarded to ``self.paginator``
        :return: dictionary with
            ``total`` - number of rows matching the filter,
            ``data`` - serialized tasks of the requested page,
            ``projects`` / ``dags`` - the 20 most recent projects / DAGs,
            ``dags_model`` - DAGs of type ``Pipe``, newest first, one per
            distinct name
        """
        query = self.query(Task, Project.name). \
            join(Dag, Dag.id == Task.dag). \
            join(Project, Project.id == Dag.project). \
            options(joinedload(Task.dag_rel, innerjoin=True))

        query = self._get_filter(query, filter)

        total = query.count()
        paginator = self.paginator(query, options)
        res = []

        for task, project_name in paginator.all():
            if task.dag_rel is None:
                continue

            item = {**self.to_dict(task, rules=('-additional_info', ))}
            item['status'] = to_snake(TaskStatus(item['status']).name)
            item['type'] = to_snake(TaskType(item['type']).name)
            # Replace the bare project id by an id/name pair.
            item['dag_rel']['project'] = {
                'id': item['dag_rel']['project'],
                'name': project_name
            }

            if task.started is None:
                delta = 0
            elif task.status == TaskStatus.InProgress.value:
                delta = (now() - task.started).total_seconds()
            else:
                finish = task.finished or task.last_activity
                # Nothing here guarantees that either timestamp is set;
                # report a zero duration instead of failing on
                # ``None - datetime``.
                delta = (finish - task.started).total_seconds() \
                    if finish else 0
            item['duration'] = duration_format(delta)
            res.append(item)

        if filter.get('report'):
            tasks_within_report = self.query(
                ReportTasks.task
            ).filter(ReportTasks.report == int(filter['report']))
            tasks_within_report = {row[0] for row in tasks_within_report}
            for r in res:
                r['report_full'] = r['id'] in tasks_within_report

        projects = self.query(Project.name, Project.id). \
            order_by(Project.id.desc()). \
            limit(20). \
            all()
        dags = self.query(Dag.name, Dag.id). \
            order_by(Dag.id.desc()). \
            limit(20). \
            all()
        projects = [
            {'name': name, 'id': project_id} for name, project_id in projects
        ]
        dags = [{'name': name, 'id': dag_id} for name, dag_id in dags]

        dags_model = self.query(Dag.name, Dag.id, Dag.project). \
            filter(Dag.type == DagType.Pipe.value). \
            order_by(Dag.id.desc()). \
            all()

        # Rows are ordered by id descending, so the first row seen for a
        # name is the one with the highest id; later duplicates are dropped.
        dags_model_dict = []
        used_dag_names = set()

        for name, dag_id, project in dags_model:
            if name in used_dag_names:
                continue

            dags_model_dict.append(
                {'name': name, 'id': dag_id, 'project': project})
            used_dag_names.add(name)

        return {
            'total': total,
            'data': res,
            'projects': projects,
            'dags': dags,
            'dags_model': dags_model_dict
        }
示例#10
0
文件: storage.py 项目: shlemph/mlcomp
 def is_valid_class(cls: pyclbr.Class):
     """Tell whether the name of ``cls`` matches ``executor``.

     The name is compared verbatim, lower-cased, and converted with
     ``to_snake``; the first match wins.
     """
     class_name = cls.name
     if class_name == executor:
         return True
     if class_name.lower() == executor:
         return True
     return to_snake(class_name) == executor
示例#11
0
文件: dag.py 项目: ASRlytics/mlcomp
    def get(self, filter: dict, options: PaginatorOptions = None):
        """Return DAGs with per-DAG task aggregates.

        For every DAG the query computes the task count, the latest
        ``last_activity``, the earliest ``started``, the latest
        ``finished`` and one counter per ``TaskStatus`` member.

        :param filter: filter dictionary; it is forwarded to
            ``self._get_filter`` and additionally read here for
            ``status`` (a dict keyed by snake-case status name; DAGs are
            kept when at least one selected status has a non-zero count)
            and ``report`` (adds a ``report_full`` flag to every DAG)
        :param options: pagination options; when falsy the query is not
            paginated
        :return: ``{'total': ..., 'data': [...], 'projects': [...]}`` where
            ``projects`` holds the 20 projects with the highest ids
        """
        # One aggregate per status: sum of 1 for every task in that status.
        task_status = []
        for e in TaskStatus:
            task_status.append(
                func.sum(
                    case(
                        whens=[(Task.status == e.value, 1)],
                        else_=0
                    ).label(e.name)
                )
            )

        last_activity = func.max(Task.last_activity).label('last_activity')
        funcs = [
            func.count(Task.id).label('task_count'), last_activity,
            func.min(Task.started).label('started'),
            func.max(Task.finished).label('finished')
        ]

        query = self.query(Dag, Project.name, *funcs,
                           *task_status).join(Project)
        # The last_activity aggregate is handed to the shared filter helper
        # (how it is used is defined there, not visible in this block).
        query = self._get_filter(query, filter, last_activity)

        # Keep a DAG when any of the requested statuses occurs in it.
        status_clauses = []
        for agg, e in zip(task_status, TaskStatus):
            if filter.get('status', {}).get(to_snake(e.name)):
                status_clauses.append(agg > 0)
        if len(status_clauses) > 0:
            query = query.having(or_(*status_clauses))

        query = query.join(Task, isouter=True).group_by(Dag.id, Project.name)
        # Do not include service tasks
        query = query.filter(Task.type < TaskType.Service.value)

        total = query.count()
        paginator = self.paginator(query, options) if options else query
        res = []
        rules = ('-tasks.dag_rel', )
        # Row layout mirrors the select list above. NOTE: the starred target
        # rebinds ``task_status`` from the aggregate expressions to the list
        # of per-status counts of the current row.
        for dag, \
                project_name, \
                task_count, \
                last_activity, \
                started, \
                finished, \
                *(task_status) in paginator.all():

            items = self.to_dict(dag, rules=rules).items()
            # Serialized DAG fields, minus 'tasks' and 'config', merged with
            # the aggregates.
            # noinspection PyDictCreation
            r = {
                'task_count': task_count,
                'last_activity': last_activity,
                'started': started,
                'finished': finished,
                **{k: v
                   for k, v in items if k not in ['tasks', 'config']}
            }
            r['project'] = {'name': project_name}

            r['task_statuses'] = [
                {
                    'name': to_snake(e.name),
                    'count': s
                } for e, s in zip(TaskStatus, task_status)
            ]
            # Datetimes are serialized; missing values stay None.
            r['last_activity'] = self.serializer.serialize_datetime(
                r['last_activity']
            ) if r['last_activity'] else None
            r['started'] = self.serializer.serialize_datetime(r['started']) \
                if r['started'] else None
            r['finished'] = self.serializer.serialize_datetime(
                r['finished']
            ) if r['finished'] else None

            # NOTE(review): the counts list is indexed by the enum *value*,
            # which assumes TaskStatus values are the 0-based positions of
            # its members - confirm against the enum definition.
            # Duration: running DAGs are measured up to now; DAGs with no
            # task at or beyond InProgress (or without timestamps) get 0;
            # otherwise the span from first start to last activity is used.
            if task_status[TaskStatus.InProgress.value] > 0:
                delta = (now() - started).total_seconds()
            elif sum(
                task_status[TaskStatus.InProgress.value:]
            ) == 0 or not started or not last_activity:
                delta = 0
            else:
                delta = (last_activity - started).total_seconds()

            r['duration'] = duration_format(delta)
            res.append(r)

        if filter.get('report'):
            # A DAG is "fully" in the report when none of its tasks (type up
            # to Train) is missing from the report's task set.
            dag_ids = [r['id'] for r in res]
            tasks_dags = self.query(Task.id, Task.dag). \
                filter(Task.type <= TaskType.Train.value). \
                filter(Task.dag.in_(dag_ids)). \
                all()

            tasks_within_report = self.query(ReportTasks.task). \
                filter(ReportTasks.report == int(filter['report']))

            tasks_within_report = {t[0] for t in tasks_within_report}
            dags_not_full_included = {
                d
                for t, d in tasks_dags if t not in tasks_within_report
            }
            for r in res:
                r['report_full'] = r['id'] not in dags_not_full_included

        projects = self.query(Project.name, Project.id). \
            order_by(Project.id.desc()). \
            limit(20). \
            all()

        projects = [{'name': name, 'id': id} for name, id in projects]
        return {'total': total, 'data': res, 'projects': projects}
示例#12
0
文件: describe.py 项目: xyuan/mlcomp
def _format_task_duration(seconds: float) -> str:
    """Render a number of seconds with the largest fitting unit first."""
    # ``>=`` so that exactly one hour/minute rolls over to the next unit
    # instead of being shown as "60 min 0 sec" / "60 sec".
    if seconds >= 3600:
        return f'{int(seconds // 3600)} hours ' \
               f'{int((seconds % 3600) // 60)} min' \
               f' {int(seconds % 60)} sec'
    if seconds >= 60:
        return f'{int(seconds // 60)} min {int(seconds % 60)} sec'
    return f'{int(seconds)} sec'


def describe_tasks(dag: int, axis):
    """Draw the tasks of a DAG as a table on ``axis``.

    Each row shows the task id, start time, duration, current step and
    status; the status cell is coloured by status. Tasks without a start
    time get empty start/duration cells, and unfinished tasks are measured
    up to the current moment.

    :param dag: DAG identifier passed to ``TaskProvider.by_dag``
    :param axis: plotting axis; its ``table``, ``set_xticks``, ``axis`` and
        ``set_title`` methods are used
    :return: ``True`` when no task has a status at or below
        ``TaskStatus.InProgress`` (also when the DAG has no tasks),
        ``False`` otherwise
    """
    provider = TaskProvider()
    columns = ['Id', 'Started', 'Duration', 'Step', 'Status']
    cells = []
    cells_colours = []

    tasks = provider.by_dag(dag)

    status_colors = {
        'not_ran': 'gray',
        'queued': 'lightblue',
        'in_progress': 'lime',
        'failed': '#e83217',
        'stopped': '#cb88ea',
        'skipped': 'orange',
        'success': 'green'
    }

    finish = True

    for task in tasks:
        started = ''
        duration = ''

        if task.status <= TaskStatus.InProgress.value:
            finish = False

        if task.started:
            started = task.started.strftime('%m.%d %H:%M:%S')
            end = task.finished or now()
            duration = _format_task_duration(
                (end - task.started).total_seconds())

        status = to_snake(TaskStatus(task.status).name)
        status_color = status_colors[status]

        cells.append([
            str(task.id), started, duration, task.current_step or '1', status
        ])
        cells_colours.append(
            ['white', 'white', 'white', 'white', status_color])

    # A table cannot be built from zero rows; skip it for an empty DAG,
    # the same way describe_logs does for an empty log list.
    if cells:
        table = axis.table(cellText=cells,
                           colLabels=columns,
                           cellColours=cells_colours,
                           cellLoc='center',
                           colWidths=[0.2, 0.3, 0.4, 0.1, 0.2],
                           bbox=[0, 0, 1.0, 1.0],
                           loc='center')

        table.auto_set_font_size(False)
        table.set_fontsize(14)

    axis.set_xticks([])
    axis.axis('off')
    axis.set_title('Tasks')

    return finish
示例#13
0
 def register(cls):
     """Record ``cls`` in ``Interface._child`` and return it unchanged.

     The class is stored under three keys: its name as written, the
     lower-cased name, and the name converted with ``to_snake``.
     """
     class_name = cls.__name__
     for alias in (class_name, class_name.lower(), to_snake(class_name)):
         Interface._child[alias] = cls
     return cls
示例#14
0
 def names_snake(cls):
     """Return every name from ``cls.names()`` converted with ``to_snake``."""
     return list(map(to_snake, cls.names()))