示例#1
0
    def _create_work_batches(cls, subTask, rawPieces, priority):
        """Group ``rawPieces`` into batches for ``subTask`` and build them.

        The grouping key generator and ordering are obtained from
        ``cls._get_key_gen(subTask.batchingMode, subTask.maxPageSize)`` and
        applied by ``cls._group_into_batches``.  Every resulting group becomes
        one ``m.Batch``; its members are distributed over pages by
        ``split_by_size(group, subTask.maxPageSize)``.

        Args:
            subTask: object providing ``taskId``, ``subTaskId``,
                ``batchingMode`` and ``maxPageSize``.
            rawPieces: raw piece objects; only ``rawPieceId`` is read here.
            priority: value stored as the priority of every created batch.

        Returns:
            list of ``m.Batch`` objects.  They are only constructed here;
            nothing in this method adds them to a session.
        """
        key_gen, order_by = cls._get_key_gen(subTask.batchingMode,
                                             subTask.maxPageSize)
        loads = cls._group_into_batches(rawPieces, key_gen, order_by)

        batches = []

        # ``items()`` rather than the Python 2-only ``iteritems()`` so the
        # loop runs unchanged on both Python 2 and Python 3.
        for key, batch_load in loads.items():
            # A tuple key carries the batch name in its first element.
            name = key[0] if isinstance(key, tuple) else key

            batch = m.Batch(taskId=subTask.taskId,
                            subTaskId=subTask.subTaskId,
                            priority=priority,
                            name=name)

            for pageIndex, page_load in enumerate(
                    split_by_size(batch_load, subTask.maxPageSize)):
                page = m.Page(pageIndex=pageIndex)
                batch.pages.append(page)

                for memberIndex, rawPiece in enumerate(page_load):
                    memberEntry = m.PageMemberEntry(memberIndex=memberIndex)
                    memberEntry.rawPieceId = rawPiece.rawPieceId
                    page.memberEntries.append(memberEntry)

            batches.append(batch)

        return batches
示例#2
0
    def route(batch_router, raw_pieces, priority=5):
        """Build batches for ``raw_pieces``, one routed sub task per batch.

        All sub tasks known to ``batch_router`` must share one batching mode.
        The grouping uses that mode together with the smallest
        ``maxPageSize`` found among them; each resulting group is then
        assigned to the sub task returned by
        ``batch_router.get_routed_sub_task(key)`` and paged with that sub
        task's own ``maxPageSize``.

        Args:
            batch_router: object providing ``sub_tasks`` (items with a
                ``sub_task`` attribute) and ``get_routed_sub_task(key)``.
            raw_pieces: raw piece objects; only ``rawPieceId`` is read here.
            priority: value stored as the priority of every created batch.

        Returns:
            list of ``m.Batch`` objects.  They are only constructed here;
            nothing in this function adds them to a session.

        Raises:
            AssertionError: if the router has no sub tasks.
            ValueError: if the sub tasks use different batching modes.
        """
        # get routed sub tasks
        sub_tasks = [s.sub_task for s in batch_router.sub_tasks]
        if not sub_tasks:
            # Raised explicitly (same exception type as the former ``assert``)
            # so the check is not stripped when running with ``python -O``.
            raise AssertionError("batch router has no sub tasks")

        # check consistent batching mode and get page size
        batching_mode = sub_tasks[0].batchingMode
        page_size = None  # this will only be used for None batching mode

        for sub_task in sub_tasks:
            if sub_task.batchingMode != batching_mode:
                raise ValueError("inconsistent batching modes")

            if page_size is None:
                page_size = sub_task.maxPageSize
            else:
                page_size = min(page_size, sub_task.maxPageSize)

        key_gen, order_by = _batcher._get_key_gen(batching_mode, page_size)
        loads = _batcher._group_into_batches(raw_pieces, key_gen, order_by)

        batches = []

        # ``items()`` rather than the Python 2-only ``iteritems()`` so the
        # loop runs unchanged on both Python 2 and Python 3.
        for key, batch_load in loads.items():
            # A tuple key carries the batch name in its first element.
            name = key[0] if isinstance(key, tuple) else key

            routed = batch_router.get_routed_sub_task(key)

            batch = m.Batch(taskId=routed.taskId,
                            subTaskId=routed.subTaskId,
                            priority=priority,
                            name=name)

            for pageIndex, page_load in enumerate(
                    split_by_size(batch_load, routed.maxPageSize)):
                page = m.Page(pageIndex=pageIndex)
                batch.pages.append(page)

                for memberIndex, rawPiece in enumerate(page_load):
                    memberEntry = m.PageMemberEntry(memberIndex=memberIndex)
                    memberEntry.rawPieceId = rawPiece.rawPieceId
                    page.memberEntries.append(memberEntry)

            batches.append(batch)

        return batches
示例#3
0
	def create_qa_batches(self, qaSubTask, userId, intervalId, samples, priority=5):
		"""Create QA batches from sampled work entries and stage them in ``SS``.

		``samples`` is cut into loads by ``self.paginate(samples,
		qaSubTask.maxPageSize)``.  Every load becomes its own ``m.Batch``
		holding exactly one ``m.Page`` (``pageIndex`` 0) whose members
		reference the load's items through ``workEntryId``.  Each batch is
		created with ``notUserId=userId`` and ``workIntervalId=intervalId``
		and added to ``SS``; the session is flushed once after the last batch.

		Returns ``None``.  When ``samples`` is empty nothing is created and
		the session is not flushed.
		"""
		if samples:
			page_loads = self.paginate(samples, qaSubTask.maxPageSize)
			for entry_ids in page_loads:
				qa_batch = m.Batch(
					taskId=qaSubTask.taskId,
					subTaskId=qaSubTask.subTaskId,
					notUserId=userId,
					workIntervalId=intervalId,
					priority=priority,
				)
				# Each QA batch consists of a single page.
				only_page = m.Page(pageIndex=0)
				qa_batch.pages.append(only_page)
				for position, entry_id in enumerate(entry_ids):
					member = m.PageMemberEntry(memberIndex=position)
					member.workEntryId = entry_id
					only_page.memberEntries.append(member)
				SS.add(qa_batch)
			SS.flush()
示例#4
0
    def _create_rework_batches(subTask, rawPieceIds, priority):
        """Build rework batches for ``subTask`` from a sequence of raw piece ids.

        The ids are grouped, in input order, into consecutive runs of
        ``subTask.maxPageSize`` items.  Each run becomes one unnamed
        ``m.Batch`` whose members are paged by
        ``split_by_size(run, subTask.maxPageSize)``.

        Args:
            subTask: object providing ``taskId``, ``subTaskId`` and
                ``maxPageSize``.
            rawPieceIds: iterable of raw piece ids.
            priority: value stored as the priority of every created batch.

        Returns:
            list of ``m.Batch`` objects.  They are only constructed here;
            nothing in this function adds them to a session.
        """
        page_size = subTask.maxPageSize

        loads = OrderedDict()
        for i, rawPieceId in enumerate(rawPieceIds):
            # Floor division keeps the group index an integer on Python 3 as
            # well; with true division every index would form its own group.
            loads.setdefault(i // page_size, []).append(rawPieceId)

        batches = []
        for batch_load in loads.values():
            batch = m.Batch(taskId=subTask.taskId,
                            subTaskId=subTask.subTaskId,
                            priority=priority)
            for pageIndex, page_load in enumerate(
                    split_by_size(batch_load, page_size)):
                page = m.Page(pageIndex=pageIndex)
                batch.pages.append(page)
                for memberIndex, rawPieceId in enumerate(page_load):
                    memberEntry = m.PageMemberEntry(memberIndex=memberIndex)
                    memberEntry.rawPieceId = rawPieceId
                    page.memberEntries.append(memberEntry)
            batches.append(batch)
        return batches
示例#5
0
def load_qa_failed(task):
    """Batch the QA-failed work entries of ``task`` into rework sub tasks.

    Steps performed:

    1. Build a plan from every sub task whose ``qaConfig`` has
       ``populateRework`` set: per QA sub task id, which source sub task
       feeds which rework sub task and with which accuracy threshold.
    2. Query the task's work entries, distinct on ``rawPieceId`` and ordered
       by ``created`` descending (intended to yield the newest entry per raw
       piece), restricted to sub tasks whose work type has
       ``modifiesTranscription`` set.
    3. For each such entry fetch its QA record and skip the entry when it
       has none, when the plan has no rule for it, when its ``qaScore``
       reaches the threshold, or when its raw piece is already batched in
       the destination rework sub task.
    4. Group the remaining entries by rework sub task and user, and add one
       single-page ``m.Batch`` per group to ``SS``.

    The batches are only added to the session; this function neither
    flushes nor commits.

    Args:
        task: object providing ``taskId`` and ``subTasks``.
    """
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('checking task %s' % task.taskId)

    # --- 1. auto-population plan, keyed by QA sub task id ---------------
    subTaskById = {s.subTaskId: s for s in task.subTasks}
    work_plan = {}
    for s in task.subTasks:
        if s.qaConfig and s.qaConfig.populateRework:
            qaSubTask = subTaskById[s.qaConfig.qaSubTaskId]
            reworkSubTask = subTaskById[s.qaConfig.reworkSubTaskId]
            rec = work_plan.setdefault(s.qaConfig.qaSubTaskId, {})
            if rec == {}:
                rec['qaSubTask'] = qaSubTask
            rec[s.subTaskId] = {
                'src': s,
                'dest': reworkSubTask,
                'threshold': s.qaConfig.accuracyThreshold
            }

    # --- 2. latest work entry per raw piece ------------------------------
    # Filter on SubTask.taskId: the previous filter on m.Task.taskId
    # referenced a table that was never joined, producing an implicit cross
    # join that returned qualifying sub tasks of every task.
    subTaskIds = [
        s.subTaskId for s in SS.query(m.SubTask.subTaskId).join(
            m.WorkType, m.SubTask.workTypeId == m.WorkType.workTypeId).filter(
                m.WorkType.modifiesTranscription).filter(
                    m.SubTask.taskId == task.taskId)
    ]

    q_latest = m.WorkEntry.query.filter(
        m.WorkEntry.taskId == task.taskId).filter(
            m.WorkEntry.subTaskId.in_(subTaskIds)).distinct(
                m.WorkEntry.rawPieceId).order_by(m.WorkEntry.rawPieceId,
                                                 m.WorkEntry.created.desc())

    # Raw pieces already sitting in rework batches, per sub task id.
    currently_batched = {}
    for r in m.PageMember.query.filter(
            m.PageMember.taskId == task.taskId).filter(
                m.PageMember.workType == m.WorkType.REWORK).all():
        currently_batched.setdefault(r.subTaskId, set()).add(r.rawPieceId)

    # --- 3. select QA-failed entries --------------------------------------
    # group_plan: rework sub task -> user id -> list of failed work entries
    group_plan = {}
    for entry in q_latest.all():
        log.debug('checking raw piece {}, entry {}'.format(
            entry.rawPieceId, entry.entryId))
        # NOTE(review): one QA lookup per work entry; fine for small tasks,
        # consider a single joined query if this becomes slow.
        qa_records = m.QaTypeEntry.query.filter(
            m.QaTypeEntry.qaedEntryId == entry.entryId).distinct(
                m.QaTypeEntry.qaedEntryId).order_by(
                    m.QaTypeEntry.qaedEntryId,
                    m.QaTypeEntry.created.desc()).all()
        if not qa_records:
            log.debug('entry {} not qaed'.format(entry.entryId))
            continue
        qa_record = qa_records[0]

        rec = work_plan.get(qa_record.subTaskId, None)
        if not rec:
            log.debug('no auto-population configure for qa sub task {}'.format(
                qa_record.subTaskId))
            continue
        if entry.subTaskId not in rec:
            log.debug('sub task not configured for auto-population: {}'.format(
                entry.subTaskId))
            continue
        cfg = rec[entry.subTaskId]
        if qa_record.qaScore >= cfg['threshold']:
            log.debug('qa score passed')
            continue
        reworkSubTask = cfg['dest']
        if entry.rawPieceId in currently_batched.setdefault(
                reworkSubTask.subTaskId, set()):
            log.debug('already batched in sub task {}'.format(
                reworkSubTask.subTaskId))
            continue
        group_plan.setdefault(reworkSubTask, {}).setdefault(entry.userId,
                                                            []).append(entry)

    # --- 4. one single-page batch per (rework sub task, user) -------------
    # ``items()`` rather than the Python 2-only ``iteritems()``.
    for reworkSubTask, workEntriesByUserId in group_plan.items():
        for userId, entries in workEntriesByUserId.items():
            batch = m.Batch(taskId=task.taskId,
                            subTaskId=reworkSubTask.subTaskId,
                            name='qa_failed_of_user#%s' % userId,
                            priority=5)
            p = m.Page(pageIndex=0)
            batch.pages.append(p)
            for memberIndex, qaedEntry in enumerate(entries):
                memberEntry = m.PageMemberEntry(memberIndex=memberIndex)
                memberEntry.rawPieceId = qaedEntry.rawPieceId
                p.memberEntries.append(memberEntry)
            SS.add(batch)