示例#1
0
    def confirm_delete_view(self, request):
        """
        Two-step confirmation view for deleting the projects kept in the session.

        The project ids are read from the ``_project_ids`` session key.
        A GET request renders a confirmation page with the number of records
        that would be removed; any other method starts the ``CleanProjects``
        task and redirects one level up.
        """
        selected_project_ids = request.session.get('_project_ids')

        if request.method != 'GET':
            # POST: actual delete
            from apps.task.tasks import call_task
            call_task(
                task_name='CleanProjects',
                module_name='apps.project.tasks',
                _project_ids=selected_project_ids,
                user_id=request.user.id,
                delete=True)
            from django.http import HttpResponseRedirect
            return HttpResponseRedirect("../")

        document_ids = Document.all_objects.filter(
            project_id__in=selected_project_ids).values_list('id', flat=True)

        from apps.document.repository.document_bulk_delete \
            import get_document_bulk_delete
        counts_by_table = get_document_bulk_delete().calculate_deleting_count(document_ids)
        class_dictionary = ModelClassDictionary()
        # Only tables known to the model dictionary are reported, keyed by
        # their human-readable model name.
        counts_by_title = {
            class_dictionary.get_model_class_name_hr(table): counts_by_table[table]
            for table in counts_by_table
            if table in class_dictionary.model_by_table
        }
        related_rows = sorted(
            ((title, count, False) for title, count in counts_by_title.items()),
            key=lambda row: row[0])

        # The two headline rows always come first: projects, then documents.
        documents_row = ('Documents', len(document_ids), True)
        projects_row = ('Projects', len(selected_project_ids), True)
        summary = [projects_row, documents_row, *related_rows]

        from django.shortcuts import render
        return render(
            request,
            "admin/common/confirm_delete_view.html",
            {
                'deleting_count': summary,
                'return_url': 'admin:project_softdeleteproject_changelist'
            })
示例#2
0
    def post(self, request, *args, **kwargs):
        """
        Start every configured task for which the POSTed form carries input.

        For each ``task_alias`` in ``self.tasks_map`` the form is searched for:
        - ``<field>_locale_<locale>`` entries, where ``<field>`` starts with
          the alias and is a key of ``self.paths_map`` (each yields one
          repository path);
        - ``<task_alias>_file_path``;
        - ``<task_alias>_delete``.
        A task is considered only when at least one of these is present.
        A requested task refused by ``Task.disallow_start`` is reported as
        rejected instead of being silently dropped.

        :param request: POST request with the form data.
        :return: JSON response; HTTP 400 with ``'error'`` for an empty form.
        """
        data = request.POST.dict()
        if not data:
            # An empty form is a malformed request, not a missing resource.
            return self.json_response('error', status=400)
        data['user_id'] = request.user.pk

        rejected_tasks = []
        started_tasks = []
        for task_alias, metadata in self.tasks_map.items():
            task_name = metadata['task_name']

            repo_paths = []
            for field in data:
                if not field.startswith(task_alias) or field not in self.paths_map:
                    continue
                locale_prefix = '{}_locale_'.format(field)
                for locale_field in data:
                    if locale_field.startswith(locale_prefix):
                        repo_paths.append('{}/{}/{}'.format(
                            settings.GIT_DATA_REPO_ROOT,
                            locale_field.replace(locale_prefix, ''),
                            self.paths_map[field]))
            file_path = data.get('{}_file_path'.format(task_alias)) or None
            delete = '{}_delete'.format(task_alias) in data

            if not any([repo_paths, file_path, delete]):
                # Nothing was requested for this task.
                continue
            # Only tasks the user actually asked for can be rejected.
            if Task.disallow_start(task_name):
                rejected_tasks.append(task_name)
                continue
            call_task(task_name,
                      repo_paths=repo_paths,
                      file_path=file_path,
                      delete=delete,
                      metadata=metadata)
            started_tasks.append(task_name)

        if not rejected_tasks:
            return self.json_response('The task is started. It can take a while.')

        # Some requested tasks were refused: tell the caller which ones.
        response_text = ''
        if started_tasks:
            response_text += 'The Task is started. It can take a while.<br />'
            response_text += 'Started tasks: [{}].<br />'.format(', '.join(started_tasks))
        response_text += 'Some tasks were rejected (already started).<br />'
        response_text += 'Rejected Tasks: [{}]'.format(', '.join(rejected_tasks))
        return self.json_response(response_text)
示例#3
0
    def handle(self, *args, **options):
        """
        Clean the projects listed in the ``ids`` option.

        With ``async`` set to ``True`` a single ``CleanProjects`` task is
        started for all ids. Otherwise each project is processed in turn by a
        fresh ``CleanProject`` instance and the elapsed time is written to
        stdout. The ``delete`` and ``safe`` options are forwarded either way.
        """
        project_ids = options.get('ids') or []
        delete_project_itself = options.get('delete')
        safe_delete = options.get('safe')

        if options.get('async') is True:
            call_task(CleanProjects,
                      _project_ids=project_ids,
                      delete=delete_project_itself,
                      safe_delete=safe_delete)
            return

        started_at = datetime.datetime.now()
        for project_id in project_ids:
            CleanProject().process(_project_id=project_id,
                                   delete=delete_project_itself,
                                   safe_delete=safe_delete,
                                   skip_task_updating=True)
        time_elapsed = (datetime.datetime.now() - started_at).total_seconds()

        self.stdout.write(
            f'Deleting {len(project_ids)} project(s) took {time_elapsed} s.')
示例#4
0
    def process(self, **kwargs):
        """
        Remove a project's documents and their source files, then clean the project.

        Keyword arguments read:
            - ``_project_id``: primary key of the project to clean;
            - ``delete``: truthy to pass ``delete=True`` to ``Project.cleanup``.

        Afterwards the task's own metadata is overwritten with the task name
        and the processed project id.
        """
        project_pk = kwargs.get('_project_id')
        remove_project = bool(kwargs.get('delete'))
        project = Project.all_objects.get(pk=project_pk)

        # Collect the document ids and source paths, then schedule file removal.
        repository = self.document_repository
        document_ids = repository.get_project_document_ids(project_pk)
        source_paths = repository.get_all_document_source_paths(document_ids)
        from apps.document.tasks import DeleteDocumentFiles
        call_task(DeleteDocumentFiles, metadata=source_paths)

        # Delete the documents themselves.
        from apps.document.repository.document_bulk_delete \
            import get_document_bulk_delete
        bulk_delete = get_document_bulk_delete()
        bulk_delete.delete_documents(document_ids)

        # Clean up (and optionally delete) the project itself.
        project.cleanup(delete=remove_project)

        # Store data about the cleanup on this task.
        # The key is "_project_id" (leading underscore) to avoid detecting
        # the task as a project task.
        cleanup_task = self.task
        cleanup_task.metadata = dict(
            [('task_name', 'clean-project'), ('_project_id', project_pk)])
        cleanup_task.save()
    def send_clusters_to_project(self, request, **kwargs):
        """
        Send clusters to another Project\n
            Params:
                - cluster_ids: list[int]
                - project_id: int

        Fails with APIException when the project has no clustering object or
        when any requested cluster is already listed under
        ``reassigned_cluster_ids`` in the clustering metadata.
        """
        source_project = self.get_object()
        requested_ids = [
            int(raw_id) for raw_id in request.POST.getlist('cluster_ids')
        ]

        clustering = source_project.projectclustering_set.last()
        if not clustering:
            raise APIException('Project Clustering object not found')

        previously_moved = clustering.metadata.get('reassigned_cluster_ids', [])
        conflicts = set(requested_ids) & set(previously_moved)
        if conflicts:
            conflict_list = ', '.join(map(str, conflicts))
            raise APIException(
                'Cluster(s) id=({}) is/are already reassigned to another project'
                .format(conflict_list))

        target_project_id = int(request.POST.get('project_id'))
        call_task(task_name='ReassignProjectClusterDocuments',
                  module_name='apps.project.tasks',
                  cluster_ids=requested_ids,
                  project_id=source_project.id,
                  new_project_id=target_project_id,
                  user_id=request.user.id)

        return Response('OK')
示例#6
0
def retrain_dirty_fields(self):
    """
    Start the dirty-field retraining task when there is work and none is active.

    Nothing is started unless ``DocumentTypeField`` reports dirty fields, and
    nothing is started when ``Task.objects.active_tasks_exist`` returns true
    for the task name and the cutoff computed as "now minus
    ``settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC`` seconds".
    """
    TaskUtils.prepare_task_execution()
    if not DocumentTypeField.objects.has_dirty_fields():
        return

    retrain_task_name = TrainDirtyDocumentFieldDetectorModel.name
    current_time = now()
    cutoff = current_time - datetime.timedelta(
        seconds=settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC)
    if Task.objects.active_tasks_exist(retrain_task_name, cutoff):
        return

    call_task(retrain_task_name, module_name='apps.document.tasks')
 def form_valid(self, form):
     """
     Start ``self.task_name`` with the cleaned form data plus the user's pk.

     When ``check_blocks`` returns anything other than ``False`` that value is
     sent back as a 403 response and no task is started; otherwise the user
     is redirected to the task list.
     """
     block_msg = check_blocks(raise_error=False)
     if block_msg is False:
         task_kwargs = form.cleaned_data
         task_kwargs['user_id'] = self.request.user.pk
         call_task(self.task_name, **task_kwargs)
         task_list_url = reverse('task:task-list')
         return redirect(task_list_url)
     return HttpResponseForbidden(block_msg)
    def post(self, request, *args, **kwargs):
        """
        Start the locator tasks selected in the submitted form.

        Form fields are grouped by the text before their first underscore
        (the ``parse`` field is excluded). Groups named in
        ``self.custom_tasks`` are started as individual tasks; all remaining
        groups are bundled into a single ``Locate`` task. A group is ignored
        when none of its values is truthy. A task refused by
        ``Task.disallow_start`` is reported as rejected instead of started.

        :param request: POST request carrying the form data.
        :return: JSON response listing started and rejected tasks, or the
            form errors with HTTP 400 when validation fails.
        """
        form = self.form_class(request.POST)
        if not form.is_valid():
            # Invalid input is a client error (400), not a missing resource (404).
            return self.json_response(form.errors, status=400)
        data = form.cleaned_data
        task_names = {i.split('_')[0] for i in data if i != 'parse'}
        custom_task_names = task_names & self.custom_tasks
        lexnlp_task_names = task_names - self.custom_tasks

        def options_for(task_name):
            # This task's form fields with the "<task>_" part removed from the key.
            return {k.replace('%s_' % task_name, ''): v for k, v in data.items()
                    if k.startswith(task_name)}

        # custom tasks
        rejected_tasks = []
        started_tasks = []
        for task_name in custom_task_names:
            task_options = options_for(task_name)
            if not any(task_options.values()):
                continue
            task_options['user_id'] = request.user.pk
            if Task.disallow_start(task_name):
                rejected_tasks.append(task_name)
                continue
            started_tasks.append(task_name)
            locator_result_link = self.locator_result_links_map.get(task_name)
            if locator_result_link:
                task_options['metadata'] = {'result_links': [locator_result_link]}
            call_task(task_name, **task_options)

        # lexnlp tasks
        lexnlp_task_data = dict()
        for task_name in lexnlp_task_names:
            task_options = options_for(task_name)
            if any(task_options.values()):
                lexnlp_task_data[task_name] = task_options

        if lexnlp_task_data:
            if Task.disallow_start('Locate'):
                rejected_tasks.append('Locate')
            else:
                started_tasks.append('Locate')
                # Locators whose "locate" option is set drive both the
                # description and the result links of the combined task.
                located = [name for name, options in lexnlp_task_data.items()
                           if options.get('locate')]
                call_task('Locate',
                          tasks=lexnlp_task_data,
                          parse=data['parse'],
                          user_id=request.user.pk,
                          metadata={
                              'description': located,
                              'result_links': [self.locator_result_links_map[name]
                                               for name in located]})

        response_text = ''
        if started_tasks:
            response_text += 'The Task is started. It can take a while.<br />'
            response_text += 'Started tasks: [{}].<br />'.format(', '.join(started_tasks))
        if rejected_tasks:
            response_text += 'Some tasks were rejected (already started).<br />'
            response_text += 'Rejected Tasks: [{}]'.format(', '.join(rejected_tasks))
        return self.json_response(response_text)
示例#9
0
    def cleanup(self, request, **kwargs):
        """
        Clean project (Generic Contract Type project)

        Starts the ``CleanProject`` task for the project identified by the
        ``pk`` URL argument on behalf of the requesting user.
        """
        # NOTE(review): the CleanProject.process visible in this listing reads
        # "_project_id", not "project_id" - confirm which keyword the task expects.
        project_pk = int(kwargs['pk'])
        task_arguments = dict(task_name='CleanProject',
                              module_name='apps.project.tasks',
                              project_id=project_pk,
                              user_id=request.user.id)
        call_task(**task_arguments)

        return Response('OK')
示例#10
0
def retrain_dirty_fields(self):
    """
    Start the dirty-field retraining task unless one was queued recently.

    Nothing is started unless ``DocumentTypeField`` reports dirty fields.
    "Queued recently" means a ``Task`` row with the same name, own status
    ``PENDING`` and a start date later than "now minus
    ``settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC`` seconds".
    """
    TaskUtils.prepare_task_execution()
    if not DocumentTypeField.objects.has_dirty_fields():
        return

    retrain_task_name = TrainDirtyDocumentFieldDetectorModel.name
    current_time = now()
    cutoff = current_time - datetime.timedelta(
        seconds=settings.RETRAINING_TASK_EXECUTION_DELAY_IN_SEC)
    recently_queued = Task.objects.filter(name=retrain_task_name,
                                          own_status='PENDING',
                                          date_start__gt=cutoff)
    if recently_queued.exists():
        return

    call_task(TrainDirtyDocumentFieldDetectorModel.name,
              module_name='apps.document.tasks')
示例#11
0
    def upload(self, request, **kwargs):
        """
        Upload a File\n
            Params:
                - file: file object
                - force: bool (optional) - whether rewrite existing file and Document
                - send_email_notifications: bool (optional) - sent notification email that batch uploading started

        Responds with 'Loaded' on success, or with ALREADY_EXISTS when an
        error carrying exactly that message is raised while loading.
        """
        uploaded_file = request.FILES.dict().get('file')
        if not uploaded_file:
            raise APIException('File not found.')
        # MAX_DOCUMENT_SIZE.val is expressed in megabytes.
        size_limit_bytes = MAX_DOCUMENT_SIZE.val * 1024 * 1024
        if uploaded_file.size > size_limit_bytes:
            raise APIException(
                f'File size is greater than allowed max {MAX_DOCUMENT_SIZE.val} Mb'
            )

        session_id = kwargs.get('pk')
        try:
            session = self.get_object()
        except (UploadSession.DoesNotExist, ValidationError):
            raise APIException('Wrong upload session uid.')

        project = session.project

        try:
            source_path = self.get_source_path(request, **kwargs)

            # Code for running locators and detecting field values has been moved to LoadDocuments task
            # for the unification purposes between old and new ui.
            task_metadata = {
                'session_id': session_id,
                'file_name': uploaded_file.name
            }
            call_task(task_name='LoadDocuments',
                      source_data=source_path,
                      user_id=request.user.id,
                      session_id=session_id,
                      run_standard_locators=True,
                      metadata=task_metadata,
                      linked_tasks=None)

            should_notify = (
                project.send_email_notification
                and request.POST.get('send_email_notifications') == 'true'
                and not session.notified_upload_started)
            if should_notify:
                self._notify_upload_started(session)

        except Exception as e:
            error_text = str(e)
            if error_text == ALREADY_EXISTS:
                return Response(ALREADY_EXISTS)
            raise APIException(error_text)

        return Response('Loaded')
示例#12
0
    def cleanup(self, request, **kwargs):
        """
        Clean project (Generic Contract Type project)\n
            Params:
                - delete: bool (optional, default true) - forwarded to the CleanProject task

        Starts the ``CleanProject`` task for the project identified by the
        ``pk`` URL argument on behalf of the requesting user.
        """
        raw_delete = request.data.get('delete', 'true')
        # Form-encoded bodies deliver the flag as JSON text ("true"/"false"),
        # while a JSON body delivers a real boolean, which json.loads() would
        # reject with TypeError.
        if isinstance(raw_delete, bool):
            delete = raw_delete
        else:
            delete = json.loads(raw_delete)

        call_task(task_name='CleanProject',
                  module_name='apps.project.tasks',
                  _project_id=int(kwargs['pk']),
                  delete=delete,
                  user_id=request.user.id)

        return Response('OK')
示例#13
0
 def post(self, request, *args, **kwargs):
     """
     Start ``self.task_name`` with the submitted data.

     The request is refused with 403 when ``Task.disallow_start`` returns
     true for the task. Task arguments come from the validated form when
     ``self.form_class`` is set (form errors are returned with HTTP 400),
     otherwise from the raw POST data. The user's pk and the result of
     ``self.get_metadata()`` are added before the task is started.
     """
     if Task.disallow_start(self.task_name):
         return HttpResponseForbidden(
             'Forbidden. Such task is already started.')

     if self.form_class is not None:
         form = self.form_class(request.POST)
         if not form.is_valid():
             return self.json_response(form.errors, status=400)
         task_kwargs = form.cleaned_data
     else:
         task_kwargs = request.POST.dict()

     task_kwargs['user_id'] = request.user.pk
     task_kwargs['metadata'] = self.get_metadata()
     call_task(self.task_name, **task_kwargs)
     return self.json_response('The task is started. It can take a while.')
示例#14
0
def trigger_imanage_sync(_task: ExtendedTask) -> None:
    """
    Start the iManage synchronization task when the SQL check below says so.

    :param _task: the calling task; not used in the body.
    """
    # SQL: Return 1 if there are enabled imanage configs last time processed too long ago
    # and there are no sync tasks pending.
    # Table names come from the model metadata; the task name and the tuple of
    # statuses are passed as query parameters.
    sql = SQLClause('''select case when ( 
    exists (select * from "{table_name}" where enabled = True 
    and (last_sync_start is null 
    or (last_sync_start + (sync_frequency_minutes::text||\' minute\')::INTERVAL) <= now()) 
    limit 1)
    and not exists (select * from "{task_table_name}" where name = %s and status in %s) 
    ) then 1 else 0 end   
    '''.format(table_name=IManageConfig._meta.db_table, task_table_name=Task._meta.db_table),
                    [IManageSynchronization.name, UNREADY_STATE_TUPLE])

    with connection.cursor() as cursor:
        # presumably fetch_int returns the single integer selected by the query - TODO confirm
        if fetch_int(cursor, sql):
            call_task(IManageSynchronization.name, auto=True, module_name='apps.imanage_integration.tasks')
示例#15
0
    def batch_upload(self, request, **kwargs):
        """
        Upload files from given sub-folder in media/data/documents folder\n
            Params:
                - source_path: relative path to a folder with documents
                - send_email_notifications: bool (optional) - sent notification email that batch uploading started

        The folder may also be given as ``folder``, which takes precedence
        over ``source_path``. One LoadDocuments task is started per file
        listed by the file storage.
        """
        session = self.get_object()
        session_id = session.pk
        project = session.project
        folder_name = request.POST.get('folder') or request.POST.get(
            'source_path')

        if not (session_id and folder_name):
            raise ValidationError('Provide session id and folder name.')

        # TODO: limit file size - see def upload()
        for file_path in get_file_storage().list_documents(folder_name):
            task_metadata = {
                'session_id': session_id,
                'file_name': os.path.basename(file_path)
            }

            # Code for running locators and detecting field values has been moved to LoadDocuments task
            # for the unification purposes between old and new ui.
            call_task(task_name='LoadDocuments',
                      source_data=file_path,
                      user_id=request.user.id,
                      session_id=session_id,
                      metadata=task_metadata,
                      run_standard_locators=True,
                      linked_tasks=None)

        should_notify = (
            project.send_email_notification
            and request.POST.get('send_email_notifications') == 'true'
            and not session.notified_upload_started)
        if should_notify:
            self._notify_upload_started(session)

        return Response('Started')
    def post(self, request, *args, **kwargs):
        """
        Validate the form and start the ``LoadDocumentWithFields`` task.

        Beyond the form's own validation:
        - when ``document_fields`` is given it must be valid JSON and
          ``document_name`` must be given as well;
        - otherwise ``source_data`` is required.

        :param request: POST request carrying the form data.
        :return: JSON response with the form errors and HTTP 400 when
            validation fails, otherwise a "task started" message.
        """
        form = self.form_class(request.POST)
        data = None
        document_fields = None

        if form.is_valid():
            # NOTE(review): values are read from the raw form.data rather than
            # form.cleaned_data - confirm this is intended.
            data = form.data
            document_fields = data.get('document_fields')
            source_data = data.get('source_data')
            document_name = data.get('document_name')
            if document_fields:
                if not document_name:
                    form.add_error('document_name', 'Document name should be specified if you specified the fields.')
                try:
                    document_fields = json.loads(document_fields)
                except (TypeError, ValueError):
                    # json.JSONDecodeError is a ValueError; TypeError covers a
                    # value that is not text at all. Anything else propagates
                    # instead of being reported as a JSON problem.
                    form.add_error('document_fields', 'Incorrect JSON format')
            else:
                if not source_data:
                    form.add_error('source_data', 'Either document document fields or source path should be specified.')

        # Checked again because add_error() above may have invalidated the form.
        if not form.is_valid():
            return self.json_response(form.errors, status=400)

        document_name = data.get('document_name')

        call_task(
            task_name='LoadDocumentWithFields',
            module_name='apps.document.tasks',
            source_data=data.get('source_data'),
            project_id=data.get('project'),
            document_type_id=data.get('document_type'),
            document_name=document_name,
            document_fields=document_fields,
            run_detect_field_values=data.get('run_detect_field_values')
        )
        return self.json_response('The task is started. It can take a while.')
示例#17
0
    def cluster(self, request, **kwargs):
        """
        Cluster Project Documents\n
            Params:
                - method: str[KMeans, MiniBatchKMeans, Birch, DBSCAN]
                - n_clusters: int
                - force: bool (optional) - force clustering if uncompleted tasks exist

        Creates a ProjectClustering record, starts the
        ClusterProjectDocuments task and returns both identifiers.
        ``n_clusters`` falls back to 3 when it is not a valid integer.
        """
        # Resolve the project unconditionally so the lookup done by
        # get_object() is never skipped, even for forced requests; previously
        # force=true accepted any pk without looking the project up.
        obj = self.get_object()

        if request.POST.get('force') != 'true':
            progress = project_progress(obj)
            if progress['uncompleted_sessions'] is not None:
                raise APIException('Project has uncompleted upload sessions.')
            elif progress['completed_sessions'] is None:
                raise APIException("Project hasn't completed upload sessions.")

        project_id = kwargs.get('pk')

        project_clustering = ProjectClustering.objects.create(
            project_id=project_id)

        try:
            n_clusters = int(request.POST.get('n_clusters', 3))
        except ValueError:
            n_clusters = 3

        task_id = call_task(task_name='ClusterProjectDocuments',
                            module_name='apps.project.tasks',
                            user_id=request.user.id,
                            project_id=project_id,
                            project_clustering_id=project_clustering.id,
                            method=request.POST.get('method', 'KMeans'),
                            metadata={'project_id': project_id},
                            n_clusters=n_clusters)

        return Response({
            'task_id': task_id,
            'project_clustering_id': project_clustering.id
        })
示例#18
0
    def cluster(self, request, **kwargs):
        """
        Cluster Project Documents\n
            Params:
                - method: str[KMeans, MiniBatchKMeans, Birch, DBSCAN]
                - n_clusters: int
                - force: bool (optional) - force clustering if uncompleted tasks exist

        Creates a ProjectClustering record, starts the
        ClusterProjectDocuments task and returns both identifiers.
        ``n_clusters`` falls back to 3 when it is not a valid integer.
        """
        project = getattr(self, 'object', None) or self.get_object()

        forced = request.POST.get('force') == 'true'
        if not forced:
            upload_sessions = project.uploadsession_set
            if upload_sessions.filter(completed=False).exists():
                raise APIException('Project has uncompleted upload sessions.')
            if not upload_sessions.filter(completed=True).exists():
                raise APIException("Project hasn't completed upload sessions.")

        clustering = ProjectClustering.objects.create(project=project)

        try:
            cluster_count = int(request.POST.get('n_clusters', 3))
        except ValueError:
            cluster_count = 3

        task_id = call_task(task_name='ClusterProjectDocuments',
                            module_name='apps.project.tasks',
                            user_id=request.user.id,
                            project_id=project.id,
                            project_clustering_id=clustering.id,
                            method=request.POST.get('method', 'KMeans'),
                            metadata={'project_id': project.id},
                            n_clusters=cluster_count)

        payload = {
            'task_id': task_id,
            'project_clustering_id': clustering.id
        }
        return Response(payload)
示例#19
0
 def form_valid(self, form):
     """
     Start ``self.task_name`` with the cleaned form data plus the user's pk,
     then redirect to the task list.
     """
     task_kwargs = form.cleaned_data
     task_kwargs['user_id'] = self.request.user.pk
     call_task(self.task_name, **task_kwargs)
     task_list_url = reverse('task:task-list')
     return redirect(task_list_url)
示例#20
0
 def handle(self, *args, **options):
     """Start the 'Locate Parties' task, forwarding every option as a task argument."""
     task_name = 'Locate Parties'
     call_task(task_name, **options)
示例#21
0
    def batch_upload(self, request, **kwargs):
        """
        Upload files from given sub-folder in media/data/documents folder\n
            Params:
                - source_path: relative path to a folder with documents
                - send_email_notifications: bool (optional) - sent notification email that batch uploading started

        The folder may also be given as ``folder``, which takes precedence
        over ``source_path``. One LoadDocuments task is started per listed
        file, followed by a linked Locate task and, when the project has a
        type, one DetectFieldValues task per custom app whose already-imported
        tasks module defines it.
        """
        session = self.get_object()
        session_id = session.pk
        project = session.project
        folder_name = request.POST.get('folder') or request.POST.get(
            'source_path')

        if not (session_id and folder_name):
            raise ValidationError('Provide session id and folder name.')

        file_list = file_access_handler.list(folder_name)

        required_locators = [
            'date', 'party', 'term', 'geoentity', 'currency', 'citation',
            'definition', 'duration'
        ]

        for file_path in file_list:
            file_name = os.path.basename(file_path)

            def task_metadata():
                # A fresh dict per task so no two tasks share one object.
                return {'session_id': session_id, 'file_name': file_name}

            locate_task = {
                'task_name': 'Locate',
                'locate': required_locators,
                'parse': 'sentences',
                'do_delete': False,
                'session_id': session_id,
                'metadata': task_metadata(),
                'user_id': request.user.id
            }
            linked_tasks = [locate_task]

            # if Document type specified
            if project.type:
                for app_name in custom_apps:
                    module_str = 'apps.%s.tasks' % app_name
                    # Only modules that are already imported are considered.
                    tasks_module = sys.modules.get(module_str)
                    if not hasattr(tasks_module, 'DetectFieldValues'):
                        continue
                    linked_tasks.append({
                        'task_name': 'DetectFieldValues',
                        'module_name': module_str,
                        'do_not_write': False,
                        'session_id': session_id,
                        'metadata': task_metadata(),
                        'user_id': request.user.id
                    })

            call_task(task_name='LoadDocuments',
                      source_data=file_path,
                      user_id=request.user.id,
                      session_id=session_id,
                      metadata=task_metadata(),
                      linked_tasks=linked_tasks)

        should_notify = (
            project.send_email_notification
            and request.POST.get('send_email_notifications') == 'true'
            and not session.notified_upload_started)
        if should_notify:
            session.notify_upload_started()

        return Response('Started')
示例#22
0
    def post(self, request, *args, **kwargs):
        """
        Start the locator tasks selected in the submitted form.

        Form fields are grouped by the text before their first underscore
        (``parse`` is excluded, and a truthy ``project`` value is removed
        from the data and passed on as ``project_id``). Groups named in
        ``self.custom_tasks`` are started as individual tasks unless
        ``Task.disallow_start`` refuses them; all remaining groups are
        bundled into one ``Locate`` task. A group is ignored when none of its
        values is truthy. Any exception raised while starting a task ends the
        request with its message and HTTP 400.
        """
        form = self.form_class(request.POST)
        if not form.is_valid():
            return self.json_response(form.errors, status=400)
        data = form.cleaned_data

        # The project reference is not a task option: keep its pk aside.
        project_id = None
        project_ref = data.get('project')
        if project_ref:
            del data['project']
            project_id = project_ref.pk

        task_names = {field.split('_')[0] for field in data if field != 'parse'}
        custom_task_names = task_names & self.custom_tasks
        lexnlp_task_names = task_names - self.custom_tasks

        def options_for(name):
            # This task's form fields with the "<task>_" part removed from the key.
            marker = '%s_' % name
            return {
                key.replace(marker, ''): value
                for key, value in data.items() if key.startswith(name)
            }

        # custom tasks
        rejected_tasks = []
        started_tasks = []
        for task_name in custom_task_names:
            task_options = options_for(task_name)
            if not any(task_options.values()):
                continue
            task_options['user_id'] = request.user.pk
            if Task.disallow_start(task_name):
                rejected_tasks.append(task_name)
                continue
            started_tasks.append(task_name)
            result_link = self.locator_result_links_map.get(task_name)
            if result_link:
                task_options['metadata'] = {'result_links': [result_link]}
            try:
                call_task(task_name, **task_options)
            except Exception as e:
                return self.json_response(str(e), status=400)

        # lexnlp tasks
        lexnlp_task_data = {}
        for task_name in lexnlp_task_names:
            task_options = options_for(task_name)
            if any(task_options.values()):
                lexnlp_task_data[task_name] = task_options

        if lexnlp_task_data:
            # allow to start "Locate" task anytime
            started_tasks.append(
                'Locate({})'.format(', '.join(lexnlp_task_data)))
            try:
                # Built inside the try block on purpose: a failure while
                # preparing the arguments is reported as HTTP 400 as well.
                located = [
                    name for name, options in lexnlp_task_data.items()
                    if options.get('locate')
                ]
                call_task('Locate',
                          tasks=lexnlp_task_data,
                          parse=data['parse'],
                          user_id=request.user.pk,
                          project_id=project_id,
                          metadata={
                              'description': located,
                              'result_links': [
                                  self.locator_result_links_map[name]
                                  for name in located
                              ]
                          })
            except Exception as e:
                return self.json_response(str(e), status=400)

        response_parts = []
        if started_tasks:
            response_parts.append(f'{TASK_STARTED_MESSAGE}<br /><br />')
            response_parts.append('Started tasks: [{}].<br />'.format(
                ', '.join(started_tasks)))
        if rejected_tasks:
            response_parts.append(
                'Some tasks were rejected (already started).<br />')
            response_parts.append('Rejected Tasks: [{}]'.format(
                ', '.join(rejected_tasks)))
        return self.json_response(''.join(response_parts))
示例#23
0
 def start_task(self, data):
     """
     Start the configured task with ``data`` as its keyword arguments.

     ``self.task_class`` is used when truthy, otherwise ``self.task_name``.
     """
     task = self.task_class or self.task_name
     call_task(task, **data)
示例#24
0
 def handle(self, *args, **options):
     """Start the 'Load Documents' task, forwarding every option as a task argument."""
     task_name = 'Load Documents'
     call_task(task_name, **options)
 def process(self, **kwargs):
     """
     Delete the documents given in ``_document_ids`` and schedule removal of
     their source files via the ``DeleteDocumentFiles`` task.
     """
     document_ids = kwargs.get('_document_ids')
     # Paths are collected before the documents are deleted (presumably they
     # cannot be looked up afterwards - TODO confirm).
     repository = self.document_repository
     source_paths = repository.get_all_document_source_paths(document_ids)
     bulk_delete = get_document_bulk_delete()
     bulk_delete.delete_documents(document_ids)
     call_task(DeleteDocumentFiles, metadata=source_paths)
def trigger_digests(_task):
    """
    Start a SendDigest task for each enabled digest config that is due now.

    For every enabled ``DocumentDigestConfig`` the target users (the explicitly
    configured user plus every user having the configured role) are checked
    against the config's month / day-of-month / day-of-week / hour
    restrictions, evaluated in each user's own timezone (the local timezone
    when the user has none set). Matching users are grouped by timezone name
    and one SendDigest task is started per group, unless
    ``_send_digest_scheduled`` returns a truthy value for the group's run date.

    :param _task: not used by this function.
    """
    # Resolve the local timezone once; it is both the reference for "now" and
    # the fallback for users without a timezone.
    local_tz = tzlocal.get_localzone()
    now_local = datetime.datetime.now(tz=local_tz)
    role_ids_to_user_ids = defaultdict(set)  # type: Dict[int, Set[int]]
    user_ids_to_timezones = {}  # type: Dict[int, datetime.tzinfo]

    for user_id, role_id, timezone in \
            User.objects.all().values_list('pk', 'role_id', 'timezone'):  # type: int, int, datetime.tzinfo
        role_ids_to_user_ids[role_id].add(user_id)
        user_ids_to_timezones[user_id] = timezone or local_tz

    enabled_configs = DocumentDigestConfig \
        .objects \
        .filter(enabled=True) \
        .values_list('pk', 'for_user_id', 'for_role_id',
                     'run_at_month', 'run_at_day_of_month', 'run_at_day_of_week', 'run_at_hour', 'run_at_minute')

    for config_id, for_user_id, for_role_id, \
        run_at_month, run_at_day_of_month, run_at_day_of_week, run_at_hour, run_at_minute \
            in enabled_configs:

        run_at_month = _as_ints(run_at_month)
        run_at_day_of_month = _as_ints(run_at_day_of_month)
        run_at_day_of_week = _as_ints(run_at_day_of_week)
        run_at_hour = _as_ints(run_at_hour)
        # Keep None as "no minute configured": int(None) would raise TypeError
        # and make the "run right now" branch below unreachable.
        run_at_minute = int(run_at_minute) if run_at_minute is not None else None

        for_user_ids = set()
        if for_user_id:
            for_user_ids.add(for_user_id)
        if for_role_id:
            for_user_ids.update(role_ids_to_user_ids[for_role_id])

        # timezone name -> (user ids, delay in seconds, run date in that timezone)
        tz_to_user_ids_delta_date = {}  # type: Dict[str, Tuple[Set[int], float, datetime.datetime]]

        for user_id in for_user_ids:
            timezone = user_ids_to_timezones[user_id]  # type: datetime.tzinfo
            now_in_tz = now_local.astimezone(timezone)

            # A falsy restriction means "any value"; otherwise the current
            # moment in the user's timezone must match it.
            if run_at_month and now_in_tz.month not in run_at_month:
                continue
            if run_at_day_of_month and now_in_tz.day not in run_at_day_of_month:
                continue
            if run_at_day_of_week and now_in_tz.isoweekday() not in run_at_day_of_week:
                continue
            if run_at_hour and now_in_tz.hour not in run_at_hour:
                continue

            # NOTE(review): grouping relies on tzname(None) returning a name
            # that distinguishes timezones - confirm for the tzinfo
            # implementation in use.
            tz_name = timezone.tzname(dt=None)
            if tz_name not in tz_to_user_ids_delta_date:
                if run_at_minute is None:
                    # No minute configured: run immediately.
                    run_date_in_tz = now_in_tz
                    delta = 0
                else:
                    run_date_in_tz = now_in_tz.replace(minute=run_at_minute, second=0, microsecond=0)
                    # Delay the task until the configured minute if it is still
                    # ahead within the current hour; otherwise start without delay.
                    delta = (run_date_in_tz - now_in_tz).total_seconds() if run_date_in_tz > now_in_tz else 0
                tz_to_user_ids_delta_date[tz_name] = ({user_id}, delta, run_date_in_tz)
            else:
                tz_to_user_ids_delta_date[tz_name][0].add(user_id)

        for user_ids, delta, run_date_in_tz in tz_to_user_ids_delta_date.values():
            if not _send_digest_scheduled(run_date_in_tz):
                call_task(SendDigest.name,
                          module_name=MODULE_NAME,
                          **{SendDigest.PARAM_USER_IDS: user_ids,
                             SendDigest.PARAM_CONFIG_ID: config_id,
                             SendDigest.PARAM_RUN_DATE: run_date_in_tz,
                             'countdown': delta})
示例#27
0
 def handle(self, *args, **options):
     """Start the 'Locate Terms' task, forwarding ``options`` as keyword arguments."""
     task_name = 'Locate Terms'
     call_task(task_name, **options)
示例#28
0
    def upload(self, request, **kwargs):
        """
        Upload a File\n
            Params:
                - file: file object
                - force: bool (optional) - whether rewrite existing file and Document
        """
        session_id = kwargs.get('pk')
        project = self.get_object().project
        file_ = request.FILES.dict().get('file')

        # Both the session pk (from the URL kwargs) and the uploaded file are mandatory.
        if not (session_id and file_):
            raise ValidationError('Provide session_id and file in request data.')

        def _storage_for(sid):
            # File storage rooted at the directory of a single upload session.
            return FileSystemStorage(
                location=os.path.join(
                    settings.MEDIA_ROOT,
                    settings.FILEBROWSER_DIRECTORY,
                    sid))

        def _task_metadata():
            # A fresh dict per task so that no two tasks share one metadata object.
            return {'session_id': session_id, 'file_name': file_.name}

        try:
            # One storage per upload session of the project, keyed by str(pk).
            project_storages = {}
            for session_pk in project.uploadsession_set.values_list('pk', flat=True):
                project_storages[str(session_pk)] = _storage_for(str(session_pk))

            # Documents of this project already carrying the uploaded file's name.
            same_name_docs = project.document_set.filter(name=file_.name)

            # Files with the same name that are on disk but not stored as a Document yet.
            pending_storages = {}
            for sid, sess_storage in project_storages.items():
                if not sess_storage.exists(file_.name):
                    continue
                source_path = os.path.join(sid, file_.name)
                if Document.objects.filter(source_path=source_path).exists():
                    continue
                pending_storages[sid] = sess_storage

            if same_name_docs.exists() or pending_storages:
                # Name collision: only proceed when the client explicitly asked to overwrite.
                if request.POST.get('force') != 'true':
                    raise APIException('Already exists')

                for sid, sess_storage in pending_storages.items():
                    sess_storage.delete(file_.name)
                    stale_tasks = Task.objects \
                        .filter(metadata__session_id=sid) \
                        .filter(metadata__file_name=file_.name)
                    for stale_task in stale_tasks:
                        if stale_task.metadata.get('file_name') == file_.name:
                            purge_task(stale_task.id)
                    # Possibly redundant: these sessions had no Document matched by source_path.
                    Document.objects \
                        .filter(upload_session_id=sid, name=file_.name) \
                        .delete()

                for existing_doc in same_name_docs:
                    existing_doc.delete()

            # Save the upload into the current session's directory.
            stored_file_name = _storage_for(session_id).save(file_.name, file_.file)

            linked_tasks = [{
                'task_name': 'Locate',
                'locate': ['date',
                           'party',
                           'term',
                           'geoentity',
                           'currency',
                           'citation',
                           'definition',
                           'duration'],
                'parse': 'sentences',
                'do_delete': False,
                'metadata': _task_metadata(),
                'user_id': request.user.id,
            }]

            document_type = UploadSession.objects.get(pk=session_id).project.type

            # Field detection is linked only when the project has a type set.
            if document_type:
                for app_name in custom_apps:
                    module_str = 'apps.%s.tasks' % app_name
                    tasks_module = sys.modules.get(module_str)
                    if not hasattr(tasks_module, 'DetectFieldValues'):
                        continue
                    linked_tasks.append({
                        'task_name': 'DetectFieldValues',
                        'module_name': module_str,
                        'do_not_write': False,
                        'metadata': _task_metadata(),
                        'user_id': request.user.id,
                    })

            call_task(
                task_name='LoadDocuments',
                source_path=os.path.join(session_id, stored_file_name),
                user_id=request.user.id,
                metadata=_task_metadata(),
                linked_tasks=linked_tasks)
        except Exception as e:
            # Every failure inside the block is reported as an APIException with the same message.
            raise APIException(str(e))

        return Response('Loaded')
 def handle(self, *args, **options):
     """Start the 'Total Cleanup' task, forwarding ``options`` as keyword arguments."""
     task_name = 'Total Cleanup'
     call_task(task_name, **options)
示例#30
0
 def handle(self, *args, **options):
     """Start the 'Load Entities' task, forwarding ``options`` as keyword arguments."""
     task_name = 'Load Entities'
     call_task(task_name, **options)