Пример #1
0
def find_files_in_path_not_in_external_dirs(fid,
                                            path,
                                            external,
                                            algorithm,
                                            rootdir=""):
    """Parse every file under ``path`` that is not inside an external dir.

    Args:
        fid: Passed through unchanged to ``parse_file``.
        path: Directory that is walked recursively.
        external: Iterable of indexable items; element ``[1]`` of each item
            is used as an external directory, compared against file paths
            relative to ``path``.
        algorithm: Passed to ``parse_file`` as ``algorithm``.
        rootdir: Passed to ``parse_file`` as ``rootdir``.

    Returns:
        List with the ``parse_file`` result of each file that is kept, in
        walk order.
    """
    external_dirs = [entry[1] for entry in external]
    files = []
    for root, _dirnames, filenames in walk(path):
        for fname in filenames:
            filepath = os.path.join(root, fname)
            relpath = os.path.relpath(filepath, path)

            # Stop at the first external directory that contains the file;
            # one match is enough to skip it.
            if any(in_directory(relpath, ext) for ext in external_dirs):
                continue

            files.append(
                parse_file(filepath,
                           fid,
                           relpath,
                           algorithm=algorithm,
                           rootdir=rootdir))
    return files
Пример #2
0
def Action(self, tool, pattern, rootdir, options, purpose=None):
    """Run the action tool named ``tool`` on content below ``rootdir``.

    If the tool has ``file_processing`` set, ``pattern`` is globbed
    (case-insensitively) below ``rootdir`` and the tool is run once per
    matched file; matched directories are walked recursively. Otherwise the
    tool is run once on ``rootdir`` joined with ``pattern``.

    Start and finish are recorded through ``self.create_success_event`` and
    a ``Notification`` is created for ``self.responsible`` when done.

    Args:
        tool: Name of the ``ActionTool`` to look up.
        pattern: Glob pattern (file-processing tools) or relative path.
        rootdir: Directory that all processed paths must stay inside.
        options: Passed unchanged to ``tool.run``.
        purpose: Free text included in the start/finish event messages.

    Raises:
        ValueError: If ``pattern`` resolves to a path outside ``rootdir``.
    """
    outside_msg = 'Invalid file-pattern accessing files outside of package'
    ip = self.get_information_package()

    def _convert(path, rootdir, tool, options):
        # Run the tool on a single file and log one event for it.
        tool.run(path, rootdir, options)

        relpath = PurePath(path).relative_to(rootdir).as_posix()
        EventIP.objects.create(
            eventType_id=50750,
            eventOutcome=EventIP.SUCCESS,
            eventOutcomeDetailNote='{type} {relpath}'.format(
                type=tool.type.capitalize(), relpath=relpath),
            # Reuse the package fetched once above instead of fetching it
            # again for every processed file.
            linkingObjectIdentifierValue=str(ip.pk),
            linkingAgentIdentifierValue=User.objects.get(pk=self.responsible))

        if tool.delete_original:
            os.remove(path)

    tool = ActionTool.objects.get(name=tool)

    msg = '{type} job started, purpose: {purpose}'.format(
        type=tool.type.capitalize(), purpose=purpose)
    self.create_success_event(msg)

    if tool.file_processing:
        for path in iglob(rootdir + '/' + pattern, case_sensitive=False):
            if not in_directory(path, rootdir):
                raise ValueError(outside_msg)

            if os.path.isdir(path):
                for root, _dirs, files in os.walk(path):
                    for f in files:
                        fpath = os.path.join(root, f)
                        _convert(fpath, rootdir, tool, options)
            else:
                _convert(path, rootdir, tool, options)
    else:
        filepath = os.path.join(rootdir, pattern)
        # An absolute or '..' pattern would otherwise let the tool run on
        # (and, with delete_original, remove) a path outside the package;
        # apply the same containment check as the glob branch.
        if not in_directory(filepath, rootdir):
            raise ValueError(outside_msg)

        tool.run(filepath, rootdir, options)
        if tool.delete_original:
            os.remove(filepath)

    Notification.objects.create(message='{type} job done for "{ip}"'.format(
        type=tool.type.capitalize(), ip=ip.object_identifier_value),
                                level=logging.INFO,
                                user_id=self.responsible,
                                refresh=True)

    msg = '{type} job done, purpose: {purpose}'.format(
        type=tool.type.capitalize(), purpose=purpose)
    self.create_success_event(msg)
Пример #3
0
    def _run(self):
        """Convert files in a new generation of every information package.

        For each package in ``self.information_packages`` the fastest
        readable storage object is extracted into a temp directory belonging
        to a newly created generation. Every entry of ``self.specification``
        (glob pattern -> ``{'tool': ..., 'options': ...}``) is then applied
        with ``self.convert``, and the new generation is finally handed to
        ``preserve_new_generation``.

        Raises:
            NoReadableStorage: A package has no readable storage object.
            ValueError: A pattern matches a path outside the extracted
                package directory.
        """
        self.delete_event_type = EventType.objects.get(eventType=50750)

        packages = self.information_packages
        temp_root = Path.objects.get(entity='temp').value

        for ip in packages.iterator():
            source = ip.storage.readable().fastest().first()
            if source is None:
                raise NoReadableStorage

            new_ip = ip.create_new_generation(ip.state, ip.responsible, None)
            workdir = os.path.join(temp_root, new_ip.object_identifier_value)
            source.read(workdir, None, extract=True)
            new_ip.object_path = workdir
            new_ip.save()

            # Apply each conversion rule of the specification.
            for pattern, spec in self.specification.items():
                tool = ActionTool.objects.get(name=spec['tool'])
                options = spec['options']
                matches = iglob(workdir + '/' + pattern, case_sensitive=False)

                for match in matches:
                    if not in_directory(match, workdir):
                        raise ValueError(
                            'Invalid file-pattern accessing files outside of package'
                        )

                    # Plain file: convert it directly.
                    if not os.path.isdir(match):
                        self.convert(ip, match, workdir, tool, options,
                                     new_ip)
                        continue

                    # Directory: convert every file found below it.
                    for dirpath, _subdirs, names in walk(match):
                        for name in names:
                            self.convert(ip, os.path.join(dirpath, name),
                                         workdir, tool, options, new_ip)

            with allow_join_result():
                preserve_new_generation(new_ip)
Пример #4
0
    def _run(self):
        """Execute the appraisal job on its tags and information packages.

        Steps, in order:

        1. Create one ``AppraisalJobEntry`` per tag of the job whose current
           version is not in the ``'document'`` index.
        2. For every package in ``self.information_packages``:
           - without ``self.package_file_pattern``: extract the package to a
             temp directory, register a job entry and a "Deleted" event for
             every file, then delete the package or deactivate its
             generations;
           - with patterns: create a new generation, remove the matching
             files from it through ``self.delete_file``, call
             ``self.delete_document_tags``, preserve the new generation and
             then delete the old package or deactivate old generations.
        3. For packages that are not part of the job but have document tags
           attached to it: create a new generation with those document tags
           removed, then delete/deactivate the old one as above.
        4. Delete all tags of the job.

        Whether old packages are deleted is controlled by the
        ``DELETE_PACKAGES_ON_APPRAISAL`` setting (default ``False``).

        Raises:
            NoReadableStorage: A package has no readable storage object.
            ValueError: A file pattern matches a path outside the extracted
                package directory.
        """
        self.delete_event_type = EventType.objects.get(eventType=50710)
        entries = []

        # Register every non-document tag of the job as a job entry.
        for t in self.tags.select_related('current_version').exclude(
                current_version__elastic_index='document').all():
            entries.append(
                AppraisalJobEntry(
                    job=self,
                    start_date=timezone.now(),
                    end_date=timezone.now(),
                    component=t.current_version,
                ))

        AppraisalJobEntry.objects.bulk_create(entries)

        ips = self.information_packages
        logger.info(
            'Running appraisal job {} on {} information packages'.format(
                self.pk, ips.count()))

        delete_packages = getattr(settings, 'DELETE_PACKAGES_ON_APPRAISAL',
                                  False)
        tmpdir = Path.objects.get(entity='temp').value

        for ip in ips.iterator():
            # Read from the fastest storage object that is readable.
            storage_obj: Optional[StorageObject] = ip.storage.readable(
            ).fastest().first()
            if storage_obj is None:
                raise NoReadableStorage

            if not self.package_file_pattern:
                # No file patterns: the whole package is appraised.
                ip_tmpdir = os.path.join(tmpdir, ip.object_identifier_value)
                os.makedirs(ip_tmpdir, exist_ok=True)
                storage_obj.read(ip_tmpdir, None, extract=True)

                # register all files
                job_entry_start_date = timezone.now()
                job_entry_end_date = timezone.now()
                job_entries = []
                for root, _dirs, files in walk(ip_tmpdir):
                    for f in files:
                        # Path of the file relative to the extracted package,
                        # with forward slashes.
                        rel = PurePath(os.path.join(
                            root, f)).relative_to(ip_tmpdir).as_posix()
                        job_entries.append(
                            AppraisalJobEntry(
                                job=self,
                                start_date=job_entry_start_date,
                                end_date=job_entry_end_date,
                                ip=ip,
                                document=rel,
                            ))
                        # One "Deleted" event per file; the extracted copy
                        # itself is not removed in this branch.
                        EventIP.objects.create(
                            eventType=self.delete_event_type,
                            eventOutcome=EventIP.SUCCESS,
                            eventOutcomeDetailNote='Deleted {}'.format(rel),
                            linkingObjectIdentifierValue=ip.
                            object_identifier_value,
                        )

                AppraisalJobEntry.objects.bulk_create(job_entries)

                if delete_packages:
                    # NOTE: this loop rebinds ``storage_obj`` from above.
                    for storage_obj in ip.storage.all():
                        storage_obj.delete_files()
                    ip.delete()
                else:
                    # inactivate old generations
                    # NOTE(review): only this branch also updates
                    # ``last_changed_local``; the two other deactivation
                    # sites below do not. Confirm which is intended.
                    InformationPackage.objects.filter(
                        aic=ip.aic, generation__lte=ip.generation).update(
                            active=False, last_changed_local=timezone.now())

            else:
                # File patterns given: build a new generation without the
                # matching files.
                new_ip = ip.create_new_generation(ip.state, ip.responsible,
                                                  None)
                new_ip_tmpdir = os.path.join(tmpdir,
                                             new_ip.object_identifier_value)
                storage_obj.read(new_ip_tmpdir, None, extract=True)
                new_ip.object_path = new_ip_tmpdir
                new_ip.save()

                # delete files specified in rule
                for pattern in cast(List[str], self.package_file_pattern):
                    for path in iglob(new_ip_tmpdir + '/' + pattern,
                                      case_sensitive=False):
                        # Refuse patterns that escape the package directory.
                        if not in_directory(path, new_ip_tmpdir):
                            raise ValueError(
                                'Invalid file-pattern accessing files outside of package'
                            )

                        if os.path.isdir(path):
                            # Process every file individually, then drop the
                            # whole directory tree.
                            for root, _dirs, files in walk(path):
                                for f in files:
                                    rel = PurePath(os.path.join(
                                        root, f)).relative_to(
                                            new_ip_tmpdir).as_posix()
                                    self.delete_file(ip, os.path.join(root, f),
                                                     rel, new_ip)

                            shutil.rmtree(path)

                        else:
                            rel = PurePath(path).relative_to(
                                new_ip_tmpdir).as_posix()
                            self.delete_file(ip, path, rel, new_ip)

                self.delete_document_tags(ip, new_ip, new_ip_tmpdir)

                with allow_join_result():
                    preserve_new_generation(new_ip)

                # Move the remaining (non-document) tags to the new
                # generation.
                ip.tags.exclude(
                    current_version__elastic_index='document', ).update(
                        information_package=new_ip)

                if delete_packages:
                    for storage_obj in ip.storage.all():
                        storage_obj.delete_files()
                    ip.delete()
                else:
                    # inactivate old generations
                    InformationPackage.objects.filter(
                        aic=ip.aic,
                        generation__lte=ip.generation).update(active=False)
                    ip.tags.filter(
                        current_version__elastic_index='document').delete()

        # Packages that are not part of the job themselves but have document
        # tags that are attached to it.
        document_tag_ips = InformationPackage.objects.exclude(
            appraisal_jobs=self).filter(
                tags__appraisal_jobs=self,
                tags__current_version__elastic_index='document',
            ).distinct()
        for ip in document_tag_ips.iterator():
            storage_obj: Optional[StorageObject] = ip.storage.readable(
            ).fastest().first()
            if storage_obj is None:
                raise NoReadableStorage

            new_ip = ip.create_new_generation(ip.state, ip.responsible, None)
            new_ip_tmpdir = os.path.join(tmpdir,
                                         new_ip.object_identifier_value)
            storage_obj.read(new_ip_tmpdir, None, extract=True)
            new_ip.object_path = new_ip_tmpdir
            new_ip.save()

            self.delete_document_tags(ip, new_ip, new_ip_tmpdir)

            with allow_join_result():
                preserve_new_generation(new_ip)

            # Move the remaining (non-document) tags to the new generation.
            ip.tags.exclude(
                current_version__elastic_index='document', ).update(
                    information_package=new_ip)

            if delete_packages:
                for storage_obj in ip.storage.all():
                    storage_obj.delete_files()
                ip.delete()
            else:
                # inactivate old generations
                InformationPackage.objects.filter(
                    aic=ip.aic,
                    generation__lte=ip.generation).update(active=False)
                ip.tags.filter(
                    current_version__elastic_index='document').delete()

        # All tags of the job are removed once every package is processed.
        self.tags.all().delete()
Пример #5
0
    def files(self, path=''):
        """Return a listing or the content of ``path`` for this package.

        When ``self.object_path`` is a single container file, ``path`` is
        resolved next to that file and may be:

        * the container's name: tar/zip members are listed, any other
          container is returned as content;
        * the container's name followed by a member path: the member of the
          tar/zip archive is returned as content;
        * the name of the sibling ``.xml`` file: returned as content;
        * empty: the container (and the ``.xml`` file, if present) is listed.

        Otherwise ``path`` is resolved below ``self.object_path``; a file is
        returned as content and a directory as a listing sorted by name.

        Content is always served as bytes: files on disk are opened in binary
        mode, and every opened handle is closed before returning. Responses
        are ``inline`` when the extension has a known MIME type and
        ``attachment`` otherwise.

        Args:
            path: Requested path, see above.

        Returns:
            ``Response`` holding a list of dicts with ``name``, ``type``,
            ``size`` and ``modified``, or ``HttpResponse`` holding content.

        Raises:
            exceptions.NotFound: The path or archive member does not exist.
            exceptions.ParseError: ``path`` resolves outside
                ``self.object_path``.
        """
        # Clear the module-level mimetypes tables so that only the types
        # from the configured definition file are known.
        mimetypes.suffix_map = {}
        mimetypes.encodings_map = {}
        mimetypes.types_map = {}
        mimetypes.common_types = {}
        mimetypes_file = Path.objects.get(
            entity="path_mimetypes_definitionfile").value
        mimetypes.init(files=[mimetypes_file])
        mtypes = mimetypes.types_map

        def _content_response(content, type_source, filename):
            # Build the download response; the MIME type comes from the
            # extension of ``type_source``, the shown name from ``filename``.
            content_type = mtypes.get(os.path.splitext(type_source)[1])
            disposition = 'attachment' if content_type is None else 'inline'
            response = HttpResponse(content, content_type=content_type)
            response['Content-Disposition'] = '%s; filename="%s"' % (
                disposition, os.path.basename(filename))
            return response

        def _disk_file_response(file_path):
            # Read as bytes (works for binary files, no re-encoding) and
            # close the handle instead of leaking it.
            with open(file_path, 'rb') as fp:
                content = fp.read()
            return _content_response(content, file_path, file_path)

        if os.path.isfile(self.object_path):
            container = self.object_path
            xml = os.path.splitext(self.object_path)[0] + '.xml'

            if path.startswith(os.path.basename(container)):
                fullpath = os.path.join(os.path.dirname(container), path)

                if tarfile.is_tarfile(container):
                    with tarfile.open(container) as tar:
                        if fullpath == container:
                            return Response([{
                                "name": member.name,
                                "type": 'file',
                                "size": member.size,
                                "modified":
                                timestamp_to_datetime(member.mtime),
                            } for member in tar.getmembers()
                                             if member.isfile()])

                        # Strip "<container>/" to get the member name.
                        subpath = fullpath[len(container) + 1:]
                        try:
                            member = tar.getmember(subpath)
                        except KeyError:
                            raise exceptions.NotFound

                        if not member.isfile():
                            raise exceptions.NotFound

                        with tar.extractfile(member) as f:
                            return _content_response(f.read(), subpath,
                                                     f.name)

                elif zipfile.is_zipfile(container):
                    with zipfile.ZipFile(container) as zipf:
                        if fullpath == container:
                            return Response([{
                                "name": member.filename,
                                "type": 'file',
                                "size": member.file_size,
                                "modified":
                                datetime.datetime(*member.date_time),
                            } for member in zipf.filelist
                                             if not member.filename.endswith(
                                                 '/')])

                        # Strip "<container>/" to get the member name.
                        subpath = fullpath[len(container) + 1:]
                        try:
                            f = zipf.open(subpath)
                        except KeyError:
                            raise exceptions.NotFound

                        with f:
                            return _content_response(f.read(), subpath,
                                                     f.name)

                # Container is neither tar nor zip: serve it as it is.
                return _disk_file_response(fullpath)
            elif os.path.isfile(xml) and path == os.path.basename(xml):
                fullpath = os.path.join(os.path.dirname(container), path)
                return _disk_file_response(fullpath)
            elif path == '':
                entries = [{
                    "name": os.path.basename(container),
                    "type": 'file',
                    "size": os.path.getsize(container),
                    "modified":
                    timestamp_to_datetime(os.path.getmtime(container)),
                }]

                if os.path.isfile(xml):
                    entries.append({
                        "name": os.path.basename(xml),
                        "type": 'file',
                        "size": os.path.getsize(xml),
                        "modified":
                        timestamp_to_datetime(os.path.getmtime(xml)),
                    })
                return Response(entries)

            elif path is not None:
                raise exceptions.NotFound

        fullpath = os.path.join(self.object_path, path)

        if not in_directory(fullpath, self.object_path):
            raise exceptions.ParseError('Illegal path %s' % path)

        if not os.path.exists(fullpath):
            raise exceptions.NotFound

        if os.path.isfile(fullpath):
            return _disk_file_response(fullpath)

        entries = []
        for entry in get_files_and_dirs(fullpath):
            entry_type = "dir" if entry.is_dir() else "file"

            if entry_type == 'file' and re.search(
                    r'\_\d+$', entry.name) is not None:  # file chunk
                continue

            size, _ = get_tree_size_and_count(entry.path)

            entries.append({
                "name": os.path.basename(entry.path),
                "type": entry_type,
                "size": size,
                "modified": timestamp_to_datetime(entry.stat().st_mtime),
            })

        sorted_entries = sorted(entries, key=itemgetter('name'))
        return Response(sorted_entries)
Пример #6
0
    def files(self, request, pk=None):
        """List, create or delete content of an information package.

        * ``DELETE``: remove the file or directory tree at ``data['path']``.
        * ``POST``: create an empty file or a directory at ``data['path']``,
          depending on ``data['type']`` (``"file"`` or ``"dir"``).
        * any other method: delegate to ``ip.get_path_response`` with the
          ``path`` and ``download`` query parameters.

        Methods outside ``permissions.SAFE_METHODS`` are only accepted while
        the package is in state ``'Prepared'`` or ``'Uploading'``.

        Raises:
            exceptions.ParseError: Wrong package state, a path outside the
                package, or a directory that already exists.
            exceptions.NotFound: The path to delete does not exist.
        """
        ip = self.get_object()
        method = request.method

        is_write = method not in permissions.SAFE_METHODS
        if is_write and ip.state not in ['Prepared', 'Uploading']:
            raise exceptions.ParseError(
                "Cannot delete or add content of an IP that is not in 'Prepared' or 'Uploading' state"
            )

        if method == 'DELETE':
            try:
                rel = request.data['path']
            except KeyError:
                return Response('Path parameter missing',
                                status=status.HTTP_400_BAD_REQUEST)

            base = ip.object_path
            target = os.path.join(base, rel)
            if not in_directory(target, base):
                raise exceptions.ParseError('Illegal path %s' % rel)

            try:
                shutil.rmtree(target)
            except OSError as err:
                if err.errno == errno.ENOENT:
                    raise exceptions.NotFound('Path does not exist')

                if err.errno == errno.ENOTDIR:
                    # Not a directory, so remove it as a regular file.
                    os.remove(target)
                else:
                    raise

            return Response(status=status.HTTP_204_NO_CONTENT)

        if method == 'POST':
            try:
                rel = request.data['path']
            except KeyError:
                return Response('Path parameter missing',
                                status=status.HTTP_400_BAD_REQUEST)

            try:
                kind = request.data['type']
            except KeyError:
                return Response('Type parameter missing',
                                status=status.HTTP_400_BAD_REQUEST)

            base = ip.object_path
            target = os.path.join(base, rel)
            if not in_directory(target, base):
                raise exceptions.ParseError('Illegal path %s' % rel)

            if kind == 'dir':
                try:
                    os.makedirs(target)
                except OSError as err:
                    if err.errno != errno.EEXIST:
                        raise

                    raise exceptions.ParseError(
                        'Directory %s already exists' % rel)
            elif kind == 'file':
                # Append mode creates the file without truncating one that
                # is already there.
                open(target, 'a').close()
            else:
                return Response('Type must be either "file" or "dir"',
                                status=status.HTTP_400_BAD_REQUEST)

            return Response(rel, status=status.HTTP_201_CREATED)

        params = request.query_params
        requested = params.get('path', '').rstrip('/')
        force_download = params.get('download', False)
        return ip.get_path_response(requested,
                                    request,
                                    force_download=force_download,
                                    paginator=self.paginator)
Пример #7
0
 def validate_path(self, path):
     """Reject ``path`` unless it stays inside the package.

     ``path`` is joined onto ``self.object_path``. It is accepted when the
     result is inside ``self.object_path`` or is exactly the sibling
     ``.xml`` file (``self.object_path`` with its extension replaced).

     Raises:
         exceptions.ValidationError: For any other path.
     """
     fullpath = os.path.join(self.object_path, path)
     sibling_xml = os.path.splitext(self.object_path)[0] + '.xml'
     if not in_directory(fullpath,
                         self.object_path) and fullpath != sibling_xml:
         # A positional field is required here: the former '{s}' field made
         # format() raise KeyError instead of building the message.
         raise exceptions.ValidationError(u'Illegal path: {}'.format(path))