def parse_eget_element(self, el):
    """Convert an ``EgetElement`` node into a dictionary.

    The result starts from ``self.parse_mappings`` (mapping ``varde`` to
    ``Varde``) and is extended with the ``Namn``, ``DataTyp`` and ``Format``
    attributes of ``el``, the recursively parsed nested ``EgetElement``
    children and the parsed ``Egenskaper/Egenskap`` children.

    Args:
        el: Element exposing ``get`` (attribute lookup) and ``xpath``.

    Returns:
        dict with the keys from ``parse_mappings`` plus ``namn``,
        ``datatyp``, ``format``, ``element`` and ``egenskaper``.
    """
    result = self.parse_mappings({'varde': 'Varde'}, el)

    name = el.get('Namn')
    if name is not None:
        # Strip "Dokument/" first and then "Ärende/", in that order.
        for prefix in ("Dokument/", u"Ärende/"):
            name = remove_prefix(name, prefix)
    result['namn'] = name

    result['datatyp'] = el.get('DataTyp')
    result['format'] = el.get('Format')

    nested = []
    for child in el.xpath('*[local-name()="EgetElement"]'):
        nested.append(self.parse_eget_element(child))
    result['element'] = nested

    properties = []
    for prop in el.xpath('*[local-name()="Egenskaper"]/*[local-name()="Egenskap"]'):
        properties.append(self.parse_egenskaper(prop))
    result['egenskaper'] = properties

    return result
def path(self, path):
    """Resolve, clean up and store the path of this file element.

    When ``path`` is ``None`` the value is read from ``self.el`` using the
    ``path`` expression(s) configured in ``self.props``; the first
    expression that yields a value wins. The longest matching entry of
    ``pathprefix`` is then removed, the leading path component is dropped
    when ``path_includes_root`` is set, and leading ``/`` and spaces are
    stripped. The final value is passed through ``normalize_path`` and
    stored in ``self._path``.

    Both ``path`` and ``pathprefix`` may be configured as a single string
    or as a list of strings.

    NOTE(review): the original source had lost its indentation; the prefix
    handling is assumed to belong to the ``path is None`` branch, so an
    explicitly given path is only normalized -- confirm.

    Args:
        path: Explicit path, or ``None`` to resolve it from the element.
    """
    if path is None:
        self.paths = self.props.get('path', [''])
        if isinstance(self.paths, str):
            self.paths = [self.paths]

        for expression in self.paths:
            path = get_value_from_path(self.el, expression)
            if path is not None:
                break

        self.path_prefix = self.props.get('pathprefix', [])
        if isinstance(self.path_prefix, str):
            # A bare string would otherwise be iterated character by
            # character below; treat it as a single prefix.
            self.path_prefix = [self.path_prefix]

        # Try the longest prefixes first so that the most specific one wins.
        for prefix in sorted(self.path_prefix, key=len, reverse=True):
            no_prefix = remove_prefix(path, prefix)
            if no_prefix != path:
                path = no_prefix
                break

        if self.props.get('path_includes_root', False):
            # Drop everything up to and including the first "/".
            path = path.split('/', 1)[-1]

        path = path.lstrip('/ ')

    self._path = normalize_path(path)
def run(self, ip=None, xmlfile=None, validate_fileformat=True, validate_integrity=True, rootdir=None):
    """Create and run validation tasks for the files listed in an XML file.

    A parallel "Validate Files" step is created under the current task's
    process step. For every element in ``xmlfile`` matching a name in
    ``settings.FILE_ELEMENTS`` a file format task and/or a checksum task
    is added, depending on the flags and on whether the element provides
    a format or a checksum.

    Args:
        ip: Information package attached to the created tasks; its
            ``ObjectPath`` is used as ``rootdir`` when none is given.
        xmlfile: XML file to read the file elements from.
        validate_fileformat: Create file format validation tasks.
        validate_integrity: Create checksum validation tasks.
        rootdir: Directory the file paths are joined onto.

    Returns:
        The result of running the created step.
    """
    step = ProcessStep.objects.create(
        name="Validate Files",
        parallel=True,
        parent_step=self.taskobj.processstep
    )

    def add_task(task_name, task_params):
        # Every created task shares log, package and responsible user
        # with the current task.
        step.tasks.add(ProcessTask.objects.create(
            name=task_name,
            params=task_params,
            log=self.taskobj.log,
            information_package=ip,
            responsible=self.taskobj.responsible,
        ))

    if validate_fileformat or validate_integrity:
        if rootdir is None:
            rootdir = ip.ObjectPath

        tree = etree.ElementTree(file=xmlfile)

        for elname, props in settings.FILE_ELEMENTS.iteritems():
            for file_el in tree.xpath('.//*[local-name()="%s"]' % elname):
                fpath = get_value_from_path(file_el, props["path"])
                if fpath:
                    fpath = remove_prefix(fpath, props.get("pathprefix", ""))

                fformat = get_value_from_path(file_el, props.get("format"))
                checksum = get_value_from_path(file_el, props.get("checksum"))
                algorithm = get_value_from_path(file_el, props.get("checksumtype"))

                if validate_fileformat and fformat is not None:
                    add_task(self.fileformat_task, {
                        "filename": os.path.join(rootdir, fpath),
                        "fileformat": fformat,
                    })

                if validate_integrity and checksum is not None:
                    add_task(self.checksum_task, {
                        "filename": os.path.join(rootdir, fpath),
                        "checksum": checksum,
                        "algorithm": algorithm,
                    })

    # Detach the log from this task before the step is run.
    self.taskobj.log = None
    self.taskobj.save(update_fields=['log'])
    self.set_progress(100, total=100)

    with allow_join_result():
        return step.run().get()
def run(self, dirname=None, files=[], files_reldir=None, xmlfile=None):
    """Compare the files referenced in an XML file with the physical files.

    The logical set contains the path of every element in ``xmlfile``
    matching a name in ``settings.FILE_ELEMENTS`` (with the configured
    ``pathprefix`` removed). The physical set contains every file found
    below ``dirname`` (relative to it, excluding ``xmlfile`` itself) plus
    the entries of ``files``.

    Args:
        dirname: Optional directory that is walked recursively.
        files: Additional paths counted as physical files. The default
            list is only iterated, never mutated.
        files_reldir: When set, entries of ``files`` are made relative to
            this directory.
        xmlfile: XML file describing the logical files.

    Returns:
        The string "Success" when both sets are equal.

    Raises:
        AssertionError: If the logical and physical sets differ.
    """
    if dirname:
        xmlrelpath = os.path.relpath(xmlfile, dirname)
        xmlrelpath = remove_prefix(xmlrelpath, "./")
    else:
        xmlrelpath = xmlfile

    doc = etree.ElementTree(file=xmlfile)

    logical_files = set()
    physical_files = set()

    for elname, props in settings.FILE_ELEMENTS.iteritems():
        for el in doc.xpath('.//*[local-name()="%s"]' % elname):
            filename = get_value_from_path(el, props["path"])
            if filename:
                filename = remove_prefix(filename, props.get("pathprefix", ""))
                logical_files.add(filename)

    if dirname:
        for root, dirs, filenames in os.walk(dirname):
            reldir = os.path.relpath(root, dirname)
            for f in filenames:
                relfile = os.path.join(reldir, f)
                relfile = win_to_posix(relfile)
                relfile = remove_prefix(relfile, "./")

                # Compare the relative path, not the bare file name, so
                # that only the XML file itself is skipped, wherever it
                # is located below dirname.
                if relfile != xmlrelpath:
                    physical_files.add(relfile)

    for f in files:
        if files_reldir:
            f = os.path.relpath(f, files_reldir)
        physical_files.add(f)

    # Raised explicitly so that the check is not stripped when Python
    # runs with optimizations enabled (-O).
    if logical_files != physical_files:
        raise AssertionError("the logical representation differs from the physical")

    self.set_progress(100, total=100)
    return "Success"
def run(self, dirname=None, files=[], files_reldir=None, xmlfile=None, rootdir=""):
    """Compare the files referenced in an XML file with the physical files.

    The logical set is whatever ``find_files(xmlfile, rootdir)`` returns.
    The physical set contains every file found below ``dirname``
    (relative to it, excluding ``xmlfile`` itself) plus the entries of
    ``files``.

    Args:
        dirname: Optional directory that is walked recursively.
        files: Additional paths counted as physical files. The default
            list is only iterated, never mutated.
        files_reldir: When set, entries of ``files`` are made relative to
            it; an entry equal to ``files_reldir`` is added by base name.
        xmlfile: XML file describing the logical files.
        rootdir: Passed on to ``find_files``.

    Returns:
        The string "Success" when both sets are equal.

    Raises:
        AssertionError: If the logical and physical sets differ.
    """
    if dirname:
        xmlrelpath = os.path.relpath(xmlfile, dirname)
        xmlrelpath = remove_prefix(xmlrelpath, "./")
    else:
        xmlrelpath = xmlfile

    logical_files = find_files(xmlfile, rootdir)
    physical_files = set()

    if dirname:
        for root, dirs, filenames in walk(dirname):
            reldir = os.path.relpath(root, dirname)
            for f in filenames:
                relfile = os.path.join(reldir, f)
                relfile = win_to_posix(relfile)
                relfile = remove_prefix(relfile, "./")

                # The XML file itself is not part of the comparison.
                if relfile != xmlrelpath:
                    physical_files.add(relfile)

    for f in files:
        if files_reldir:
            if f == files_reldir:
                # The entry is the reference path itself; register it by
                # its base name instead of making it relative.
                physical_files.add(os.path.basename(f))
                continue
            f = os.path.relpath(f, files_reldir)
        physical_files.add(f)

    # Raised explicitly so that the check is not stripped when Python
    # runs with optimizations enabled (-O).
    if logical_files != physical_files:
        raise AssertionError("the logical representation differs from the physical")

    return "Success"
def get_sorting(self, request):
    """Build the list of sort entries from the ``ordering`` query parameter.

    ``ordering`` is a comma separated list of field names; a leading ``-``
    selects descending order. Each name is checked with
    ``self.verify_sort_field`` and its return value is collected.

    Args:
        request: Request object exposing ``query_params``.

    Returns:
        list of verified sort entries, empty when no ordering is given.

    Raises:
        exceptions.ParseError: If ``verify_sort_field`` returns ``False``
            for a field.
    """
    raw_ordering = request.query_params.get('ordering', '').strip()
    if not raw_ordering:
        return []

    verified_fields = []
    for token in raw_ordering.split(','):
        if token.startswith('-'):
            direction = 'desc'
        else:
            direction = 'asc'

        name = remove_prefix(token, '-')
        checked = self.verify_sort_field(name, direction)
        if checked is False:
            raise exceptions.ParseError('Invalid sort field: {}'.format(name))

        verified_fields.append(checked)

    return verified_fields
def get_sorting(self, request):
    """Translate the ``ordering`` query parameter into sort entries.

    The parameter holds comma separated field names, where a ``-`` in
    front of a name means descending order. Every name is validated by
    ``self.verify_sort_field`` and the value it returns is appended to
    the result.

    Args:
        request: Request object exposing ``query_params``.

    Returns:
        list of verified sort entries; empty if ``ordering`` is missing
        or blank.

    Raises:
        exceptions.ParseError: If a field is rejected, i.e.
            ``verify_sort_field`` returns ``False``.
    """
    ordering = request.query_params.get('ordering', '').strip()
    sort = []

    if ordering != '':
        for raw_field in ordering.split(','):
            descending = raw_field.startswith('-')
            field_name = remove_prefix(raw_field, '-')

            verified = self.verify_sort_field(field_name, 'desc' if descending else 'asc')
            if verified is False:
                raise exceptions.ParseError('Invalid sort field: {}'.format(field_name))

            sort.append(verified)

    return sort
def __init__(self, el, props, path=None, rootdir=None):
    '''
    Read path, checksum, checksum type, size and format of a file element.

    When no explicit path is given it is looked up on the element using
    the ``path`` expression(s) in ``props``, the longest matching
    ``pathprefix`` is removed, the leading component is dropped if
    ``path_includes_root`` is set and leading "/" and spaces are stripped.

    NOTE(review): the original source had lost its indentation; the prefix
    handling is assumed to belong to the ``path is None`` branch -- confirm.

    args:
        el: lxml.etree._Element
        props: 'dict with properties from FILE_ELEMENTS'
        path: optional explicit path, used as is when given
        rootdir: accepted but not used here
    '''

    def read(key):
        # Look up the value addressed by the expression configured for key.
        return get_value_from_path(el, props.get(key, ''))

    self.path = path
    if self.path is None:
        expressions = props.get('path', [''])
        if isinstance(expressions, six.string_types):
            expressions = [expressions]
        self.paths = expressions

        # The first expression that yields a value wins.
        for expression in self.paths:
            self.path = get_value_from_path(el, expression)
            if self.path is not None:
                break

        self.path_prefix = props.get('pathprefix', [])
        longest_first = sorted(self.path_prefix, key=len, reverse=True)
        for prefix in longest_first:
            stripped = remove_prefix(self.path, prefix)
            if stripped != self.path:
                self.path = stripped
                break

        if props.get('path_includes_root', False):
            # Drop everything up to and including the first "/".
            self.path = self.path.split('/', 1)[-1]

        self.path = self.path.lstrip('/ ')

    self.checksum = read('checksum')
    if self.checksum is not None:
        self.checksum = self.checksum.lower()

    self.checksum_type = read('checksumtype')
    if self.checksum_type is not None:
        self.checksum_type = self.checksum_type.lower()

    self.size = read('size')
    if self.size is not None:
        self.size = int(self.size)
    else:
        self.size = None

    self.format = read('format')
def __init__(self, el, props):
    '''
    Read path, checksum, checksum type and format of a file element.

    The path has the longest matching entry of ``pathprefix`` removed and
    leading "/" and spaces stripped.

    args:
        el: lxml.etree._Element
        props: 'dict with properties from FILE_ELEMENTS'
    '''

    def read(key):
        # Look up the value addressed by the expression configured for key.
        return get_value_from_path(el, props.get(key, ''))

    self.path = read('path')
    self.path_prefix = props.get('pathprefix', [])

    # Longest prefixes first so that the most specific one is removed.
    candidates = list(self.path_prefix)
    candidates.sort(key=len, reverse=True)
    for candidate in candidates:
        stripped = remove_prefix(self.path, candidate)
        if stripped != self.path:
            self.path = stripped
            break

    self.path = self.path.lstrip('/ ')

    self.checksum = read('checksum')
    self.checksum_type = read('checksumtype')
    self.format = read('format')
def list(self, request):
    """List information packages that are available for reception.

    The result combines three sources:

    * XML files in the reception and unidentified directories, parsed
      with ``self.parseFile`` and skipped when an ``InformationPackage``
      with the same id already exists.
    * tar/zip files in the unidentified directory whose name is not
      referenced by the first ``FLocat`` ``href`` of any XML file in
      that directory.
    * Information packages in state ``Receiving`` from the database.

    The list is sorted by the ``ordering`` query parameter when every
    entry has that key (a leading ``-`` reverses the order) and is then
    paginated.

    Args:
        request: The incoming request.

    Returns:
        A paginated response, or a plain ``Response`` when the paginator
        returns no page.
    """
    reception_dir = Path.objects.get(entity="path_ingest_reception").value
    unidentified_dir = Path.objects.get(entity="path_ingest_unidentified").value
    ips = []

    xml_candidates = (
        glob.glob(os.path.join(reception_dir, "*.xml")) +
        glob.glob(os.path.join(unidentified_dir, "*.xml"))
    )
    for xml_path in xml_candidates:
        if not os.path.isfile(xml_path):
            continue

        srcdir = unidentified_dir if xml_path.startswith(unidentified_dir) else reception_dir
        parsed = self.parseFile(xml_path, srcdir)
        if not InformationPackage.objects.filter(id=parsed['id']).exists():
            ips.append(parsed)

    containers = (
        glob.glob(os.path.join(unidentified_dir, "*.tar")) +
        glob.glob(os.path.join(unidentified_dir, "*.zip"))
    )
    for container_file in containers:
        entry = {
            'Label': os.path.basename(container_file),
            'CreateDate': str(timestamp_to_datetime(creation_date(container_file)).isoformat()),
            'State': 'Unidentified',
            'status': 0,
            'step_state': celery_states.SUCCESS,
        }

        # A container referenced by an XML file is not listed on its own.
        for xml_path in glob.glob(os.path.join(unidentified_dir, "*.xml")):
            if not os.path.isfile(xml_path):
                continue

            xml_root = etree.parse(xml_path).getroot()
            flocat = xml_root.xpath('.//*[local-name()="%s"]' % "FLocat")[0]
            referenced = get_value_from_path(flocat, "@href").split('file:///')[1]
            if entry['Label'] == referenced:
                break
        else:
            ips.append(entry)

    receiving = InformationPackage.objects.filter(State='Receiving').prefetch_related(
        Prefetch('profileip_set', to_attr='profiles'),
    )
    serializer = InformationPackageSerializer(
        data=receiving, many=True, context={'request': request}
    )
    serializer.is_valid()
    ips.extend(serializer.data)

    try:
        sort_key = request.query_params.get('ordering', '')
        descending = sort_key.startswith('-')
        sort_key = remove_prefix(sort_key, '-')
        ips = sorted(ips, key=lambda item: item[sort_key], reverse=descending)
    except KeyError:
        # Entries lacking the requested key leave the list unsorted.
        pass

    paginator = LinkHeaderPagination()
    page = paginator.paginate_queryset(ips, request)
    if page is None:
        return Response(ips)
    return paginator.get_paginated_response(page)