def find_files_in_path_not_in_external_dirs(fid, path, external, algorithm, rootdir=""):
    """Collect file info for every file under ``path`` that is outside the external directories.

    Args:
        fid: Passed through unchanged as the second argument of ``parse_file``.
        path: Directory that is walked recursively.
        external: Iterable of sequences; index 1 of each entry is taken as an
            external directory and handed to ``in_directory`` together with
            each file's path relative to ``path``.
        algorithm: Forwarded to ``parse_file`` as ``algorithm``.
        rootdir: Forwarded to ``parse_file`` as ``rootdir``.

    Returns:
        list: One ``parse_file`` result for each file for which no external
        directory satisfies ``in_directory``, in walk order.
    """
    # Keep the directory list in its own local instead of rebinding the parameter.
    external_dirs = [entry[1] for entry in external]

    files = []
    for root, _dirnames, filenames in walk(path):
        for fname in filenames:
            filepath = os.path.join(root, fname)
            relpath = os.path.relpath(filepath, path)

            # any() stops at the first external directory that contains the file.
            if any(in_directory(relpath, ext_dir) for ext_dir in external_dirs):
                continue

            files.append(parse_file(filepath, fid, relpath, algorithm=algorithm, rootdir=rootdir))

    return files
def parse_files(fid, path, external, algorithm, rootdir):
    """Return parsed file info for ``path``, which may be a single file or a directory.

    Args:
        fid: Passed through to ``parse_file`` / the directory helper.
        path: File or directory to parse.
        external: Passed to ``find_files_in_path_not_in_external_dirs`` when
            ``path`` is a directory; unused for a single file.
        algorithm: Forwarded as ``algorithm``.
        rootdir: Forwarded to the directory helper; not used for a single file.

    Returns:
        list: A one-element list for a file, the helper's results for a
        directory, and an empty list when ``path`` is neither.
    """
    if os.path.isfile(path):
        # A lone file is described relative to its own directory.
        name = os.path.basename(path)
        return [parse_file(path, fid, name, algorithm=algorithm)]

    if os.path.isdir(path):
        return list(find_files_in_path_not_in_external_dirs(fid, path, external, algorithm, rootdir))

    return []
def find_files_in_path_not_in_external_dirs(fid, path, external, algorithm, rootdir=""):
    """Collect file info for every file under ``path``, skipping the external directories.

    External directories are pruned from the walk, so nothing below them is
    visited. A directory is pruned when its path relative to ``path`` equals
    one of the external directories (after ``os.path.normpath``); a directory
    that merely shares a name with an external directory elsewhere in the
    tree is still visited.

    Args:
        fid: Passed through unchanged as the second argument of ``parse_file``.
        path: Directory that is walked recursively.
        external: Iterable of sequences; index 1 of each entry is the external
            directory, taken as relative to ``path``.
        algorithm: Forwarded to ``parse_file`` as ``algorithm``.
        rootdir: Forwarded to ``parse_file`` as ``rootdir``.

    Returns:
        list: One ``parse_file`` result per visited file, in walk order.
    """
    # Built once; normalised so "a/b", "a/b/" and "./a/b" all compare equal.
    external_dirs = {os.path.normpath(entry[1]) for entry in external}

    files = []
    for root, dirnames, filenames in walk(path):
        # Assign in place so walk() does not descend into the pruned directories.
        dirnames[:] = [
            d for d in dirnames
            if os.path.normpath(os.path.relpath(os.path.join(root, d), path)) not in external_dirs
        ]

        for fname in filenames:
            filepath = os.path.join(root, fname)
            relpath = os.path.relpath(filepath, path)
            files.append(parse_file(filepath, fid, relpath, algorithm=algorithm, rootdir=rootdir))

    return files
def generate(self, filesToCreate, folderToParse=None, extra_paths_to_parse=None,
             parsed_files=None, relpath=None, algorithm='SHA-256'):
    """Create the XML files described by ``filesToCreate``.

    Args:
        filesToCreate: Mapping of output file path to a dict with a ``'spec'``
            entry and an optional ``'data'`` dict.
        folderToParse: Optional file or directory whose files are parsed and
            made available to the templates. Trailing ``/`` is stripped.
        extra_paths_to_parse: Optional additional files/directories to parse;
            each is parsed with itself as ``rootdir``.
        parsed_files: Optional list of already parsed file info. The list is
            extended in place with everything parsed here.
        relpath: If truthy, generated files are recorded relative to this
            path; otherwise relative to ``folderToParse`` when given, else
            the file name is used as is.
        algorithm: Forwarded to ``parse_file`` / ``parse_files`` /
            ``createLXMLElement``.

    Side effects:
        Sets ``self.toCreate``, ``self.fid.allow_unknown_file_types`` and
        ``self.tree``, and calls ``self.write`` once per file to create.
    """
    self.toCreate = []
    for fname, content in filesToCreate.items():
        self.toCreate.append({
            'file': fname,
            'template': content['spec'],
            'data': content.get('data', {}),
            'root': XMLElement(content['spec'], fid=self.fid)
        })

    if extra_paths_to_parse is None:
        extra_paths_to_parse = []

    if parsed_files is None:
        parsed_files = []

    files = parsed_files

    # See if any profile allows unknown file types.
    # If at least one does allow it, we allow it for all profiles.
    allow_unknown_file_types = False
    for f in self.toCreate:
        allow_unknown_file_types = f.get('data', {}).get('allow_unknown_file_types', False)
        if allow_unknown_file_types:
            break

    self.fid.allow_unknown_file_types = allow_unknown_file_types

    # Bound up front: the extra-paths pass below reads it even when no
    # folderToParse was given.
    external = []

    if folderToParse:
        folderToParse = str(folderToParse).rstrip('/')
        external = self.find_external_dirs()

        if external:
            external_gen = XMLGenerator()

            for ext_file, ext_dir, ext_spec, ext_pointer, ext_data in external:
                ext_root = os.path.join(folderToParse, ext_dir)
                try:
                    # Only the immediate sub directories of the external root are used.
                    ext_sub_dirs = next(walk(ext_root))[1]
                except StopIteration:
                    logger.info('No directories found in {}'.format(ext_root))
                else:
                    for sub_dir in ext_sub_dirs:
                        ptr_file_path = os.path.join(ext_dir, sub_dir, ext_file)
                        ptr_file_path = normalize_path(ptr_file_path)

                        ext_info = copy.deepcopy(ext_data)
                        ext_info['_EXT'] = sub_dir
                        ext_info['_EXT_HREF'] = ptr_file_path

                        external_to_create = {
                            os.path.join(folderToParse, ptr_file_path): {'spec': ext_spec, 'data': ext_info}
                        }
                        external_gen.generate(external_to_create, os.path.join(folderToParse, ext_dir, sub_dir))

                        if ext_pointer is not None:
                            filepath = os.path.join(folderToParse, ptr_file_path)
                            fileinfo = parse_file(
                                filepath, self.fid, ptr_file_path,
                                algorithm=algorithm, rootdir=sub_dir
                            )
                            files.append(fileinfo)

        files.extend(parse_files(self.fid, folderToParse, external, algorithm, rootdir=""))

    for path in extra_paths_to_parse:
        files.extend(parse_files(self.fid, path, external, algorithm, rootdir=path))

    for idx, f in enumerate(self.toCreate):
        fname = f['file']
        rootEl = f['root']
        data = f.get('data', {})
        data['_XML_FILENAME'] = os.path.basename(fname)

        self.tree = etree.ElementTree(
            rootEl.createLXMLElement(data, files=files, folderToParse=folderToParse, algorithm=algorithm)
        )
        self.write(fname)

        if relpath:
            relfilepath = os.path.relpath(fname, relpath)
        elif folderToParse:
            relfilepath = os.path.relpath(fname, folderToParse)
        else:
            relfilepath = fname

        # Every generated file except the last is parsed and added to
        # ``files``, which is passed to the following createLXMLElement calls.
        if idx < len(self.toCreate) - 1:
            fileinfo = parse_file(fname, self.fid, relfilepath, algorithm=algorithm)
            files.append(fileinfo)
def createLXMLElement(self, info, nsmap=None, files=None, folderToParse='', parent=None, algorithm=None):
    """Build the lxml element for this node and, recursively, its children.

    Args:
        info: Dict of data used to fill in text, attributes and conditions.
        nsmap: Namespace map inherited from the parent; merged with
            ``self.nsmap`` (own entries win).
        files: List of file-info dicts used for children with ``containsFiles``.
        folderToParse: Base directory used to resolve ``self.external``.
        parent: The parent node, or ``None`` for a root.
        algorithm: Forwarded to ``parse_file`` and to child elements.

    Returns:
        The created element (also stored in ``self.el``), or ``None`` when the
        element is to be left out: a required parameter is missing/empty, the
        condition evaluates to ``'False'``, nested XML is missing and empty is
        not allowed, the element is empty and empty is not allowed, it has no
        children and ``skipIfNoChildren`` is set, or its content is empty and
        ``hideEmptyContent`` is set.

    Raises:
        ValueError: If a required attribute has no value, or the element is
            required but empty.
    """
    if nsmap is None:
        nsmap = {}

    if files is None:
        files = []

    self.parent = parent
    if parent is not None:
        # Position among the same-named siblings already attached to the parent.
        siblings_same_name = len(parent.el.findall(self.name))
        self.parent_pos = siblings_same_name
    else:
        self.parent_pos = 0

    full_nsmap = nsmap.copy()
    full_nsmap.update(self.nsmap)

    if self.namespace:
        self.el = etree.Element("{{{}}}{}".format(full_nsmap[self.namespace], self.name), nsmap=full_nsmap)
    else:
        self.el = etree.Element("{}".format(self.name), nsmap=full_nsmap)

    self.el.text = self.parse(info)

    for req_param in self.requiredParameters:
        req_value = info.get(req_param)
        if req_value is None or req_value == '':
            return None

    if self.condition is not None:
        condition = parseContent(self.condition, info)
        if condition == 'False':
            return None

    for attr in self.attr:
        name, content, required = attr.parse(info, nsmap=full_nsmap)

        if required and not content:
            raise ValueError(
                "Missing value for required attribute '{}' on element '{}'".format(
                    name, self.get_path()
                )
            )
        elif content or attr.allow_empty:
            self.el.set(name, content)

    if self.external:
        ext_root = os.path.join(folderToParse, self.external['-dir'])
        try:
            # Only the immediate sub directories of the external root are used.
            ext_dirs = next(walk(ext_root))[1]
        except StopIteration:
            logger.info('No directories found in {}'.format(ext_root))
        else:
            for ext_dir in natsorted(ext_dirs):
                if '-pointer' in self.external:
                    ptr = XMLElement(self.external['-pointer'], fid=self.fid)
                    ptr_file_path = os.path.join(self.external['-dir'], ext_dir, self.external['-file'])
                    ptr_file_path = normalize_path(ptr_file_path)

                    # NOTE(review): this aliases ``info`` rather than copying it, so the
                    # ``_EXT*`` keys are also visible to the children created below and
                    # to the caller's dict. Kept as is; confirm whether that is intended.
                    ptr_info = info
                    ptr_info['_EXT'] = ext_dir
                    ptr_info['_EXT_HREF'] = ptr_file_path

                    filepath = os.path.join(folderToParse, ptr_file_path)
                    fileinfo = parse_file(
                        filepath, self.fid, ptr_file_path,
                        algorithm=algorithm, rootdir=ext_dir
                    )

                    # Expose the file info as ``_EXT_<key>``, dropping a leading "F".
                    for k, v in fileinfo.items():
                        if k[0] == 'F':
                            ptr_info['_EXT_{}'.format(k[1:])] = v
                        else:
                            ptr_info['_EXT_{}'.format(k)] = v

                    child_el = ptr.createLXMLElement(
                        ptr_info, full_nsmap, folderToParse=folderToParse,
                        parent=self, algorithm=algorithm,
                    )

                    if child_el is not None:
                        self.add_element(ptr)

    for child_idx, child in enumerate(self.children):
        child.parent = self
        child.parent_pos = child_idx

        if child.containsFiles:
            for fileinfo in files:
                include = True

                for key, file_filter in child.fileFilters.items():
                    value = fileinfo.get(key)
                    # A file without the filtered key cannot match; re.search
                    # would raise TypeError on None.
                    if value is None or not re.search(file_filter, value):
                        include = False
                        break

                if include:
                    logger.debug('Creating child element with additional file data: {data}'.format(data=fileinfo))
                    full_info = info.copy()
                    full_info.update(fileinfo)
                    child_el = child.createLXMLElement(
                        full_info, full_nsmap, files=files,
                        folderToParse=folderToParse, parent=self,
                        algorithm=algorithm,
                    )
                    if child_el is not None:
                        self.add_element(child)

        elif child.foreach is not None:
            try:
                foreach_el = info[child.foreach]
            except KeyError:
                msg = 'Foreach key "{key}" for {el} not found in data'.format(
                    key=child.foreach, el=child.get_path()
                )
                logger.warning(msg)
                continue

            # Mappings are iterated as (key, value); anything else as (index, item).
            try:
                iterator = foreach_el.items()
            except AttributeError:
                iterator = enumerate(foreach_el)

            for idx, v in iterator:
                child_info = copy.deepcopy(info)
                child_info.update(v)
                child_info['{foreach}__key'.format(foreach=child.foreach)] = idx

                child_el = child.createLXMLElement(
                    child_info, full_nsmap, files=files,
                    folderToParse=folderToParse, parent=self,
                    algorithm=algorithm,
                )
                if child_el is not None:
                    self.add_element(child)

        else:
            child_el = child.createLXMLElement(
                info, full_nsmap, files=files,
                folderToParse=folderToParse, parent=self,
                algorithm=algorithm,
            )
            if child_el is not None:
                self.add_element(child)

    if self.nestedXMLContent:
        # we encode the XML to get around LXML limitation with XML strings
        # containing encoding information.
        #
        # See:
        # https://stackoverflow.com/questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
        if self.nestedXMLContent not in info:
            logger.warning(
                "Nested XML '{}' not found in data and will not be created".format(self.nestedXMLContent)
            )
            if not self.allowEmpty:
                return None
        else:
            nested_xml = bytes(bytearray(info[self.nestedXMLContent], encoding='utf-8'))
            parser = etree.XMLParser(remove_blank_text=True)
            self.el.append(etree.fromstring(nested_xml, parser=parser))

    is_empty = self.isEmpty(info)
    if is_empty and self.required:
        raise ValueError("Missing value for required element '%s'" % (self.get_path()))

    if is_empty and not self.allowEmpty:
        return None

    if len(self.el) == 0 and self.skipIfNoChildren:
        return None

    if self.contentIsEmpty(info) and self.hideEmptyContent:
        return None

    return self.el
def generate(self, filesToCreate, folderToParse=None, extra_paths_to_parse=None,
             parsed_files=None, relpath=None, algorithm='SHA-256'):
    """Create the XML files described by ``filesToCreate``.

    Args:
        filesToCreate: Mapping of output file path to a dict with a ``'spec'``
            entry and an optional ``'data'`` dict.
        folderToParse: Optional file or directory whose files are parsed and
            made available to the templates. Trailing ``/`` is stripped.
        extra_paths_to_parse: Optional additional files/directories to parse.
        parsed_files: Optional list of already parsed file info. The list is
            extended in place with everything parsed here.
        relpath: If truthy, generated files are recorded relative to this
            path; otherwise relative to ``folderToParse`` when given, else
            the file name is used as is.
        algorithm: Forwarded to ``parse_file``.

    Side effects:
        Sets ``self.toCreate``, ``self.fid.allow_unknown_file_types`` and
        ``self.tree``, and calls ``self.write`` once per file to create.
    """
    self.toCreate = []
    for fname, content in six.iteritems(filesToCreate):
        self.toCreate.append({
            'file': fname,
            'template': content['spec'],
            'data': content.get('data', {}),
            'root': XMLElement(content['spec'])
        })

    if extra_paths_to_parse is None:
        extra_paths_to_parse = []

    if parsed_files is None:
        parsed_files = []

    files = parsed_files

    # See if any profile allows unknown file types.
    # If at least one does allow it, we allow it for all profiles.
    allow_unknown_file_types = False
    for f in self.toCreate:
        allow_unknown_file_types = f.get('data', {}).get(
            'allow_unknown_file_types', False)
        if allow_unknown_file_types:
            break

    self.fid.allow_unknown_file_types = allow_unknown_file_types

    # Bound up front: the extra-paths pass below reads it even when no
    # folderToParse was given.
    external = []

    def _walk_files(top):
        """Yield (absolute path, path relative to ``top``) for files under ``top``, skipping external dirs."""
        # Normalised once per walk; a directory is pruned when its path
        # relative to ``top`` is one of the external directories.
        external_dirs = {os.path.normpath(e[1]) for e in external}
        for root, dirnames, filenames in walk(top):
            # Assign in place so walk() does not descend into pruned directories.
            dirnames[:] = [
                d for d in dirnames
                if os.path.normpath(os.path.relpath(os.path.join(root, d), top)) not in external_dirs
            ]
            for name in filenames:
                found = os.path.join(root, name)
                yield found, os.path.relpath(found, top)

    if folderToParse:
        folderToParse = six.text_type(folderToParse).rstrip('/')
        external = self.find_external_dirs()

        if external:
            external_gen = XMLGenerator()

            for ext_file, ext_dir, ext_spec, ext_data in external:
                ext_root = os.path.join(folderToParse, ext_dir)
                try:
                    # Only the immediate sub directories of the external root are used.
                    ext_sub_dirs = next(walk(ext_root))[1]
                except StopIteration:
                    # walk() yields nothing for a missing directory.
                    logger.info('No directories found in {}'.format(ext_root))
                    continue

                for sub_dir in ext_sub_dirs:
                    ptr_file_path = os.path.join(ext_dir, sub_dir, ext_file)

                    ext_info = copy.deepcopy(ext_data)
                    ext_info['_EXT'] = sub_dir
                    ext_info['_EXT_HREF'] = ptr_file_path

                    external_to_create = {
                        os.path.join(folderToParse, ptr_file_path): {
                            'spec': ext_spec,
                            'data': ext_info
                        }
                    }
                    external_gen.generate(
                        external_to_create,
                        os.path.join(folderToParse, ext_dir, sub_dir))

                    filepath = os.path.join(folderToParse, ptr_file_path)
                    fileinfo = parse_file(filepath, self.fid, ptr_file_path,
                                          algorithm=algorithm, rootdir=sub_dir)
                    files.append(fileinfo)

        # Local names below deliberately avoid ``relpath`` so the parameter
        # keeps its caller-supplied value for the creation loop.
        if os.path.isfile(folderToParse):
            file_rel = os.path.basename(folderToParse)
            fileinfo = parse_file(folderToParse, self.fid, file_rel, algorithm=algorithm)
            files.append(fileinfo)
        elif os.path.isdir(folderToParse):
            for filepath, file_rel in _walk_files(folderToParse):
                fileinfo = parse_file(filepath, self.fid, file_rel, algorithm=algorithm)
                files.append(fileinfo)

    for path in extra_paths_to_parse:
        if os.path.isfile(path):
            file_rel = os.path.basename(path)
            fileinfo = parse_file(path, self.fid, file_rel, algorithm=algorithm)
            files.append(fileinfo)
        elif os.path.isdir(path):
            for filepath, file_rel in _walk_files(path):
                fileinfo = parse_file(filepath, self.fid, file_rel,
                                      algorithm=algorithm, rootdir=path)
                files.append(fileinfo)

    for idx, f in enumerate(self.toCreate):
        fname = f['file']
        rootEl = f['root']
        data = f.get('data', {})
        data['_XML_FILENAME'] = os.path.basename(fname)

        logger.debug(u'Creating {f} with {d}'.format(f=fname, d=data))

        self.tree = etree.ElementTree(
            rootEl.createLXMLElement(data, files=files, folderToParse=folderToParse))
        self.write(fname)

        if relpath:
            relfilepath = os.path.relpath(fname, relpath)
        elif folderToParse:
            relfilepath = os.path.relpath(fname, folderToParse)
        else:
            relfilepath = fname

        # Every generated file except the last is parsed and added to
        # ``files``, which is passed to the following createLXMLElement calls.
        if idx < len(self.toCreate) - 1:
            fileinfo = parse_file(fname, self.fid, relfilepath, algorithm=algorithm)
            files.append(fileinfo)