def detect_encoding(tags: Iterable[DXFTag]) -> str:
    """Detect the text encoding from the header variables $DWGCODEPAGE and
    $ACADVER out of a stream of DXFTag objects.

    Assuming a malformed DXF file:

    The header variables could reside outside of the HEADER section,
    an ENDSEC tag is not a reliable fact that no $DWGCODEPAGE or
    $ACADVER header variable will show up in the remaining tag stream.

    Worst case: DXF file without a $ACADVER var, and a $DWGCODEPAGE
    unequal to "ANSI_1252" at the end of the file. In that case the whole
    stream is consumed and the code page found on the way is used.

    Args:
        tags: iterable of (group code, value) pairs; values are decoded
            with ``const.DEFAULT_ENCODING`` here, so they are expected to
            be bytes.

    Returns:
        ``'utf8'`` if a $ACADVER >= ``const.DXF2007`` was found, else the
        encoding derived from $DWGCODEPAGE if one was found, else
        ``const.DEFAULT_ENCODING``.

    """
    encoding = None
    dxfversion = None
    next_tag = None  # name of the header variable whose value tag is expected

    for code, value in tags:
        if code == 9:
            if value == DWGCODEPAGE:
                next_tag = DWGCODEPAGE  # e.g. (3, "ANSI_1252")
            elif value == ACADVER:
                next_tag = ACADVER  # e.g. (1, "AC1012")
        elif code == 3 and next_tag == DWGCODEPAGE:
            encoding = toencoding(value.decode(const.DEFAULT_ENCODING))
            next_tag = None
        elif code == 1 and next_tag == ACADVER:
            dxfversion = value.decode(const.DEFAULT_ENCODING)
            next_tag = None

        if encoding and dxfversion:
            # Both variables are known, no need to scan the remaining stream.
            break

    # Evaluate whatever was found, even if only one of the two variables
    # showed up before the stream ended.
    if dxfversion and dxfversion >= const.DXF2007:
        return 'utf8'
    return encoding or const.DEFAULT_ENCODING
def scan_params():
    """Scan the start of `data` for the DXF version and the text encoding.

    `data` is taken from the enclosing scope and is indexed bytewise
    (presumably the raw content of a binary DXF file - verify against the
    enclosing function). Only the byte range 22..1024 is searched for the
    header variable names.

    Returns:
        tuple ``(encoding, dxfversion)``; defaults are ``'cp1252'`` and
        ``'AC1009'`` if the header variables are not found.

    """
    version = 'AC1009'
    codec = 'cp1252'

    # Limit search to first 1024 bytes - an arbitrary number.
    # The offset is the start index of the value for a 1-byte group code.
    try:
        pos = data.index(b'$ACADVER', 22, 1024) + 10
    except ValueError:
        pos = None  # HEADER var $ACADVER not present

    if pos is not None:
        if data[pos] != 65:  # not 'A' = 2-byte group code
            pos += 1
        version = data[pos:pos + 6].decode()

    if version >= 'AC1021':
        return 'utf8', version

    # Same search limits as above, now for the code page variable.
    try:
        pos = data.index(b'$DWGCODEPAGE', 22, 1024) + 14
    except ValueError:
        return codec, version  # HEADER var $DWGCODEPAGE not present

    # name schema is 'ANSI_xxxx'
    if data[pos] != 65:  # not 'A' = 2-byte group code
        pos += 1
    stop = pos + 5
    while data[stop] != 0:  # advance to the terminating zero byte
        stop += 1
    return toencoding(data[pos:stop].decode()), version
def __init__(self, tagger: Iterable['DXFTag']):
    """
    Build a new DXF drawing from a stream of DXF tags.

    The complete section structure is loaded from `tagger` first, then the
    HEADER variables $ACADVER, $DWGCODEPAGE and $HANDSEED are evaluated,
    the entity database is filled, the sections are created and finally
    the version dependent repair/upgrade steps run before the layouts are
    requested from the DXF factory.

    Args:
        tagger: generator or list of DXF tags as DXFTag() objects

    """
    def get_header(sections: 'SectionDict') -> 'SectionType':
        # Local import, presumably to avoid a circular import - TODO confirm.
        from .sections.header import HeaderSection
        # all tags in the first DXF structure entity; None if there is no HEADER section
        header_entities = sections.get('HEADER', [None])[0]
        return HeaderSection(header_entities)

    self.tracker = Tracker()
    self._dimension_renderer = DimensionRenderer()  # set DIMENSION rendering engine
    self._groups = None  # type: GroupManager  # read only
    self._materials = None  # type: MaterialManager # read only
    self._mleader_styles = None  # type: MLeaderStyleManager # read only
    self._mline_styles = None  # type: MLineStyleManager # read only
    self._acad_compatible = True  # will generated DXF file compatible with AutoCAD
    self._acad_incompatibility_reason = set()  # avoid multiple warnings for same reason
    self.filename = None  # type: str # read/write
    self.entitydb = EntityDB()  # read only
    sections = load_dxf_structure(tagger)  # load complete DXF entity structure
    # create section HEADER
    header = get_header(sections)
    # a missing $ACADVER defaults to 'AC1009'
    self.dxfversion = header.get('$ACADVER', 'AC1009')  # type: str # read only
    self.dxffactory = dxffactory(self)  # read only, requires self.dxfversion
    # a missing $DWGCODEPAGE defaults to 'ANSI_1252'
    self.encoding = toencoding(header.get('$DWGCODEPAGE', 'ANSI_1252'))  # type: str # read/write
    # get handle seed
    seed = header.get('$HANDSEED', str(self.entitydb.handles))  # type: str
    # setup handles
    self.entitydb.handles.reset(seed)
    # store all necessary DXF entities in the drawing database
    fill_database(self.entitydb, sections, dxfversion=self.dxfversion)
    # create sections: TABLES, BLOCKS, ENTITIES, CLASSES, OBJECTS
    self.sections = Sections(sections, drawing=self, header=header)

    if self.dxfversion > 'AC1009':
        self.rootdict = self.objects.rootdict
        self.objects.setup_objects_management_tables(self.rootdict)  # create missing tables
        if self.dxfversion in ('AC1012', 'AC1014'):  # releases R13 and R14
            repair.upgrade_to_ac1015(self)
        # some applications don't setup properly the model and paper space layouts
        repair.setup_layouts(self)
        self._groups = self.objects.groups()
        self._materials = self.objects.materials()
        self._mleader_styles = self.objects.mleader_styles()
        self._mline_styles = self.objects.mline_styles()
    else:  # dxfversion <= 'AC1009' do cleanup work, before building layouts
        if self.dxfversion < 'AC1009':  # legacy DXF version
            repair.upgrade_to_ac1009(self)  # upgrade to DXF format AC1009 (DXF R12)
        repair.cleanup_r12(self)
        # ezdxf puts automatically handles into all entities added to the entities database
        # write R12 without handles, by setting $HANDLING = 0
        self.header['$HANDLING'] = 1  # write handles by default

    # layouts are created last, after all repair/upgrade steps above
    self.layouts = self.dxffactory.get_layouts()
def set_header_var(self, name: str, value: str) -> int:
    """Store a header variable this object keeps track of.

    Handled variables are $ACADVER (sets `version` and `release`),
    $DWGCODEPAGE (sets `encoding`) and $HANDSEED (sets `handseed`).

    Returns:
        1 if `name` was one of the handled variables, 0 otherwise.

    """
    if name == '$HANDSEED':
        self.handseed = value
        return 1
    if name == '$DWGCODEPAGE':
        self.encoding = toencoding(value)
        return 1
    if name == '$ACADVER':
        self.version = value
        # unknown version strings are reported as release 'R12'
        self.release = acad_release.get(value, 'R12')
        return 1
    return 0
def single_pass_modelspace(
    stream: BinaryIO,
    types: Optional[Iterable[str]] = None,
    errors: str = "surrogateescape",
) -> Iterable[DXFGraphic]:
    """Iterate over all modelspace entities as :class:`DXFGraphic` objects in
    one single pass.

    Use this function to 'quick' iterate over modelspace entities of a **not**
    seekable binary DXF stream, filtering DXF types may speed up things if
    many entity types will be skipped.

    Args:
        stream: (not seekable) binary DXF stream
        types: DXF types like ``['LINE', '3DFACE']`` which should be returned,
            ``None`` returns all supported types.
        errors: specify decoding error handler, passed on to the tag loader

            - "surrogateescape" to preserve possible binary data (default)
            - "ignore" to silently drop undecodable data
            - "strict" to raise an :class:`UnicodeDecodeError` exception for
              invalid data

    Raises:
        DXFStructureError: Invalid or incomplete DXF file
        UnicodeDecodeError: if `errors` is "strict" and a decoding error occurs

    """
    fetch_header_var: Optional[str] = None
    encoding = "cp1252"
    version = "AC1009"
    prev_code: int = -1
    prev_value: str = ""
    entities = False
    requested_types = _requested_types(types)

    # 1. stage: scan the undecoded HEADER section for the DXF version and
    # the code page, which define the encoding of the remaining stream.
    for code, value in binary_tagger(stream):
        if code == 0 and value == b"ENDSEC":
            break
        elif code == 2 and prev_code == 0 and value != b"HEADER":
            # (0, SECTION), (2, name)
            # First section is not the HEADER section
            entities = value == b"ENTITIES"
            break
        elif code == 9 and value == b"$DWGCODEPAGE":
            fetch_header_var = "ENCODING"
        elif code == 9 and value == b"$ACADVER":
            fetch_header_var = "VERSION"
        elif fetch_header_var == "ENCODING":
            encoding = toencoding(value.decode())
            fetch_header_var = None
        elif fetch_header_var == "VERSION":
            version = value.decode()
            fetch_header_var = None
        prev_code = code

    if version >= "AC1021":
        encoding = "utf-8"

    # 2. stage: continue reading the same stream with the detected encoding.
    queued: Optional[DXFGraphic] = None
    tags: List[DXFTag] = []
    linked_entity = entity_linker()

    for tag in tag_compiler(binary_tagger(stream, encoding, errors)):
        code = tag.code
        value = tag.value
        if entities:
            if code == 0:
                # A structure tag terminates the previously collected tags,
                # this includes the ENDSEC tag which terminates the last
                # entity of the section.
                if tags and tags[0].value in requested_types:
                    entity = cast(DXFGraphic, factory.load(ExtendedTags(tags)))
                    if not linked_entity(entity) and entity.dxf.paperspace == 0:
                        # queue one entity for collecting linked entities:
                        # VERTEX, ATTRIB
                        if queued:
                            yield queued
                        queued = entity
                tags = [tag]
            else:
                tags.append(tag)
            if code == 0 and value == "ENDSEC":
                # Checked after loading the pending tags, otherwise the last
                # entity of the ENTITIES section would be lost.
                if queued:
                    yield queued
                return
            continue  # if entities - nothing else matters
        elif code == 2 and prev_code == 0 and prev_value == "SECTION":
            entities = value == "ENTITIES"

        prev_code = code
        prev_value = value
def _load_section_dict(self, sections: loader.SectionDict) -> None:
    """Internal API to load a DXF document from a section dict.

    Sets up the header, DXF version, encoding and handle seed, binds the
    loaded entities to this document, creates the section objects, runs
    the second loading stage and the DXF version upgrades and finally
    creates the layouts. `is_loading` is True while this method runs.

    Args:
        sections: loaded section structure, the first structure entity of
            the 'HEADER' entry contains all header tags

    """
    self.is_loading = True
    # Create header section:
    # All header tags are the first DXF structure entity
    header_entities = sections.get('HEADER', [None])[0]
    if header_entities is None:
        # Create default header, files without header are by default DXF R12
        self.header = HeaderSection.new(dxfversion=DXF12)
    else:
        self.header = HeaderSection.load(header_entities)

    # A missing $ACADVER defaults to DXF R12:
    self._dxfversion: str = self.header.get('$ACADVER', DXF12)

    # Store original DXF version of loaded file.
    self._loaded_dxfversion = self._dxfversion

    # Content encoding, a missing $DWGCODEPAGE defaults to 'ANSI_1252':
    self.encoding = toencoding(self.header.get('$DWGCODEPAGE', 'ANSI_1252'))

    # Set handle seed, the seed is validated before the handle generator
    # is reset:
    seed: str = self.header.get('$HANDSEED', str(self.entitydb.handles))
    self.entitydb.handles.reset(_validate_handle_seed(seed))

    # Store all necessary DXF entities in the entity database:
    loader.load_and_bind_dxf_content(sections, self)

    # End of 1. loading stage, all entities of the DXF file are
    # stored in the entity database.

    # Create sections:
    self.classes = ClassesSection(self, sections.get('CLASSES', None))
    self.tables = TablesSection(self, sections.get('TABLES', None))

    # Create *Model_Space and *Paper_Space BLOCK_RECORDS
    # BlockSection setup takes care about the rest:
    self._create_required_block_records()

    # At this point all table entries are required:
    self.blocks = BlocksSection(self, sections.get('BLOCKS', None))
    self.entities = EntitySection(self, sections.get('ENTITIES', None))
    self.objects = ObjectsSection(self, sections.get('OBJECTS', None))

    # only DXF R2013+
    self.acdsdata = AcDsDataSection(self, sections.get('ACDSDATA', None))

    # Store unmanaged sections as raw tags:
    for name, data in sections.items():
        if name not in const.MANAGED_SECTIONS:
            self.stored_sections.append(StoredSection(data))

    # Objects section is not initialized!
    self._2nd_loading_stage()

    # DXF version upgrades:
    if self.dxfversion < DXF12:
        logger.info('DXF version upgrade to DXF R12.')
        self.dxfversion = DXF12

    # Also true for documents upgraded to DXF R12 in the step above:
    if self.dxfversion == DXF12:
        self.tables.create_table_handles()

    if self.dxfversion in (DXF13, DXF14):
        logger.info('DXF version upgrade to DXF R2000.')
        self.dxfversion = DXF2000
        self.create_all_arrow_blocks()

    # Objects section setup:
    self.rootdict = self.objects.rootdict
    # Create missing management tables (DICTIONARY):
    self.objects.setup_objects_management_tables(self.rootdict)

    # Setup modelspace- and paperspace layouts:
    self.layouts = Layouts.load(self)

    # Additional work is common to the new and load process:
    self.is_loading = False
    self._finalize_setup()
def _load(self, tagger: Iterable['DXFTag']):
    """Load the document content from a stream of DXF tags.

    Loads the section structure from `tagger`, discards the
    THUMBNAILIMAGE section, sets up the header, DXF version, encoding and
    handle seed, fills the entity database, creates the section objects,
    upgrades old DXF versions and creates the layouts.

    Args:
        tagger: generator or list of DXF tags as DXFTag() objects

    """
    sections = load_dxf_structure(
        tagger)  # load complete DXF entity structure
    try:  # discard section THUMBNAILIMAGE
        del sections['THUMBNAILIMAGE']
    except KeyError:
        pass  # section does not exist, nothing to discard
    # -----------------------------------------------------------------------------------
    # create header section:
    # all header tags are the first DXF structure entity
    header_entities = sections.get('HEADER', [None])[0]
    if header_entities is None:
        # create default header, files without header are by default DXF R12
        self.header = HeaderSection.new(dxfversion=DXF12)
    else:
        self.header = HeaderSection.load(header_entities)
    # -----------------------------------------------------------------------------------
    # missing $ACADVER defaults to DXF R12
    self._dxfversion = self.header.get('$ACADVER', DXF12)  # type: str
    self._loaded_dxfversion = self._dxfversion  # save dxf version of loaded file
    # missing $DWGCODEPAGE defaults to 'ANSI_1252'
    self.encoding = toencoding(self.header.get(
        '$DWGCODEPAGE', 'ANSI_1252'))  # type: str # read/write
    # get handle seed
    seed = self.header.get('$HANDSEED', str(self.entitydb.handles))  # type: str
    # setup handles
    self.entitydb.handles.reset(seed)
    # store all necessary DXF entities in the drawing database
    fill_database(sections, self.dxffactory)
    # all handles used in the DXF file are known at this point
    # -----------------------------------------------------------------------------------
    # create sections:
    self.classes = ClassesSection(self, sections.get('CLASSES', None))
    self.tables = TablesSection(self, sections.get('TABLES', None))
    # create *Model_Space and *Paper_Space BLOCK_RECORDS
    # BlockSection setup takes care about the rest
    self._create_required_block_records()
    # table records available
    self.blocks = BlocksSection(self, sections.get('BLOCKS', None))
    self.entities = EntitySection(self, sections.get('ENTITIES', None))
    self.objects = ObjectsSection(self, sections.get('OBJECTS', None))
    # only valid for DXF R2013 and later
    self.acdsdata = AcDsDataSection(self, sections.get('ACDSDATA',
                                                       None))
    # keep all sections not managed by the document as stored sections
    for name, data in sections.items():
        if name not in MANAGED_SECTIONS:
            self.stored_sections.append(StoredSection(data))
    # -----------------------------------------------------------------------------------
    if self.dxfversion < DXF12:
        # upgrade to DXF R12
        logger.info('Upgrading drawing to DXF R12.')
        self.dxfversion = DXF12

    # DIMSTYLE: ezdxf uses names for blocks, linetypes and text style as internal data, handles are set at export
    # requires BLOCKS and TABLES section!
    self.tables.resolve_dimstyle_names()

    if self.dxfversion == DXF12:
        # TABLE requires in DXF12 no handle and has no owner tag, but DXF R2000+, requires a TABLE with handle
        # and each table entry has an owner tag, pointing to the TABLE entry
        self.tables.create_table_handles()

    if self.dxfversion in (DXF13, DXF14):
        # upgrade to DXF R2000
        self.dxfversion = DXF2000

    self.rootdict = self.objects.rootdict
    self.objects.setup_objects_management_tables(
        self.rootdict)  # create missing tables

    self.layouts = Layouts.load(self)
    self._finalize_setup()
def _get_encoding(self):
    """Return the result of `toencoding()` for the header variable
    $DWGCODEPAGE, a missing variable defaults to 'ANSI_1252'.
    """
    return toencoding(self.header.get('$DWGCODEPAGE', 'ANSI_1252'))
def single_pass_modelspace(stream: BinaryIO, types: Optional[Iterable[str]] = None) -> Iterable[DXFGraphic]:
    """
    Iterate over all modelspace entities as :class:`DXFGraphic` objects in one single pass.

    Use this function to 'quick' iterate over modelspace entities of a **not** seekable binary DXF stream,
    filtering DXF types may speed up things if many entity types will be skipped.

    Args:
        stream: (not seekable) binary DXF stream
        types: DXF types like ``['LINE', '3DFACE']`` which should be returned, ``None`` returns all supported types.

    """
    fetch_header_var: Optional[str] = None
    encoding = 'cp1252'
    version = 'AC1009'
    prev_code: int = -1
    prev_value: str = ''
    entities = False
    requested_types = _requested_types(types)

    # 1. stage: scan the undecoded HEADER section for the DXF version and
    # the code page, which define the encoding of the remaining stream.
    for code, value in binary_tagger(stream):
        if code == 0 and value == b'ENDSEC':
            break
        elif code == 2 and prev_code == 0 and value != b'HEADER':
            # (0, SECTION), (2, name)
            # First section is not the HEADER section
            entities = (value == b'ENTITIES')
            break
        elif code == 9 and value == b'$DWGCODEPAGE':
            fetch_header_var = 'ENCODING'
        elif code == 9 and value == b'$ACADVER':
            fetch_header_var = 'VERSION'
        elif fetch_header_var == 'ENCODING':
            encoding = toencoding(value.decode())
            fetch_header_var = None
        elif fetch_header_var == 'VERSION':
            version = value.decode()
            fetch_header_var = None
        prev_code = code

    if version >= 'AC1021':
        encoding = 'utf-8'

    # 2. stage: continue reading the same stream with the detected encoding.
    queued: Optional[DXFEntity] = None
    tags: List[DXFTag] = []
    factory = EntityFactory()
    linked_entity = entity_linker()
    for tag in tag_compiler(binary_tagger(stream, encoding)):
        code = tag.code
        value = tag.value
        if entities:
            if code == 0:
                # A structure tag terminates the previously collected tags,
                # this includes the ENDSEC tag which terminates the last
                # entity of the section.
                if tags and tags[0].value in requested_types:
                    entity = factory.entity(ExtendedTags(tags))
                    if not linked_entity(entity) and entity.dxf.paperspace == 0:
                        if queued:  # queue one entity for collecting linked entities (VERTEX, ATTRIB)
                            yield queued
                        queued = entity
                tags = [tag]
            else:
                tags.append(tag)
            if code == 0 and value == 'ENDSEC':
                # Checked after loading the pending tags, otherwise the last
                # entity of the ENTITIES section would be lost.
                if queued:
                    yield queued
                return
            continue  # if entities - nothing else matters
        elif code == 2 and prev_code == 0 and prev_value == 'SECTION':
            entities = (value == 'ENTITIES')

        prev_code = code
        prev_value = value
def test_ansi_1250(self):
    # The lower case code page name 'ansi_1250' has to resolve to 'cp1250'.
    result = toencoding('ansi_1250')
    self.assertEqual('cp1250', result)
def test_default(self):
    # An unknown code page name has to resolve to the default 'cp1252'.
    result = toencoding('xyz')
    self.assertEqual('cp1252', result)
def test_ansi_1250():
    # The lower case code page name 'ansi_1250' has to resolve to 'cp1250'.
    result = toencoding('ansi_1250')
    assert 'cp1250' == result
def test_default():
    # An unknown code page name has to resolve to the default 'cp1252'.
    result = toencoding('xyz')
    assert 'cp1252' == result
def load(filename: str) -> FileStructure:
    """Load DXF file structure for file `filename`, the file has to be seekable.

    Builds an index of all structure tags (group code 0), section names
    (group code 2 following a SECTION tag) and entity handles (group code
    5, or 105 for DIMSTYLE table entries) with their file location and
    line number. The header variables $ACADVER and $DWGCODEPAGE are used
    to set the version and encoding of the returned structure.

    Args:
        filename: file system file name

    Returns:
        the :class:`FileStructure` of the file including the tag index

    Raises:
        DXFStructureError: Invalid or incomplete DXF file.

    """
    file_structure = FileStructure(filename)
    line: int = 1
    eof = False
    header = False
    index: List[IndexEntry] = []
    prev_code: int = -1
    prev_value: bytes = b''
    structure = None  # the actual structure tag: 'SECTION', 'LINE', ...

    # The context manager closes the file even if a DXFStructureError is
    # raised while scanning.
    with open(filename, mode='rb') as fp:

        def load_tag() -> Tuple[int, bytes]:
            """Read the next (group code, value) pair as int and raw bytes."""
            nonlocal line
            raw_code = fp.readline()
            if not raw_code:
                # readline() returns an empty result only at the end of the
                # file, this is not an invalid group code.
                raise EOFError
            try:
                code = int(raw_code)
            except ValueError:
                raise DXFStructureError(
                    f'Invalid group code in line {line}') from None

            if code < 0 or code > 1071:
                raise DXFStructureError(
                    f'Invalid group code {code} in line {line}')
            value = fp.readline().rstrip(b'\r\n')
            line += 2
            return code, value

        def load_header_var() -> str:
            """Read the value tag of a header variable as decoded string."""
            _, value = load_tag()
            return value.decode()

        while not eof:
            location = fp.tell()
            tag_line = line
            try:
                code, value = load_tag()
                if header and code == 9:
                    if value == b'$ACADVER':
                        file_structure.version = load_header_var()
                    elif value == b'$DWGCODEPAGE':
                        file_structure.encoding = toencoding(load_header_var())
                    continue
            except (IOError, EOFError):
                # Leave the loop with eof == False, reported below as
                # unexpected end of file.
                break

            if code == 0:
                # All structure tags have group code == 0, store file location
                structure = value
                index.append(IndexEntry(0, value.decode(), location, tag_line))
                eof = (value == b'EOF')
            elif code == 2 and prev_code == 0 and prev_value == b'SECTION':
                # Section name is the tag (2, name) following the (0, SECTION) tag.
                header = (value == b'HEADER')
                index.append(IndexEntry(2, value.decode(), location, tag_line))
            elif code == 5 and structure != b'DIMSTYLE':
                # Entity handles have always group code 5.
                index.append(IndexEntry(5, value.decode(), location, tag_line))
            elif code == 105 and structure == b'DIMSTYLE':
                # Except the DIMSTYLE table entry has group code 105.
                index.append(IndexEntry(5, value.decode(), location, tag_line))
            prev_code = code
            prev_value = value

    if not eof:
        raise DXFStructureError('Unexpected end of file.')

    if file_structure.version >= 'AC1021':  # R2007 and later
        file_structure.encoding = 'utf-8'
    file_structure.index = index
    return file_structure
def test_ansi_1250():
    # The lower case code page name "ansi_1250" has to resolve to "cp1250".
    result = toencoding("ansi_1250")
    assert "cp1250" == result
def test_default():
    # An unknown code page name has to resolve to the default "cp1252".
    result = toencoding("xyz")
    assert "cp1252" == result