def check_binary(name, file_path: bool = True) -> bool:
    # Handles files if file_path is True or text if file_path is False
    temp: IO[bytes]
    if file_path:
        temp = open(name, "rb")
        size = os.stat(name).st_size
    else:
        temp = BytesIO(name)
        size = len(name)
    read_start = int(size / 2)
    read_length = 1024
    try:
        if util.is_binary(temp.read(read_length)):
            return True
        # Some binary files have text only within the first 1024.
        # Read 1024 from the middle of the file if this is not
        # a gzip or zip compressed file (bzip are indexed),
        # to avoid issues with long txt headers on binary files.
        if file_path and not is_gzip(name) and not is_zip(name) and not is_bz2(name):
            # file_path=False doesn't seem to be used in the codebase
            temp.seek(read_start)
            return util.is_binary(temp.read(read_length))
        return False
    finally:
        temp.close()
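# The midpoint read above guards against binary files that start with a long
# text header. Below is a minimal standalone sketch of the same sampling idea,
# not Galaxy's implementation: looks_binary() is a stand-in for util.is_binary
# (a simple NUL-byte heuristic, an assumption for illustration), and the
# gzip/zip/bz2 checks of the real function are omitted for brevity.
import os


def looks_binary(data: bytes) -> bool:
    # Stand-in heuristic (assumption): a NUL byte suggests binary content.
    return b"\x00" in data


def sample_is_binary(path: str, read_length: int = 1024) -> bool:
    # Read a chunk from the start of the file and, if that looks like text,
    # a second chunk from the middle before deciding.
    size = os.stat(path).st_size
    with open(path, "rb") as fh:
        if looks_binary(fh.read(read_length)):
            return True
        fh.seek(size // 2)
        return looks_binary(fh.read(read_length))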
def set_meta(self, dataset, **kwd):
    if dataset.has_data():
        dataset.metadata.field_names = []
        dataset.metadata.field_components = {}
        dataset_type = None
        field_components = {}
        dataset_structure_complete = False
        with open(dataset.file_name) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                if not line:
                    continue
                if i < 3:
                    dataset = self.set_initial_metadata(i, line, dataset)
                elif dataset.metadata.file_format == 'ASCII' or not util.is_binary(line):
                    if dataset_structure_complete:
                        dataset, field_components = self.set_dataset_attributes_metadata(line, dataset, field_components)
                    elif line.startswith('POINT_DATA') or line.startswith('CELL_DATA'):
                        dataset_structure_complete = True
                        dataset, field_components = self.set_dataset_attributes_metadata(line, dataset, field_components)
                    else:
                        dataset, dataset_type = self.set_dataset_structure_metadata(line, dataset, dataset_type)
        if len(field_components) > 0:
            dataset.metadata.field_components = field_components
def check_binary(name, file_path=True):
    # Handles files if file_path is True or text if file_path is False
    if file_path:
        temp = open(name, "rb")
    else:
        temp = BytesIO(name)
    try:
        return util.is_binary(temp.read(1024))
    finally:
        temp.close()
def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'):
    """Writes a stream to the provided file descriptor, returns the file's name and bool( is_multi_byte ). Closes file descriptor"""
    # signature and behavior is somewhat odd, due to backwards compatibility, but this can/should be done better
    CHUNK_SIZE = 1048576
    data_checked = False
    is_compressed = False
    is_binary = False
    is_multi_byte = False
    try:
        codecs.lookup(target_encoding)
    except Exception:
        target_encoding = util.DEFAULT_ENCODING  # utf-8
    if not source_encoding:
        source_encoding = util.DEFAULT_ENCODING  # sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            break
        if not data_checked:
            # See if we're uploading a compressed file
            if zipfile.is_zipfile(filename):
                is_compressed = True
            else:
                try:
                    if text_type(chunk[:2]) == text_type(util.gzip_magic):
                        is_compressed = True
                except Exception:
                    pass
            if not is_compressed:
                # See if we have a multi-byte character file
                chars = chunk[:100]
                is_multi_byte = multi_byte.is_multi_byte(chars)
                if not is_multi_byte:
                    is_binary = util.is_binary(chunk)
            data_checked = True
        if not is_compressed and not is_binary:
            if not isinstance(chunk, text_type):
                chunk = chunk.decode(source_encoding, source_error)
            os.write(fd, chunk.encode(target_encoding, target_error))
        else:
            # Compressed files must be encoded after they are uncompressed in the upload utility,
            # while binary files should not be encoded at all.
            os.write(fd, chunk)
    os.close(fd)
    return filename, is_multi_byte
def check_binary( name, file_path=True ):
    # Handles files if file_path is True or text if file_path is False
    is_binary = False
    if file_path:
        temp = open( name, "U" )
    else:
        temp = StringIO( name )
    try:
        for char in temp.read( 100 ):
            if util.is_binary( char ):
                is_binary = True
                break
    finally:
        temp.close( )
    return is_binary
def check_binary(name, file_path=True):
    # Handles files if file_path is True or text if file_path is False
    is_binary = False
    if file_path:
        temp = open(name, "U")
    else:
        temp = StringIO(name)
    try:
        for char in temp.read(100):
            if util.is_binary(char):
                is_binary = True
                break
    finally:
        temp.close()
    return is_binary
def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'):
    """Writes a stream to the provided file descriptor, returns the file name. Closes file descriptor"""
    # signature and behavior is somewhat odd, due to backwards compatibility, but this can/should be done better
    CHUNK_SIZE = 1048576
    data_checked = False
    is_compressed = False
    is_binary = False
    try:
        codecs.lookup(target_encoding)
    except Exception:
        target_encoding = util.DEFAULT_ENCODING  # utf-8
    if not source_encoding:
        source_encoding = util.DEFAULT_ENCODING  # sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            break
        if not data_checked:
            # See if we're uploading a compressed file
            try:
                # Convert chunk to a bytestring if it is not already.
                # Check if the first 2 bytes of the chunk are equal to the
                # gzip magic number.
                if smart_str(chunk)[:2] == util.gzip_magic:
                    is_compressed = True
            except Exception:
                pass
            if not is_compressed:
                is_binary = util.is_binary(chunk)
            data_checked = True
        if not is_compressed and not is_binary:
            if not isinstance(chunk, text_type):
                chunk = chunk.decode(source_encoding, source_error)
            os.write(fd, chunk.encode(target_encoding, target_error))
        else:
            # Compressed files must be encoded after they are uncompressed in the upload utility,
            # while binary files should not be encoded at all.
            if isinstance(chunk, text_type):
                chunk = chunk.encode(target_encoding, target_error)
            os.write(fd, chunk)
    os.close(fd)
    return filename
def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'):
    """Writes a stream to the provided file descriptor, returns the file's name and bool( is_multi_byte ). Closes file descriptor"""
    # signature and behavior is somewhat odd, due to backwards compatibility, but this can/should be done better
    CHUNK_SIZE = 1048576
    data_checked = False
    is_compressed = False
    is_binary = False
    is_multi_byte = False
    try:
        codecs.lookup(target_encoding)
    except:
        target_encoding = util.DEFAULT_ENCODING  # utf-8
    if not source_encoding:
        source_encoding = util.DEFAULT_ENCODING  # sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            break
        if not data_checked:
            # See if we're uploading a compressed file
            if zipfile.is_zipfile(filename):
                is_compressed = True
            else:
                try:
                    if text_type(chunk[:2]) == text_type(util.gzip_magic):
                        is_compressed = True
                except:
                    pass
            if not is_compressed:
                # See if we have a multi-byte character file
                chars = chunk[:100]
                is_multi_byte = multi_byte.is_multi_byte(chars)
                if not is_multi_byte:
                    is_binary = util.is_binary(chunk)
            data_checked = True
        if not is_compressed and not is_binary:
            if not isinstance(chunk, text_type):
                chunk = chunk.decode(source_encoding, source_error)
            os.write(fd, chunk.encode(target_encoding, target_error))
        else:
            # Compressed files must be encoded after they are uncompressed in the upload utility,
            # while binary files should not be encoded at all.
            os.write(fd, chunk)
    os.close(fd)
    return filename, is_multi_byte
def check_binary( name, file_path=True ):
    # Handles files if file_path is True or text if file_path is False
    is_binary = False
    if file_path:
        temp = open( name, "U" )
    else:
        temp = name
    chars_read = 0
    for chars in temp:
        for char in chars:
            chars_read += 1
            if util.is_binary( char ):
                is_binary = True
                break
            if chars_read > 100:
                break
        if chars_read > 100:
            break
    if file_path:
        temp.close()
    return is_binary
def check_binary(name, file_path=True):
    # Handles files if file_path is True or text if file_path is False
    is_binary = False
    if file_path:
        temp = open(name, "U")
    else:
        temp = name
    chars_read = 0
    for chars in temp:
        for char in chars:
            chars_read += 1
            if util.is_binary(char):
                is_binary = True
                break
            if chars_read > 100:
                break
        if chars_read > 100:
            break
    if file_path:
        temp.close()
    return is_binary
def stream_to_open_named_file(stream, fd, filename, source_encoding=None, source_error='strict', target_encoding=None, target_error='strict'):
    """Writes a stream to the provided file descriptor, returns the file name. Closes file descriptor"""
    # signature and behavior is somewhat odd, due to backwards compatibility, but this can/should be done better
    CHUNK_SIZE = 1048576
    data_checked = False
    is_compressed = False
    is_binary = False
    try:
        codecs.lookup(target_encoding)
    except Exception:
        target_encoding = util.DEFAULT_ENCODING  # utf-8
    if not source_encoding:
        source_encoding = util.DEFAULT_ENCODING  # sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
    while True:
        chunk = stream.read(CHUNK_SIZE)
        if not chunk:
            break
        if not data_checked:
            # See if we're uploading a compressed file
            try:
                # Convert chunk to a bytestring if it is not already.
                # Check if the first 2 bytes of the chunk are equal to the
                # gzip magic number.
                if smart_str(chunk)[:2] == util.gzip_magic:
                    is_compressed = True
            except Exception:
                pass
            if not is_compressed:
                is_binary = util.is_binary(chunk)
            data_checked = True
        if not is_compressed and not is_binary:
            if not isinstance(chunk, text_type):
                chunk = chunk.decode(source_encoding, source_error)
            os.write(fd, chunk.encode(target_encoding, target_error))
        else:
            # Compressed files must be encoded after they are uncompressed in the upload utility,
            # while binary files should not be encoded at all.
            os.write(fd, chunk)
    os.close(fd)
    return filename
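# A minimal usage sketch for stream_to_open_named_file as defined above.
# Assumptions: the function and its module-level dependencies (os, codecs,
# util, smart_str, text_type) are importable in this scope; the payload and
# the temporary path are illustrative only, not taken from the codebase.
import tempfile
from io import BytesIO


def write_stream_to_temp(payload: bytes) -> str:
    # mkstemp returns an OS-level descriptor plus a path; the function reads
    # the stream in chunks, writes through the descriptor, closes it for us,
    # and returns the path it was given.
    fd, path = tempfile.mkstemp()
    return stream_to_open_named_file(BytesIO(payload), fd, path,
                                     source_encoding="utf-8",
                                     target_encoding="utf-8")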
def guess_ext( fname, sniff_order=None, is_multi_byte=False ):
    """
    Returns an extension that can be used in the datatype factory to
    generate a data for the 'fname' file

    >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
    >>> guess_ext(fname)
    'xml'
    >>> fname = get_test_fname('interval.interval')
    >>> guess_ext(fname)
    'interval'
    >>> fname = get_test_fname('interval1.bed')
    >>> guess_ext(fname)
    'bed'
    >>> fname = get_test_fname('test_tab.bed')
    >>> guess_ext(fname)
    'bed'
    >>> fname = get_test_fname('sequence.maf')
    >>> guess_ext(fname)
    'maf'
    >>> fname = get_test_fname('sequence.fasta')
    >>> guess_ext(fname)
    'fasta'
    >>> fname = get_test_fname('file.html')
    >>> guess_ext(fname)
    'html'
    >>> fname = get_test_fname('test.gtf')
    >>> guess_ext(fname)
    'gtf'
    >>> fname = get_test_fname('test.gff')
    >>> guess_ext(fname)
    'gff'
    >>> fname = get_test_fname('gff_version_3.gff')
    >>> guess_ext(fname)
    'gff3'
    >>> fname = get_test_fname('temp.txt')
    >>> file(fname, 'wt').write("a\\t2\\nc\\t1\\nd\\t0")
    >>> guess_ext(fname)
    'tabular'
    >>> fname = get_test_fname('temp.txt')
    >>> file(fname, 'wt').write("a 1 2 x\\nb 3 4 y\\nc 5 6 z")
    >>> guess_ext(fname)
    'txt'
    >>> fname = get_test_fname('test_tab1.tabular')
    >>> guess_ext(fname)
    'tabular'
    >>> fname = get_test_fname('alignment.lav')
    >>> guess_ext(fname)
    'lav'
    >>> fname = get_test_fname('1.sff')
    >>> guess_ext(fname)
    'sff'
    >>> fname = get_test_fname('1.bam')
    >>> guess_ext(fname)
    'bam'
    >>> fname = get_test_fname('3unsorted.bam')
    >>> guess_ext(fname)
    'bam'
    """
    if sniff_order is None:
        datatypes_registry = registry.Registry()
        datatypes_registry.load_datatypes()
        sniff_order = datatypes_registry.sniff_order
    for datatype in sniff_order:
        """
        Some classes may not have a sniff function, which is ok.  In fact, the
        Tabular and Text classes are 2 examples of classes that should never have
        a sniff function. Since these classes are default classes, they contain
        few rules to filter out data of other formats, so they should be called
        from this function after all other datatypes in sniff_order have not been
        successfully discovered.
        """
        try:
            if datatype.sniff( fname ):
                return datatype.file_ext
        except:
            pass
    headers = get_headers( fname, None )
    is_binary = False
    if is_multi_byte:
        is_binary = False
    else:
        for hdr in headers:
            for char in hdr:
                # old behavior had 'char' possibly having length > 1,
                # need to determine when/if this occurs
                is_binary = util.is_binary( char )
                if is_binary:
                    break
            if is_binary:
                break
    if is_binary:
        return 'data'  # default binary data type file extension
    if is_column_based( fname, '\t', 1, is_multi_byte=is_multi_byte ):
        return 'tabular'  # default tabular data type file extension
    return 'txt'  # default text data type file extension
def guess_ext(fname, sniff_order, is_multi_byte=False):
    """
    Returns an extension that can be used in the datatype factory to
    generate a data for the 'fname' file

    >>> from galaxy.datatypes import registry
    >>> sample_conf = os.path.join(util.galaxy_directory(), "config", "datatypes_conf.xml.sample")
    >>> datatypes_registry = registry.Registry()
    >>> datatypes_registry.load_datatypes(root_dir=util.galaxy_directory(), config=sample_conf)
    >>> sniff_order = datatypes_registry.sniff_order
    >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
    >>> guess_ext(fname, sniff_order)
    'blastxml'
    >>> fname = get_test_fname('interval.interval')
    >>> guess_ext(fname, sniff_order)
    'interval'
    >>> fname = get_test_fname('interval1.bed')
    >>> guess_ext(fname, sniff_order)
    'bed'
    >>> fname = get_test_fname('test_tab.bed')
    >>> guess_ext(fname, sniff_order)
    'bed'
    >>> fname = get_test_fname('sequence.maf')
    >>> guess_ext(fname, sniff_order)
    'maf'
    >>> fname = get_test_fname('sequence.fasta')
    >>> guess_ext(fname, sniff_order)
    'fasta'
    >>> fname = get_test_fname('file.html')
    >>> guess_ext(fname, sniff_order)
    'html'
    >>> fname = get_test_fname('test.gtf')
    >>> guess_ext(fname, sniff_order)
    'gtf'
    >>> fname = get_test_fname('test.gff')
    >>> guess_ext(fname, sniff_order)
    'gff'
    >>> fname = get_test_fname('gff_version_3.gff')
    >>> guess_ext(fname, sniff_order)
    'gff3'
    >>> fname = get_test_fname('temp.txt')
    >>> open(fname, 'wt').write("a\\t2")
    >>> guess_ext(fname, sniff_order)
    'txt'
    >>> fname = get_test_fname('temp.txt')
    >>> open(fname, 'wt').write("a\\t2\\nc\\t1\\nd\\t0")
    >>> guess_ext(fname, sniff_order)
    'tabular'
    >>> fname = get_test_fname('temp.txt')
    >>> open(fname, 'wt').write("a 1 2 x\\nb 3 4 y\\nc 5 6 z")
    >>> guess_ext(fname, sniff_order)
    'txt'
    >>> fname = get_test_fname('test_tab1.tabular')
    >>> guess_ext(fname, sniff_order)
    'tabular'
    >>> fname = get_test_fname('alignment.lav')
    >>> guess_ext(fname, sniff_order)
    'lav'
    >>> fname = get_test_fname('1.sff')
    >>> guess_ext(fname, sniff_order)
    'sff'
    >>> fname = get_test_fname('1.bam')
    >>> guess_ext(fname, sniff_order)
    'bam'
    >>> fname = get_test_fname('3unsorted.bam')
    >>> guess_ext(fname, sniff_order)
    'bam'
    >>> fname = get_test_fname('test.idpDB')
    >>> guess_ext(fname, sniff_order)
    'idpdb'
    >>> fname = get_test_fname('test.mz5')
    >>> guess_ext(fname, sniff_order)
    'h5'
    >>> fname = get_test_fname('issue1818.tabular')
    >>> guess_ext(fname, sniff_order)
    'tabular'
    >>> fname = get_test_fname('drugbank_drugs.cml')
    >>> guess_ext(fname, sniff_order)
    'cml'
    >>> fname = get_test_fname('q.fps')
    >>> guess_ext(fname, sniff_order)
    'fps'
    >>> fname = get_test_fname('drugbank_drugs.inchi')
    >>> guess_ext(fname, sniff_order)
    'inchi'
    >>> fname = get_test_fname('drugbank_drugs.mol2')
    >>> guess_ext(fname, sniff_order)
    'mol2'
    >>> fname = get_test_fname('drugbank_drugs.sdf')
    >>> guess_ext(fname, sniff_order)
    'sdf'
    >>> fname = get_test_fname('5e5z.pdb')
    >>> guess_ext(fname, sniff_order)
    'pdb'
    >>> fname = get_test_fname('mothur_datatypetest_true.mothur.otu')
    >>> guess_ext(fname, sniff_order)
    'mothur.otu'
    >>> fname = get_test_fname('1.gg')
    >>> guess_ext(fname, sniff_order)
    'gg'
    >>> fname = get_test_fname('diamond_db.dmnd')
    >>> guess_ext(fname, sniff_order)
    'dmnd'
    >>> fname = get_test_fname('1.xls')
    >>> guess_ext(fname, sniff_order)
    'excel.xls'
    >>> fname = get_test_fname('biom2_sparse_otu_table_hdf5.biom')
    >>> guess_ext(fname, sniff_order)
    'biom2'
    """
    file_ext = None
    for datatype in sniff_order:
        """
        Some classes may not have a sniff function, which is ok.  In fact, the
        Tabular and Text classes are 2 examples of classes that should never have
        a sniff function. Since these classes are default classes, they contain
        few rules to filter out data of other formats, so they should be called
        from this function after all other datatypes in sniff_order have not been
        successfully discovered.
        """
        try:
            if datatype.sniff(fname):
                file_ext = datatype.file_ext
                break
        except:
            pass
    # Ugly hack for tsv vs tabular sniffing, we want to prefer tabular
    # to tsv but it doesn't have a sniffer - if TSV was sniffed just check
    # if it is an okay tabular and use that instead.
    if file_ext == 'tsv':
        if is_column_based(fname, '\t', 1, is_multi_byte=is_multi_byte):
            file_ext = 'tabular'
    if file_ext is not None:
        return file_ext
    headers = get_headers(fname, None)
    is_binary = False
    if is_multi_byte:
        is_binary = False
    else:
        for hdr in headers:
            for char in hdr:
                # old behavior had 'char' possibly having length > 1,
                # need to determine when/if this occurs
                is_binary = util.is_binary(char)
                if is_binary:
                    break
            if is_binary:
                break
    if is_binary:
        return 'data'  # default binary data type file extension
    if is_column_based(fname, '\t', 1, is_multi_byte=is_multi_byte):
        return 'tabular'  # default tabular data type file extension
    return 'txt'  # default text data type file extension
def guess_ext(fname, sniff_order, is_multi_byte=False):
    """
    Returns an extension that can be used in the datatype factory to
    generate a data for the 'fname' file

    >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
    >>> from galaxy.datatypes import registry
    >>> sample_conf = os.path.join(util.galaxy_directory(), "config", "datatypes_conf.xml.sample")
    >>> datatypes_registry = registry.Registry()
    >>> datatypes_registry.load_datatypes(root_dir=util.galaxy_directory(), config=sample_conf)
    >>> sniff_order = datatypes_registry.sniff_order
    >>> guess_ext(fname, sniff_order)
    'xml'
    >>> fname = get_test_fname('interval.interval')
    >>> guess_ext(fname, sniff_order)
    'interval'
    >>> fname = get_test_fname('interval1.bed')
    >>> guess_ext(fname, sniff_order)
    'bed'
    >>> fname = get_test_fname('test_tab.bed')
    >>> guess_ext(fname, sniff_order)
    'bed'
    >>> fname = get_test_fname('sequence.maf')
    >>> guess_ext(fname, sniff_order)
    'maf'
    >>> fname = get_test_fname('sequence.fasta')
    >>> guess_ext(fname, sniff_order)
    'fasta'
    >>> fname = get_test_fname('file.html')
    >>> guess_ext(fname, sniff_order)
    'html'
    >>> fname = get_test_fname('test.gtf')
    >>> guess_ext(fname, sniff_order)
    'gtf'
    >>> fname = get_test_fname('test.gff')
    >>> guess_ext(fname, sniff_order)
    'gff'
    >>> fname = get_test_fname('gff_version_3.gff')
    >>> guess_ext(fname, sniff_order)
    'gff3'
    >>> fname = get_test_fname('temp.txt')
    >>> open(fname, 'wt').write("a\\t2")
    >>> guess_ext(fname, sniff_order)
    'txt'
    >>> fname = get_test_fname('temp.txt')
    >>> open(fname, 'wt').write("a\\t2\\nc\\t1\\nd\\t0")
    >>> guess_ext(fname, sniff_order)
    'tabular'
    >>> fname = get_test_fname('temp.txt')
    >>> open(fname, 'wt').write("a 1 2 x\\nb 3 4 y\\nc 5 6 z")
    >>> guess_ext(fname, sniff_order)
    'txt'
    >>> fname = get_test_fname('test_tab1.tabular')
    >>> guess_ext(fname, sniff_order)
    'tabular'
    >>> fname = get_test_fname('alignment.lav')
    >>> guess_ext(fname, sniff_order)
    'lav'
    >>> fname = get_test_fname('1.sff')
    >>> guess_ext(fname, sniff_order)
    'sff'
    >>> fname = get_test_fname('1.bam')
    >>> guess_ext(fname, sniff_order)
    'bam'
    >>> fname = get_test_fname('3unsorted.bam')
    >>> guess_ext(fname, sniff_order)
    'bam'
    >>> fname = get_test_fname('test.idpDB')
    >>> guess_ext(fname, sniff_order)
    'idpdb'
    >>> fname = get_test_fname('test.mz5')
    >>> guess_ext(fname, sniff_order)
    'h5'
    >>> fname = get_test_fname('issue1818.tabular')
    >>> guess_ext(fname, sniff_order)
    'tabular'
    >>> fname = get_test_fname('drugbank_drugs.cml')
    >>> guess_ext(fname, sniff_order)
    'cml'
    >>> fname = get_test_fname('q.fps')
    >>> guess_ext(fname, sniff_order)
    'fps'
    >>> fname = get_test_fname('drugbank_drugs.inchi')
    >>> guess_ext(fname, sniff_order)
    'inchi'
    >>> fname = get_test_fname('drugbank_drugs.mol2')
    >>> guess_ext(fname, sniff_order)
    'mol2'
    >>> fname = get_test_fname('drugbank_drugs.sdf')
    >>> guess_ext(fname, sniff_order)
    'sdf'
    >>> fname = get_test_fname('5e5z.pdb')
    >>> guess_ext(fname, sniff_order)
    'pdb'
    >>> fname = get_test_fname('mothur_datatypetest_true.mothur.otu')
    >>> guess_ext(fname, sniff_order)
    'mothur.otu'
    """
    file_ext = None
    for datatype in sniff_order:
        """
        Some classes may not have a sniff function, which is ok.  In fact, the
        Tabular and Text classes are 2 examples of classes that should never have
        a sniff function. Since these classes are default classes, they contain
        few rules to filter out data of other formats, so they should be called
        from this function after all other datatypes in sniff_order have not been
        successfully discovered.
        """
        try:
            if datatype.sniff(fname):
                file_ext = datatype.file_ext
                break
        except:
            pass
    # Ugly hack for tsv vs tabular sniffing, we want to prefer tabular
    # to tsv but it doesn't have a sniffer - if TSV was sniffed just check
    # if it is an okay tabular and use that instead.
    if file_ext == 'tsv':
        if is_column_based(fname, '\t', 1, is_multi_byte=is_multi_byte):
            file_ext = 'tabular'
    if file_ext is not None:
        return file_ext
    headers = get_headers(fname, None)
    is_binary = False
    if is_multi_byte:
        is_binary = False
    else:
        for hdr in headers:
            for char in hdr:
                # old behavior had 'char' possibly having length > 1,
                # need to determine when/if this occurs
                is_binary = util.is_binary(char)
                if is_binary:
                    break
            if is_binary:
                break
    if is_binary:
        return 'data'  # default binary data type file extension
    if is_column_based(fname, '\t', 1, is_multi_byte=is_multi_byte):
        return 'tabular'  # default tabular data type file extension
    return 'txt'  # default text data type file extension
def set_meta(self, dataset, **kwd):
    if dataset.has_data():
        dataset.metadata.field_names = []
        dataset.metadata.field_components = {}
        dataset_type = None
        field_components = {}
        dataset_structure_complete = False
        processing_field_section = False
        with open(dataset.file_name) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                if not line:
                    continue
                if i < 3:
                    dataset = self.set_initial_metadata(i, line, dataset)
                elif dataset.metadata.file_format == 'ASCII' or not util.is_binary(line):
                    if dataset_structure_complete:
                        """
                        The final part of legacy VTK files describes the dataset attributes.
                        This part begins with the keywords POINT_DATA or CELL_DATA, followed
                        by an integer number specifying the number of points or cells,
                        respectively.  Other keyword/data combinations then define the actual
                        dataset attribute values (i.e., scalars, vectors, tensors, normals,
                        texture coordinates, or field data).  Dataset attributes are supported
                        for both points and cells.  Each type of attribute data has a dataName
                        associated with it.  This is a character string (without embedded
                        whitespace) used to identify a particular data.  The dataName is used
                        by the VTK readers to extract data.  As a result, more than one
                        attribute data of the same type can be included in a file.  For
                        example, two different scalar fields defined on the dataset points,
                        pressure and temperature, can be contained in the same file.  If the
                        appropriate dataName is not specified in the VTK reader, then the
                        first data of that type is extracted from the file.
                        """
                        items = line.split()
                        if items[0] == 'SCALARS':
                            # Example: SCALARS surface_field double 3
                            # Scalar definition includes specification of a lookup table. The
                            # definition of a lookup table is optional. If not specified, the
                            # default VTK table will be used, and tableName should be
                            # "default". Also note that the numComp variable is optional. By
                            # default the number of components is equal to one. The parameter
                            # numComp must range between (1,4) inclusive; in versions of VTK
                            # prior to vtk2.3 this parameter was not supported.
                            field_name = items[1]
                            dataset.metadata.field_names.append(field_name)
                            try:
                                num_components = int(items[-1])
                            except Exception:
                                num_components = 1
                            field_component_indexes = [str(i) for i in range(num_components)]
                            field_components[field_name] = field_component_indexes
                        elif items[0] == 'FIELD':
                            # The dataset consists of CELL_DATA.
                            # FIELD FieldData 2
                            processing_field_section = True
                            num_fields = int(items[-1])
                            fields_processed = []
                        elif processing_field_section:
                            if len(fields_processed) == num_fields:
                                processing_field_section = False
                            else:
                                try:
                                    float(items[0])
                                    # Don't process the cell data.
                                    # 0.0123457 0.197531
                                except Exception:
                                    # Line consists of arrayName numComponents numTuples dataType.
                                    # Example: surface_field1 1 12 double
                                    field_name = items[0]
                                    dataset.metadata.field_names.append(field_name)
                                    num_components = int(items[1])
                                    field_component_indexes = [str(i) for i in range(num_components)]
                                    field_components[field_name] = field_component_indexes
                                    fields_processed.append(field_name)
                    elif line.startswith('CELL_DATA'):
                        # CELL_DATA 3188
                        dataset_structure_complete = True
                        dataset.metadata.cells = int(line.split()[1])
                    elif line.startswith('POINT_DATA'):
                        # POINT_DATA 1876
                        dataset_structure_complete = True
                        dataset.metadata.points = int(line.split()[1])
                    else:
                        dataset, dataset_type = self.set_structure_metadata(line, dataset, dataset_type)
        if len(field_components) > 0:
            dataset.metadata.field_components = field_components
def set_meta(self, dataset, **kwd):
    if dataset.has_data():
        dataset.metadata.field_names = []
        dataset.metadata.field_components = {}
        dataset_type = None
        field_components = {}
        dataset_structure_complete = False
        processing_field_section = False
        with open(dataset.file_name) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                if not line:
                    continue
                if i < 3:
                    dataset = self.set_initial_metadata(i, line, dataset)
                elif dataset.metadata.file_format == 'ASCII' or not util.is_binary(
                        line):
                    if dataset_structure_complete:
                        """
                        The final part of legacy VTK files describes the dataset attributes.
                        This part begins with the keywords POINT_DATA or CELL_DATA, followed
                        by an integer number specifying the number of points or cells,
                        respectively.  Other keyword/data combinations then define the actual
                        dataset attribute values (i.e., scalars, vectors, tensors, normals,
                        texture coordinates, or field data).  Dataset attributes are supported
                        for both points and cells.  Each type of attribute data has a dataName
                        associated with it.  This is a character string (without embedded
                        whitespace) used to identify a particular data.  The dataName is used
                        by the VTK readers to extract data.  As a result, more than one
                        attribute data of the same type can be included in a file.  For
                        example, two different scalar fields defined on the dataset points,
                        pressure and temperature, can be contained in the same file.  If the
                        appropriate dataName is not specified in the VTK reader, then the
                        first data of that type is extracted from the file.
                        """
                        items = line.split()
                        if items[0] == 'SCALARS':
                            # Example: SCALARS surface_field double 3
                            # Scalar definition includes specification of a lookup table. The
                            # definition of a lookup table is optional. If not specified, the
                            # default VTK table will be used, and tableName should be
                            # "default". Also note that the numComp variable is optional. By
                            # default the number of components is equal to one. The parameter
                            # numComp must range between (1,4) inclusive; in versions of VTK
                            # prior to vtk2.3 this parameter was not supported.
                            field_name = items[1]
                            dataset.metadata.field_names.append(field_name)
                            try:
                                num_components = int(items[-1])
                            except:
                                num_components = 1
                            field_component_indexes = [
                                str(i) for i in range(num_components)
                            ]
                            field_components[
                                field_name] = field_component_indexes
                        elif items[0] == 'FIELD':
                            # The dataset consists of CELL_DATA.
                            # FIELD FieldData 2
                            processing_field_section = True
                            num_fields = int(items[-1])
                            fields_processed = []
                        elif processing_field_section:
                            if len(fields_processed) == num_fields:
                                processing_field_section = False
                            else:
                                try:
                                    float(items[0])
                                    # Don't process the cell data.
                                    # 0.0123457 0.197531
                                except:
                                    # Line consists of arrayName numComponents numTuples dataType.
                                    # Example: surface_field1 1 12 double
                                    field_name = items[0]
                                    dataset.metadata.field_names.append(
                                        field_name)
                                    num_components = int(items[1])
                                    field_component_indexes = [
                                        str(i) for i in range(num_components)
                                    ]
                                    field_components[
                                        field_name] = field_component_indexes
                                    fields_processed.append(field_name)
                    elif line.startswith('CELL_DATA'):
                        # CELL_DATA 3188
                        dataset_structure_complete = True
                        dataset.metadata.cells = int(line.split()[1])
                    elif line.startswith('POINT_DATA'):
                        # POINT_DATA 1876
                        dataset_structure_complete = True
                        dataset.metadata.points = int(line.split()[1])
                    else:
                        dataset, dataset_type = self.set_structure_metadata(
                            line, dataset, dataset_type)
        if len(field_components) > 0:
            dataset.metadata.field_components = field_components
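# Illustrative only: the kind of dataset-attributes section the set_meta
# methods above parse. The keyword lines and field definitions echo the
# examples in the code comments (CELL_DATA 3188, SCALARS surface_field double 3,
# FIELD FieldData 2, surface_field1 1 12 double); the data values are
# placeholders and the section is truncated, not taken from a real file.
EXAMPLE_VTK_ATTRIBUTE_SECTION = """\
CELL_DATA 3188
SCALARS surface_field double 3
FIELD FieldData 2
surface_field1 1 12 double
0.0123457 0.197531
"""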