def save_nparray_to_hdfs(fname, X, hdfs): ''' An instance of numpy's savetext function to enable saving numpy arrays in HDFS as text files ''' fmt = '%.18e' delimiter = ' ' newline = '\n' if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) X = np.asarray(X) if X.ndim == 1: if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] n_fmt_chars = fmt.count('%') fmt = [fmt, ] * ncol format = delimiter.join(fmt) first = True for row in X: if first: hdfs.create_file(fname, asbytes(format % tuple(row) + newline), overwrite=True) first = False else: hdfs.appendfile(fname, asbytes(format % tuple(row) + newline))
def _read_metadata(bitmap): metadata = {} models = [(name[5:], number) for name, number in METADATA_MODELS.__dict__.items() if name.startswith('FIMD_')] tag = ctypes.c_void_p() for model_name, number in models: mdhandle = _FI.FreeImage_FindFirstMetadata(number, bitmap, ctypes.byref(tag)) mdhandle = ctypes.c_void_p(mdhandle) if mdhandle: more = True while more: tag_name = asstr(_FI.FreeImage_GetTagKey(tag)) tag_type = _FI.FreeImage_GetTagType(tag) byte_size = _FI.FreeImage_GetTagLength(tag) char_ptr = ctypes.c_char * byte_size tag_str = char_ptr.from_address(_FI.FreeImage_GetTagValue(tag)) if tag_type == METADATA_DATATYPE.FIDT_ASCII: tag_val = asstr(tag_str.value) else: tag_val = numpy.fromstring(tag_str, dtype=METADATA_DATATYPE.dtypes[tag_type]) if len(tag_val) == 1: tag_val = tag_val[0] metadata[(model_name, tag_name)] = tag_val more = _FI.FreeImage_FindNextMetadata(mdhandle, ctypes.byref(tag)) _FI.FreeImage_FindCloseMetadata(mdhandle) return metadata
def info(self, source): source, close_it = self._open(source) try: # read and validate header line line = source.readline() mmid, matrix, format, field, symmetry = \ [asstr(part.strip().lower()) for part in line.split()] if not mmid.startswith('%%matrixmarket'): raise ValueError,'source is not in Matrix Market format' assert matrix == 'matrix',`line` # ??? Is this necessary? I don't see 'dense' or 'sparse' in the spec # http://math.nist.gov/MatrixMarket/formats.html if format == 'dense': format = self.FORMAT_ARRAY elif format == 'sparse': format = self.FORMAT_COORDINATE # skip comments while line.startswith(asbytes('%')): line = source.readline() line = line.split() if format == self.FORMAT_ARRAY: assert len(line)==2,`line` rows,cols = map(float, line) entries = rows*cols else: assert len(line)==3,`line` rows, cols, entries = map(float, line) return (rows, cols, entries, format, field, symmetry) finally: if close_it: source.close()
def get_variables(self, variable_names=None): ''' get variables from stream as dictionary variable_names - optional list of variable names to get If variable_names is None, then get all variables in file ''' if isinstance(variable_names, basestring): variable_names = [variable_names] self.mat_stream.seek(0) # set up variable reader self.initialize_read() mdict = {} while not self.end_of_stream(): hdr, next_position = self.read_var_header() name = asstr(hdr.name) if variable_names and name not in variable_names: self.mat_stream.seek(next_position) continue mdict[name] = self.read_var_array(hdr) self.mat_stream.seek(next_position) if variable_names: variable_names.remove(name) if len(variable_names) == 0: break return mdict
def get_variables(self, variable_names=None): ''' get variables from stream as dictionary Parameters ---------- variable_names : None or str or sequence of str, optional variable name, or sequence of variable names to get from Mat file / file stream. If None, then get all variables in file ''' if isinstance(variable_names, string_types): variable_names = [variable_names] elif variable_names is not None: variable_names = list(variable_names) self.mat_stream.seek(0) # set up variable reader self.initialize_read() mdict = {} while not self.end_of_stream(): hdr, next_position = self.read_var_header() name = asstr(hdr.name) if variable_names is not None and name not in variable_names: self.mat_stream.seek(next_position) continue mdict[name] = self.read_var_array(hdr) self.mat_stream.seek(next_position) if variable_names is not None: variable_names.remove(name) if len(variable_names) == 0: break return mdict
def _read_var(self): name = asstr(self._unpack_string()) dimensions = [] shape = [] dims = self._unpack_int() for i in range(dims): dimid = self._unpack_int() dimname = self._dims[dimid] dimensions.append(dimname) dim = self.dimensions[dimname] shape.append(dim) dimensions = tuple(dimensions) shape = tuple(shape) attributes = self._read_att_array() nc_type = self.fp.read(4) vsize = self._unpack_int() begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]() typecode, size = TYPEMAP[nc_type] if typecode is 'c': dtype_ = '>c' else: dtype_ = '>%s' % typecode if size > 1: dtype_ += str(size) return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
def _filter_header(s): """Clean up 'L' in npz header ints. Cleans up the 'L' in strings representing integers. Needed to allow npz headers produced in Python2 to be read in Python3. Parameters ---------- s : byte string Npy file header. Returns ------- header : str Cleaned up header. """ import tokenize if sys.version_info[0] >= 3: from io import StringIO else: from StringIO import StringIO tokens = [] last_token_was_number = False for token in tokenize.generate_tokens(StringIO(asstr(s)).read): token_type = token[0] token_string = token[1] if last_token_was_number and token_type == tokenize.NAME and token_string == "L": continue else: tokens.append(token) last_token_was_number = token_type == tokenize.NUMBER return tokenize.untokenize(tokens)
def build_module(source_files, options=[], skip=[], only=[], module_name=None): """ Compile and import a f2py module, built from the given files. """ code = ("import sys; sys.path = %s; import numpy.f2py as f2py2e; " "f2py2e.main()" % repr(sys.path)) d = get_module_dir() # Copy files dst_sources = [] for fn in source_files: if not os.path.isfile(fn): raise RuntimeError("%s is not a file" % fn) dst = os.path.join(d, os.path.basename(fn)) shutil.copyfile(fn, dst) dst_sources.append(dst) fn = os.path.join(os.path.dirname(fn), '.f2py_f2cmap') if os.path.isfile(fn): dst = os.path.join(d, os.path.basename(fn)) if not os.path.isfile(dst): shutil.copyfile(fn, dst) # Prepare options if module_name is None: module_name = get_temp_module_name() f2py_opts = ['-c', '-m', module_name] + options + dst_sources if skip: f2py_opts += ['skip:'] + skip if only: f2py_opts += ['only:'] + only # Build cwd = os.getcwd() try: os.chdir(d) cmd = [sys.executable, '-c', code] + f2py_opts p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = p.communicate() if p.returncode != 0: raise RuntimeError("Running f2py failed: %s\n%s" % (cmd[4:], asstr(out))) finally: os.chdir(cwd) # Partial cleanup for fn in dst_sources: os.unlink(fn) # Import __import__(module_name) return sys.modules[module_name]
def _read_string(f): '''Read a string''' length = _read_long(f) if length > 0: chars = _read_bytes(f, length) _align_32(f) chars = asstr(chars) else: chars = '' return chars
def _read_att_array(self): header = self.fp.read(4) assert header in [ZERO, NC_ATTRIBUTE] count = self._unpack_int() attributes = {} for attr in range(count): name = asstr(self._unpack_string()) attributes[name] = self._read_values() return attributes
def _read_dim_array(self): header = self.fp.read(4) assert header in [ZERO, NC_DIMENSION] count = self._unpack_int() for dim in range(count): name = asstr(self._unpack_string()) length = self._unpack_int() or None # None for record dimension self.dimensions[name] = length self._dims.append(name) # preserve order
def _read_string(f): """Read a string""" length = _read_long(f) if length > 0: chars = _read_bytes(f, length) _align_32(f) chars = asstr(chars) else: chars = None return chars
def _read_string(f): '''Read a string''' length = _read_long(f) if length > 0: chars = _read_bytes(f, length) _align_32(f) chars = asstr(chars) else: warnings.warn("warning: empty strings are now set to '' instead of None") chars = '' return chars
def get_variables(self, variable_names=None): ''' get variables from stream as dictionary variable_names - optional list of variable names to get If variable_names is None, then get all variables in file ''' if isinstance(variable_names, string_types): variable_names = [variable_names] elif variable_names is not None: variable_names = list(variable_names) self.mat_stream.seek(0) # Here we pass all the parameters in self to the reading objects self.initialize_read() mdict = self.read_file_header() mdict['__globals__'] = [] while not self.end_of_stream(): hdr, next_position = self.read_var_header() name = asstr(hdr.name) if name in mdict: warnings.warn('Duplicate variable name "%s" in stream' ' - replacing previous with new\n' 'Consider mio5.varmats_from_mat to split ' 'file into single variable files' % name, MatReadWarning, stacklevel=2) if name == '': # can only be a matlab 7 function workspace name = '__function_workspace__' # We want to keep this raw because mat_dtype processing # will break the format (uint8 as mxDOUBLE_CLASS) process = False else: process = True if variable_names and name not in variable_names: self.mat_stream.seek(next_position) continue try: res = self.read_var_array(hdr, process) except MatReadError as err: warnings.warn( 'Unreadable variable "%s", because "%s"' % (name, err), Warning, stacklevel=2) res = "Read error: %s" % err self.mat_stream.seek(next_position) mdict[name] = res if hdr.is_global: mdict['__globals__'].append(name) if variable_names: variable_names.remove(name) if len(variable_names) == 0: break return mdict
def _read_dim_array(self): header = self.fp.read(4) if header not in [ZERO, NC_DIMENSION]: raise ValueError("Unexpected header.") count = self._unpack_int() for dim in range(count): name = asstr(self._unpack_string()) length = self._unpack_int() or None # None for record dimension self.dimensions[name] = length self._dims.append(name) # preserve order
def _read_att_array(self): header = self.fp.read(4) if header not in [ZERO, NC_ATTRIBUTE]: raise ValueError("Unexpected header.") count = self._unpack_int() attributes = {} for attr in range(count): name = asstr(self._unpack_string()) attributes[name] = self._read_values() return attributes
def list_variables(self): ''' list variables from stream ''' self.mat_stream.seek(0) # set up variable reader self.initialize_read() vars = [] while not self.end_of_stream(): hdr, next_position = self.read_var_header() name = asstr(hdr.name) shape = self._matrix_reader.shape_from_header(hdr) info = mclass_info.get(hdr.mclass, 'unknown') vars.append((name, shape, info)) self.mat_stream.seek(next_position) return vars
def read_minimat_vars(rdr): rdr.initialize_read() mdict = {'__globals__': []} i = 0 while not rdr.end_of_stream(): hdr, next_position = rdr.read_var_header() name = asstr(hdr.name) if name == '': name = 'var_%d' % i i += 1 res = rdr.read_var_array(hdr, process=False) rdr.mat_stream.seek(next_position) mdict[name] = res if hdr.is_global: mdict['__globals__'].append(name) return mdict
def list_variables(self): ''' list variables from stream ''' self.mat_stream.seek(0) # Here we pass all the parameters in self to the reading objects self.initialize_read() self.read_file_header() vars = [] while not self.end_of_stream(): hdr, next_position = self.read_var_header() name = asstr(hdr.name) if name == '': # can only be a matlab 7 function workspace name = '__function_workspace__' shape = self._matrix_reader.shape_from_header(hdr) info = mclass_info.get(hdr.mclass, 'unknown') vars.append((name, shape, info)) self.mat_stream.seek(next_position) return vars
def info(self, source): source, close_it = self._open(source) try: # read and validate header line line = source.readline() mmid, matrix, format, field, symmetry = \ [asstr(part.strip()) for part in line.split()] if not mmid.startswith('%%MatrixMarket'): raise ValueError('source is not in Matrix Market format') if not matrix.lower() == 'matrix': raise ValueError("Problem reading file header: " + line) # http://math.nist.gov/MatrixMarket/formats.html if format.lower() == 'array': format = self.FORMAT_ARRAY elif format.lower() == 'coordinate': format = self.FORMAT_COORDINATE # skip comments while line.startswith(b'%'): line = source.readline() line = line.split() if format == self.FORMAT_ARRAY: if not len(line) == 2: raise ValueError("Header line not of length 2: " + line) rows, cols = map(int, line) entries = rows * cols else: if not len(line) == 3: raise ValueError("Header line not of length 3: " + line) rows, cols, entries = map(int, line) return (rows, cols, entries, format, field.lower(), symmetry.lower()) finally: if close_it: source.close()
def svn_version(): from numpy.compat import asstr env = os.environ.copy() env['LC_ALL'] = 'C' try: out = subprocess.Popen(['svn', 'info'], stdout=subprocess.PIPE, env=env).communicate()[0] except OSError: warnings.warn(" --- Could not run svn info --- ") return "" r = re.compile('Revision: ([0-9]+)') svnver = None for line in asstr(out).split('\n'): m = r.match(line) if m: svnver = m.group(1) if not svnver: raise ValueError("Error while parsing svn version ?") return svnver
def _filter_header(s): """Clean up 'L' in npz header ints. Cleans up the 'L' in strings representing integers. Needed to allow npz headers produced in Python2 to be read in Python3. Parameters ---------- s : byte string Npy file header. Returns ------- header : str Cleaned up header. """ import tokenize if sys.version_info[0] >= 3: from io import StringIO else: from StringIO import StringIO tokens = [] last_token_was_number = False # adding newline as python 2.7.5 workaround string = asstr(s) + "\n" for token in tokenize.generate_tokens(StringIO(string).readline): token_type = token[0] token_string = token[1] if (last_token_was_number and token_type == tokenize.NAME and token_string == "L"): continue else: tokens.append(token) last_token_was_number = (token_type == tokenize.NUMBER) # removing newline (see above) as python 2.7.5 workaround return tokenize.untokenize(tokens)[:-1]
def _read_var(self): name = asstr(self._unpack_string()) dimensions = [] shape = [] dims = self._unpack_int() for i in range(dims): dimid = self._unpack_int() dimname = self._dims[dimid] dimensions.append(dimname) dim = self.dimensions[dimname] shape.append(dim) dimensions = tuple(dimensions) shape = tuple(shape) attributes = self._read_att_array() nc_type = self.fp.read(4) vsize = int(fromstring(self.fp.read(4), 'int32')[0]) start = [self._unpack_int, self._unpack_int64][self.version_byte-1]() type = TYPEMAP(nc_type) return name, dimensions, shape, attributes, type, start, vsize
def _filter_header(s): """Clean up 'L' in npz header ints. Cleans up the 'L' in strings representing integers. Needed to allow npz headers produced in Python2 to be read in Python3. Parameters ---------- s : byte string Npy file header. Returns ------- header : str Cleaned up header. """ import tokenize if sys.version_info[0] >= 3: from io import StringIO else: from StringIO import StringIO tokens = [] last_token_was_number = False for token in tokenize.generate_tokens(StringIO(asstr(s)).read): token_type = token[0] token_string = token[1] if (last_token_was_number and token_type == tokenize.NAME and token_string == "L"): continue else: tokens.append(token) last_token_was_number = (token_type == tokenize.NUMBER) return tokenize.untokenize(tokens)
def _read_var(self): name = asstr(self._unpack_string()) dimensions = [] shape = [] dims = self._unpack_int() for i in range(dims): dimid = self._unpack_int() dimname = self._dims[dimid] dimensions.append(dimname) dim = self.dimensions[dimname] shape.append(dim) dimensions = tuple(dimensions) shape = tuple(shape) attributes = self._read_att_array() nc_type = self.fp.read(4) vsize = self._unpack_int() begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]() typecode, size = TYPEMAP[nc_type] dtype_ = '>%s' % typecode return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
def savetxt_nice(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# '): """ Reimplmenentation of numpy's savetxt that doesn't complain about bytes when saving to unicode files in Python 3. Save an array to a text file. Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : array_like Data to be saved to a text file. fmt : str or sequence of strs, optional A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. For complex `X`, the legal options for `fmt` are: a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted like `' (%s+%sj)' % (fmt, fmt)` b) a full string specifying every real and imaginary part, e.g. `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns c) a list of specifiers, one per column - in this case, the real and imaginary part must have separate specifiers, e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns delimiter : str, optional String or character separating columns. newline : str, optional String or character separating lines. .. versionadded:: 1.5.0 header : str, optional String that will be written at the beginning of the file. .. versionadded:: 1.7.0 footer : str, optional String that will be written at the end of the file. .. versionadded:: 1.7.0 comments : str, optional String that will be prepended to the ``header`` and ``footer`` strings, to mark them as comments. Default: '# ', as expected by e.g. ``numpy.loadtxt``. .. versionadded:: 1.7.0 See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into an uncompressed ``.npz`` archive savez_compressed : Save several arrays into a compressed ``.npz`` archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to precede result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> x = y = z = np.arange(0.0,5.0,1.0) >>> np.savetxt('test.out', x, delimiter=',') # X is an array >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation """ # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) own_fh = False if _is_string_like(fname): own_fh = True if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: if sys.version_info[0] >= 3: fh = open(fname, 'wb') else: fh = open(fname, 'w') elif hasattr(fname, 'write'): fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') print("type(fmt) = %s" % type(fmt)) if isinstance(fmt, (list, tuple)): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [ ' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [ fmt, ] * ncol format = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif ((not iscomplex_X) and n_fmt_chars != ncol): raise error else: format = fmt else: raise ValueError('invalid fmt: %r' % (fmt, )) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(asbytes(comments + header + newline)) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) fh.write(asbytes(format % tuple(row2) + newline)) else: for row in X: try: print('format = %r' % format, type(format)) fh.write(asbytes(format % tuple(row) + newline)) except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), format)) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(asbytes(comments + footer + newline)) finally: if own_fh: fh.close()
def info(self, source): """ Return size, storage parameters from Matrix Market file-like 'source'. Parameters ---------- source : str or file-like Matrix Market filename (extension .mtx) or open file-like object Returns ------- rows : int Number of matrix rows. cols : int Number of matrix columns. entries : int Number of non-zero entries of a sparse matrix or rows*cols for a dense matrix. format : str Either 'coordinate' or 'array'. field : str Either 'real', 'complex', 'pattern', or 'integer'. symmetry : str Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'. """ stream, close_it = self._open(source) try: # read and validate header line line = stream.readline() mmid, matrix, format, field, symmetry = \ [asstr(part.strip()) for part in line.split()] if not mmid.startswith('%%MatrixMarket'): raise ValueError('source is not in Matrix Market format') if not matrix.lower() == 'matrix': raise ValueError("Problem reading file header: " + line) # http://math.nist.gov/MatrixMarket/formats.html if format.lower() == 'array': format = self.FORMAT_ARRAY elif format.lower() == 'coordinate': format = self.FORMAT_COORDINATE # skip comments while line.startswith(b'%'): line = stream.readline() # skip empty lines while not line.strip(): line = stream.readline() line = line.split() if format == self.FORMAT_ARRAY: if not len(line) == 2: raise ValueError("Header line not of length 2: " + line) rows, cols = map(int, line) entries = rows * cols else: if not len(line) == 3: raise ValueError("Header line not of length 3: " + line) rows, cols, entries = map(int, line) return (rows, cols, entries, format, field.lower(), symmetry.lower()) finally: if close_it: stream.close()
def varmats_from_mat(file_obj): """ Pull variables out of mat 5 file as a sequence of mat file objects This can be useful with a difficult mat file, containing unreadable variables. This routine pulls the variables out in raw form and puts them, unread, back into a file stream for saving or reading. Another use is the pathological case where there is more than one variable of the same name in the file; this routine returns the duplicates, whereas the standard reader will overwrite duplicates in the returned dictionary. The file pointer in `file_obj` will be undefined. File pointers for the returned file-like objects are set at 0. Parameters ---------- file_obj : file-like file object containing mat file Returns ------- named_mats : list list contains tuples of (name, BytesIO) where BytesIO is a file-like object containing mat file contents as for a single variable. The BytesIO contains a string with the original header and a single var. If ``var_file_obj`` is an individual BytesIO instance, then save as a mat file with something like ``open('test.mat', 'wb').write(var_file_obj.read())`` Examples -------- >>> import scipy.io BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for python < 3. >>> mat_fileobj = BytesIO() >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'}) >>> varmats = varmats_from_mat(mat_fileobj) >>> sorted([name for name, str_obj in varmats]) ['a', 'b'] """ rdr = MatFile5Reader(file_obj) file_obj.seek(0) # Raw read of top-level file header hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize raw_hdr = file_obj.read(hdr_len) # Initialize variable reading file_obj.seek(0) rdr.initialize_read() mdict = rdr.read_file_header() next_position = file_obj.tell() named_mats = [] while not rdr.end_of_stream(): start_position = next_position hdr, next_position = rdr.read_var_header() name = asstr(hdr.name) # Read raw variable string file_obj.seek(start_position) byte_count = next_position - start_position var_str = file_obj.read(byte_count) # write to stringio object out_obj = BytesIO() out_obj.write(raw_hdr) out_obj.write(var_str) out_obj.seek(0) named_mats.append((name, out_obj)) return named_mats
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None, progress_callback=None): """numpy.savetxt modified to output progress """ # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) class WriteWrap: """Convert to bytes on bytestream inputs. """ def __init__(self, ifh, iencoding): self.fh = ifh self.encoding = iencoding self.do_write = self.first_write self.write = None def close(self): self.fh.close() def write(self, v): self.do_write(v) def write_bytes(self, v): if isinstance(v, bytes): self.fh.write(v) else: self.fh.write(v.encode(self.encoding)) def write_normal(self, v): self.fh.write(asunicode(v)) def first_write(self, v): try: self.write_normal(v) self.write = self.write_normal except TypeError: # input is probably a bytestream self.write_bytes(v) self.write = self.write_bytes own_fh = False if isinstance(fname, os_PathLike): fname = os_fspath(fname) if isinstance(fname, str): # datasource doesn't vis_support creating a new file ... open(fname, 'wt').close() fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) own_fh = True elif hasattr(fname, 'write'): # wrap to handle byte output streams fh = WriteWrap(fname, encoding or 'latin1') else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 0 or X.ndim > 2: raise ValueError("Expected 1D or 2D array, got %dD array instead" % X.ndim) elif X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.names) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) iformat = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [ ' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [ fmt, ] * ncol iformat = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif (not iscomplex_X) and n_fmt_chars != ncol: raise error else: iformat = fmt else: raise ValueError('invalid fmt: %r' % (fmt, )) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(comments + header + newline) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) s = iformat % tuple(row2) + newline fh.write(s.replace('+-', '-')) else: length = len(X) for ind, row in enumerate(X): try: v = iformat % tuple(row) + newline except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), iformat)) fh.write(v) progress_callback.emit((ind + 1) * 100 / length) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(comments + footer + newline) finally: if own_fh: fh.close()
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# '): """This code is from NumPy 1.9.1. For help see there. It was included because features are used that were added in version 1.7 but on some machines only NumPy version 1.6.2 is available. """ ## needed for the rest from numpy.compat import asstr, asbytes def _is_string_like(obj): try: obj + '' except (TypeError, ValueError): return False return True # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) own_fh = False if _is_string_like(fname): own_fh = True if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: if os.sys.version_info[0] >= 3: fh = open(fname, 'wb') else: fh = open(fname, 'w') elif hasattr(fname, 'write'): fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '%10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [ ' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [ fmt, ] * ncol format = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif ((not iscomplex_X) and n_fmt_chars != ncol): raise error else: format = fmt else: raise ValueError('invalid fmt: %r' % (fmt, )) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(asbytes(comments + header + newline)) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) fh.write(asbytes(format % tuple(row2) + newline)) else: for row in X: fh.write(asbytes(format % tuple(row) + newline)) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(asbytes(comments + footer + newline)) finally: if own_fh: fh.close()
def build_module_distutils(source_files, config_code, module_name, **kw): """ Build a module via distutils and import it. """ from numpy.distutils.misc_util import Configuration from numpy.distutils.core import setup d = get_module_dir() # Copy files dst_sources = [] for fn in source_files: if not os.path.isfile(fn): raise RuntimeError("%s is not a file" % fn) dst = os.path.join(d, os.path.basename(fn)) shutil.copyfile(fn, dst) dst_sources.append(dst) # Build script config_code = textwrap.dedent(config_code).replace("\n", "\n ") code = """\ import os import sys sys.path = %(syspath)s def configuration(parent_name='',top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration('', parent_name, top_path) %(config_code)s return config if __name__ == "__main__": from numpy.distutils.core import setup setup(configuration=configuration) """ % dict(config_code=config_code, syspath=repr(sys.path)) script = os.path.join(d, get_temp_module_name() + '.py') dst_sources.append(script) f = open(script, 'wb') f.write(asbytes(code)) f.close() # Build cwd = os.getcwd() try: os.chdir(d) cmd = [sys.executable, script, 'build_ext', '-i'] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = p.communicate() if p.returncode != 0: raise RuntimeError("Running distutils build failed: %s\n%s" % (cmd[4:], asstr(out))) finally: os.chdir(cwd) # Partial cleanup for fn in dst_sources: os.unlink(fn) # Import __import__(module_name) return sys.modules[module_name]
def build_module_distutils(source_files, config_code, module_name, **kw): """ Build a module via distutils and import it. """ from numpy.distutils.misc_util import Configuration from numpy.distutils.core import setup d = get_module_dir() # Copy files dst_sources = [] for fn in source_files: if not os.path.isfile(fn): raise RuntimeError("%s is not a file" % fn) dst = os.path.join(d, os.path.basename(fn)) shutil.copyfile(fn, dst) dst_sources.append(dst) # Build script config_code = textwrap.dedent(config_code).replace("\n", "\n ") code = """\ import os import sys sys.path = %(syspath)s def configuration(parent_name='',top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration('', parent_name, top_path) %(config_code)s return config if __name__ == "__main__": from numpy.distutils.core import setup setup(configuration=configuration) """ % dict(config_code=config_code, syspath = repr(sys.path)) script = os.path.join(d, get_temp_module_name() + '.py') dst_sources.append(script) f = open(script, 'wb') f.write(asbytes(code)) f.close() # Build cwd = os.getcwd() try: os.chdir(d) cmd = [sys.executable, script, 'build_ext', '-i'] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) out, err = p.communicate() if p.returncode != 0: raise RuntimeError("Running distutils build failed: %s\n%s" % (cmd[4:], asstr(out))) finally: os.chdir(cwd) # Partial cleanup for fn in dst_sources: os.unlink(fn) # Import __import__(module_name) return sys.modules[module_name]
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', preamble=None, comment_character='#', header=None): """ Save an array to a text file. Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : array_like Data to be saved to a text file. fmt : str or sequence of strs A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. delimiter : str Character separating columns. newline : str .. versionadded:: 1.5.0 Character separating lines. preamble : str or sequence of strs, optional If specified, the content of the strings will be added at the top of the file. Each line will be preceded by the `comment_character` string and terminated by `newline`. In default configuration, numpy.loadtxt recognizes the preamble as a comment and, thus, ignores it. comment_character : str, optional The string which is intended to introduce the lines of the `preamble` (default is '#'). Please note that numpy.loadtxt uses the `comments` keyword to refer to this string. header : bool or sequence of strs, optional The column names; will be added after the `preamble` (if specified), but before the data. If header is 'True', the names will either be inferred from `X`.dtype.names or default names (f1, f2..fn) will be used. If a sequence of strs is given, these will be used as names. See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into a ``.npz`` compressed archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to preceed result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. Explanation of `preamble` and `header`: Both preamble and header are used to add information on the data at the top of the file. In default configuration, the preamble preserves compatibility of the output file with numpy.loadtxt, while the header not necessarily does. The header is a single line containing, for example, the name of each column. While this is very desirable, it may become a problem when the data is recovered with numpy.loadtxt, which (in default configuration) does not recognize the header and probably fails, because it cannot convert the header entries to the target type. Additionally, a preamble can be used to put an introductory text at the top of the file. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> x = y = z = np.arange(0.0,5.0,1.0) >>> np.savetxt('test.out', x, delimiter=',') # X is an array >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation >>> recar = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']}) >>> np.savetxt('test.out', recar, header=True) >>> np.savetxt('test.out', recar, preamble=['This is the most','important result.']) >>> np.savetxt('test.out', recar, header=['Apples', 'Oranges']) """ import sys import numpy as np from numpy.compat import asbytes, asstr, asbytes_nested, bytes def _is_string_like(v): try: v + '' except: return False return True # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) own_fh = False if _is_string_like(fname): own_fh = True if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: if sys.version_info[0] >= 3: fh = open(fname, 'wb') else: fh = open(fname, 'w') elif hasattr(fname, 'seek'): fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = 0 for desc in X.dtype.descr: if len(desc) == 3: ncol = ncol + desc[2] else: ncol = ncol + 1 else: ncol = X.shape[1] # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = asstr(delimiter).join(map(asstr, fmt)) elif type(fmt) is str: if fmt.count('%') == 1: fmt = [ fmt, ] * ncol format = delimiter.join(fmt) elif fmt.count('%') != ncol: raise AttributeError( 'fmt has wrong number of %% formats. %s' % fmt) else: format = fmt if preamble is not None: # A comment object was specified. # Check whether the comment_character is valid. if not _is_string_like(comment_character): raise ValueError("'comment_character' has to be a string.") # If the comment is a plane string, make it an element of a list # to be iterated over below. if _is_string_like(preamble): preamble = [preamble] # Iterate over comment argument if possible. if hasattr(preamble, '__iter__'): for co in preamble: # Check whether individual elements are string-like if _is_string_like(co): # Remove trailing newline character and split string # at occurrence of newline character to avoid new # preamble lines without introducing comment character. co = co.rstrip(newline) cosplit = co.split(newline) for c in cosplit: fh.write( asbytes(''.join( (comment_character, c, newline)))) if header is not None: # Check whether column names are given as strings, or names # must be infered from dtype. if isinstance(header, bool): if header: # If possible, infer column names from dtype and use default # (f1, f2 .. fn) otherwise. if X.dtype.names is not None: column_names = X.dtype.names else: column_names = [] for i in range(ncol): column_names.append(''.join(('f', str(i + 1)))) # Check whether column names are given as a list/tuple and check number. elif isinstance(header, list) or isinstance(header, tuple): if len(header) != ncol: raise ValueError(''.join( ("Table has %i column(s) but header has %i entrie(s)" % (ncol, len(header))))) column_names = header # Write column names (seperated by delimiter). headerline = '' for colname in column_names: headerline = ''.join((headerline, colname, delimiter)) headerline = headerline.rstrip(delimiter) fh.write(asbytes(''.join((headerline, newline)))) for row in X: fh.write(asbytes(format % tuple(row) + newline)) finally: if own_fh: fh.close()
def savetxt_nice(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None): """ Save an array to a text file. This is 95% a backport of numpy 1.15.1's savetxt. It does not support: - DataSource's URL - pathlib fnames - gz files Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : 1D or 2D array_like Data to be saved to a text file. fmt : str or sequence of strs, optional A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. For complex `X`, the legal options for `fmt` are: * a single specifier, `fmt='%.4e'`, resulting in numbers formatted like `' (%s+%sj)' % (fmt, fmt)` * a full string specifying every real and imaginary part, e.g. `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns * a list of specifiers, one per column - in this case, the real and imaginary part must have separate specifiers, e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns delimiter : str, optional String or character separating columns. newline : str, optional String or character separating lines. .. versionadded:: 1.5.0 header : str, optional String that will be written at the beginning of the file. .. versionadded:: 1.7.0 footer : str, optional String that will be written at the end of the file. .. versionadded:: 1.7.0 comments : str, optional String that will be prepended to the ``header`` and ``footer`` strings, to mark them as comments. Default: '# ', as expected by e.g. ``numpy.loadtxt``. .. versionadded:: 1.7.0 encoding : {None, str}, optional Encoding used to encode the outputfile. Does not apply to output streams. If the encoding is something other than 'bytes' or 'latin1' you will not be able to load the file in NumPy versions < 1.14. Default is 'latin1'. .. versionadded:: 1.14.0 See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into an uncompressed ``.npz`` archive savez_compressed : Save several arrays into a compressed ``.npz`` archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to precede result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> x = y = z = np.arange(0.0,5.0,1.0) >>> np.savetxt('test.out', x, delimiter=',') # X is an array >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation """ # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) class WriteWrap: """Convert to unicode in py2 or to bytes on bytestream inputs.""" def __init__(self, fh, encoding): self.fh = fh self.encoding = encoding self.do_write = self.first_write def close(self): self.fh.close() def write(self, v): self.do_write(v) def write_bytes(self, v): if isinstance(v, bytes): self.fh.write(v) else: self.fh.write(v.encode(self.encoding)) def write_normal(self, v): self.fh.write(asunicode(v)) def first_write(self, v): try: self.write_normal(v) self.write = self.write_normal except TypeError: # input is probably a bytestream self.write_bytes(v) self.write = self.write_bytes own_fh = False #if is_pathlib_path(fname): #fname = str(fname) if _is_string_like(fname): # datasource doesn't support creating a new file ... open(fname, 'wt').close() fh = open(fname, 'wt', encoding=encoding) own_fh = True # need to convert str to unicode for text io output if sys.version_info[0] == 2: fh = WriteWrap(fh, encoding or 'latin1') elif hasattr(fname, 'write'): # wrap to handle byte output streams fh = WriteWrap(fname, encoding or 'latin1') #if _is_string_like(fname): #own_fh = True #if fname.endswith('.gz'): #import gzip #fh = gzip.open(fname, 'wb') #else: #if sys.version_info[0] >= 3: #fh = open(fname, 'wb') #else: #fh = open(fname, 'w') #elif hasattr(fname, 'write'): #fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 0 or X.ndim > 2: raise ValueError( "Expected 1D or 2D array, got %dD array instead" % X.ndim) elif X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if isinstance(fmt, (list, tuple)): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) txt_format = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [fmt, ] * ncol txt_format = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif ((not iscomplex_X) and n_fmt_chars != ncol): raise error else: txt_format = fmt else: raise ValueError('invalid fmt: %r' % (fmt,)) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(comments + header + newline) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) s = txt_format % tuple(row2) + newline fh.write(s.replace('+-', '-')) else: for row in X: try: v = txt_format % tuple(row) + newline except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), txt_format)) fh.write(v) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(comments + footer + newline) finally: if own_fh: fh.close()
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', preamble=None, comment_character='#', header=None): """ Save an array to a text file. Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : array_like Data to be saved to a text file. fmt : str or sequence of strs A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. delimiter : str Character separating columns. newline : str .. versionadded:: 1.5.0 Character separating lines. preamble : str or sequence of strs, optional If specified, the content of the strings will be added at the top of the file. Each line will be preceded by the `comment_character` string and terminated by `newline`. In default configuration, numpy.loadtxt recognizes the preamble as a comment and, thus, ignores it. comment_character : str, optional The string which is intended to introduce the lines of the `preamble` (default is '#'). Please note that numpy.loadtxt uses the `comments` keyword to refer to this string. header : bool or sequence of strs, optional The column names; will be added after the `preamble` (if specified), but before the data. If header is 'True', the names will either be inferred from `X`.dtype.names or default names (f1, f2..fn) will be used. If a sequence of strs is given, these will be used as names. See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into a ``.npz`` compressed archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to preceed result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. Explanation of `preamble` and `header`: Both preamble and header are used to add information on the data at the top of the file. In default configuration, the preamble preserves compatibility of the output file with numpy.loadtxt, while the header not necessarily does. The header is a single line containing, for example, the name of each column. While this is very desirable, it may become a problem when the data is recovered with numpy.loadtxt, which (in default configuration) does not recognize the header and probably fails, because it cannot convert the header entries to the target type. Additionally, a preamble can be used to put an introductory text at the top of the file. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> x = y = z = np.arange(0.0,5.0,1.0) >>> np.savetxt('test.out', x, delimiter=',') # X is an array >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation >>> recar = np.zeros(3, dtype={'names':['col1', 'col2'], 'formats':['i4','f4']}) >>> np.savetxt('test.out', recar, header=True) >>> np.savetxt('test.out', recar, preamble=['This is the most','important result.']) >>> np.savetxt('test.out', recar, header=['Apples', 'Oranges']) """ import sys import numpy as np from numpy.compat import asbytes, asstr, asbytes_nested, bytes def _is_string_like(v): try: v + '' except: return False return True # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) own_fh = False if _is_string_like(fname): own_fh = True if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: if sys.version_info[0] >= 3: fh = open(fname, 'wb') else: fh = open(fname, 'w') elif hasattr(fname, 'seek'): fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = 0 for desc in X.dtype.descr: if len(desc) == 3: ncol = ncol + desc[2] else : ncol = ncol + 1 else: ncol = X.shape[1] # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = asstr(delimiter).join(map(asstr, fmt)) elif type(fmt) is str: if fmt.count('%') == 1: fmt = [fmt, ]*ncol format = delimiter.join(fmt) elif fmt.count('%') != ncol: raise AttributeError('fmt has wrong number of %% formats. %s' % fmt) else: format = fmt if preamble is not None: # A comment object was specified. # Check whether the comment_character is valid. if not _is_string_like(comment_character): raise ValueError("'comment_character' has to be a string.") # If the comment is a plane string, make it an element of a list # to be iterated over below. if _is_string_like(preamble): preamble = [preamble] # Iterate over comment argument if possible. if hasattr(preamble, '__iter__'): for co in preamble: # Check whether individual elements are string-like if _is_string_like(co): # Remove trailing newline character and split string # at occurrence of newline character to avoid new # preamble lines without introducing comment character. co = co.rstrip(newline) cosplit = co.split(newline) for c in cosplit: fh.write(asbytes(''.join((comment_character,c,newline)))) if header is not None: # Check whether column names are given as strings, or names # must be infered from dtype. if isinstance(header, bool): if header: # If possible, infer column names from dtype and use default # (f1, f2 .. fn) otherwise. if X.dtype.names is not None: column_names = X.dtype.names else: column_names = [] for i in range(ncol): column_names.append(''.join(('f',str(i+1)))) # Check whether column names are given as a list/tuple and check number. elif isinstance(header, list) or isinstance(header, tuple): if len(header) != ncol: raise ValueError(''.join(("Table has %i column(s) but header has %i entrie(s)" % (ncol,len(header))))) column_names = header # Write column names (seperated by delimiter). headerline = '' for colname in column_names: headerline = ''.join((headerline, colname, delimiter)) headerline = headerline.rstrip(delimiter) fh.write(asbytes(''.join((headerline,newline)))) for row in X: fh.write(asbytes(format % tuple(row) + newline)) finally: if own_fh: fh.close()
def info(self, source): """ Return size, storage parameters from Matrix Market file-like 'source'. Parameters ---------- source : str or file-like Matrix Market filename (extension .mtx) or open file-like object Returns ------- rows : int Number of matrix rows. cols : int Number of matrix columns. entries : int Number of non-zero entries of a sparse matrix or rows*cols for a dense matrix. format : str Either 'coordinate' or 'array'. field : str Either 'real', 'complex', 'pattern', or 'integer'. symmetry : str Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'. """ stream, close_it = self._open(source) try: # read and validate header line line = stream.readline() mmid, matrix, format, field, symmetry = \ [asstr(part.strip()) for part in line.split()] if not mmid.startswith('%%MatrixMarket'): raise ValueError('source is not in Matrix Market format') if not matrix.lower() == 'matrix': raise ValueError("Problem reading file header: " + line) # http://math.nist.gov/MatrixMarket/formats.html if format.lower() == 'array': format = self.FORMAT_ARRAY elif format.lower() == 'coordinate': format = self.FORMAT_COORDINATE # skip comments while line.startswith(b'%'): line = stream.readline() line = line.split() if format == self.FORMAT_ARRAY: if not len(line) == 2: raise ValueError("Header line not of length 2: " + line) rows, cols = map(int, line) entries = rows * cols else: if not len(line) == 3: raise ValueError("Header line not of length 3: " + line) rows, cols, entries = map(int, line) return (rows, cols, entries, format, field.lower(), symmetry.lower()) finally: if close_it: stream.close()