def read_ascii(fname, labels=None, simple_labels=False,
               sort=False, sort_column=0, _larch=None):
    """read a column ascii column file, returning a group containing the data
    from the file.

      read_ascii(filename, labels=None, simple_labels=False, sort=False, sort_column=0)

    Arguments
    ---------
     fname (str)                 name of file to read
     labels (list, str or None)  labels to use for column labels; a string is
                                 split on commas and whitespace [None]
     simple_labels (bool)        whether to force simple column labels (note 1) [False]
     sort (bool)                 whether to sort row data (note 2) [False]
     sort_column (int)           column to use for sorting (note 2) [0]
     _larch                      not used by this function

    Returns
    --------
      group containing data read from file

    Raises
    ------
      OSError     if the file does not exist or is larger than MAX_FILESIZE
      ValueError  if no block of numerical data is found

    Notes
    -----
      1. column labels.  If `labels` is left the default value of `None`,
         column labels will be tried to be created from the line
         immediately preceding the data, or using 'col1', 'col2', etc if
         column labels cannot be figured out.  The labels will be used as
         names for the 1-d arrays for each column.  If `simple_labels` is
         `True`, the names 'col1', 'col2' etc will be used regardless of
         the column labels found in the file.

      2. sorting.  Data can be sorted to be in increasing order of any
         column, by giving the column index (starting from 0).

      3. header parsing.  If header lines are of the forms of
            KEY : VAL
            KEY = VAL
         these will be parsed into a 'attrs' dictionary in the returned group.

      4. data rows.  Rows whose number of values differs from that of the
         last data row in the file are skipped.

    The returned group will have a number of members:

       GROUP.filename: text name of the file
       GROUP.column_labels: column labels, names of 1-D arrays
       GROUP.array_labels: same list as GROUP.column_labels
       GROUP.data:  2-dimensional data (ncolumns, nrows)
       GROUP.header: array of text lines of the header.
       GROUP.footer: array of text lines of the footer (text after the block
                     of numerical data); only set when a footer is present
       GROUP.attrs : group of attributes parsed from header lines
    """
    if not os.path.isfile(fname):
        raise OSError("File not found: '%s'" % fname)
    if os.stat(fname).st_size > MAX_FILESIZE:
        raise OSError("File '%s' too big for read_ascii()" % fname)

    # the context manager closes the file even if reading raises
    with open(fname, "r") as finp:
        text = finp.readlines()

    _labelline, ncol = None, None
    data, footers, headers = [], [], []

    # walk the file from the bottom up: footer, numerical data, header
    section = "FOOTER"
    for line in reversed(text):
        # strip() already removes the newline; slicing off the last character
        # first would truncate a final line that has no trailing newline
        line = line.strip()
        if len(line) < 1:
            continue
        # look for section transitions (going from bottom to top),
        # parsing each candidate line only once
        rowdat = None
        if section == "FOOTER":
            rowdat = getfloats(line)
            if rowdat is not None:
                section = "DATA"
        elif section == "DATA":
            rowdat = getfloats(line)
            if rowdat is None:
                section = "HEADER"
                _labelline = line
        # act on current section:
        if section == "FOOTER":
            footers.append(line)
        elif section == "HEADER":
            headers.append(line)
        elif section == "DATA":
            if ncol is None:
                ncol = len(rowdat)
            if ncol == len(rowdat):
                data.append(rowdat)

    if simple_labels:
        _labelline = None

    # restore file order, convert data to an array of shape (ncols, nrows)
    footers.reverse()
    headers.reverse()
    data.reverse()
    data = np.array(data).transpose()
    if data.ndim != 2:
        raise ValueError("no numerical data found in '%s'" % fname)
    ncols = data.shape[0]

    # try to parse attributes from header text
    header_attrs = {}
    for hline in headers:
        hline = hline.strip().replace("\t", " ")
        if len(hline) < 1:
            continue
        if hline[0] in COMMENTCHARS:
            hline = hline[1:].strip()
        words, keywds = [], []
        # keywords in 'x: 22' take precedence over 'x = 22'
        for sep in (":", "="):
            if sep in hline:
                words = hline.split(sep, 1)
                keywds = words[0].split()
                break
        if len(keywds) == 1:
            key = colname(keywds[0])
            if key.startswith("_"):
                key = key[1:]
            if len(words) > 1:
                header_attrs[key] = words[1].strip()

    # set column labels
    _labels = ["col%i" % (i + 1) for i in range(ncols)]
    if labels is None:
        if _labelline is None:
            _labelline = " ".join(_labels)
        if _labelline[0] in COMMENTCHARS:
            _labelline = _labelline[1:].strip()
        _labelline = _labelline.lower()
        try:
            labels = [colname(l) for l in _labelline.split()]
        except Exception:
            # best effort: fall back to the default 'colN' names
            labels = []
    elif isinstance(labels, str):
        labels = labels.replace(",", " ")
        labels = [colname(l) for l in labels.split()]

    # labels beyond the number of columns are ignored
    for i, lab in zip(range(ncols), labels):
        _labels[i] = lab

    # sort_column indexes the columns (first axis of data)
    if sort and 0 <= sort_column < ncols:
        data = data[:, np.argsort(data[sort_column])]

    group = Group(
        name="ascii_file %s" % fname,
        filename=fname,
        header=headers,
        column_labels=_labels,
        array_labels=_labels,
        data=data,
    )
    if len(footers) > 0:
        group.footer = footers
    for nam, column in zip(_labels, data):
        setattr(group, nam.lower(), column)
    group.attrs = Group(name="header attributes from %s" % fname)
    for key, val in header_attrs.items():
        setattr(group.attrs, key, val)
    return group
def read_ascii(filename, labels=None, simple_labels=False,
               sort=False, sort_column=0, delimeter=None, _larch=None):
    """read a column ascii column file, returning a group containing the data
    extracted from the file.

      read_ascii(filename, labels=None, simple_labels=False, sort=False, sort_column=0)

    Arguments
    ---------
     filename (str)              name of file to read
     labels (list, str or None)  labels to use for array labels; a string is
                                 split on commas and whitespace [None]
     simple_labels (bool)        whether to force simple column labels (note 1) [False]
     delimeter (str)             accepted for call compatibility; not used by
                                 this implementation [None]
     sort (bool)                 whether to sort row data (note 2) [False]
     sort_column (int)           column to use for sorting (note 2) [0]
     _larch                      not used by this function

    Returns
    --------
      group containing data read from file

    Raises
    ------
      OSError   if the file does not exist or is larger than MAX_FILESIZE

    Notes
    -----
      1. array labels.  If `labels` is `None` (the default value) and
         `simple_labels` is False, array labels are requested from the
         header parser selected by `guess_beamline(headers)`.  The labels
         are then handed to `set_array_labels()` together with
         `simple_labels`, which assigns the names of the 1-d arrays.

      2. sorting.  Data can be sorted to be in increasing order of any
         column, by giving the column index (starting from 0).

      3. header parsing.  If header lines are of the forms of
            KEY : VAL
            KEY = VAL
         these will be parsed into a 'attrs' dictionary in the returned group.

      4. ragged rows.  Data rows with fewer values than the widest row are
         padded with NaN.

    The returned group will have a number of members:

       GROUP.path: file name as given by `filename`
       GROUP.filename: final path component of `filename`
       GROUP.data:  2-dimensional data (ncolumns, nrows)
       GROUP.header: array of text lines of the header.
       GROUP.footer: array of text lines of the footer (text after the block
                     of numerical data); only set when a footer is present
       GROUP.attrs : group of attributes parsed from header lines
    """
    if not os.path.isfile(filename):
        raise OSError("File not found: '%s'" % filename)
    if os.stat(filename).st_size > MAX_FILESIZE:
        raise OSError("File '%s' too big for read_ascii()" % filename)

    with open(filename, 'r') as fh:
        text = fh.read()

    # normalize all line-ending conventions before splitting
    text = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')

    ncol = None
    data, footers, headers = [], [], []

    # walk the file from the bottom up: footer, numerical data, header
    section = 'FOOTER'
    for line in reversed(text):
        line = line.strip()
        if len(line) < 1:
            continue
        # look for section transitions (going from bottom to top),
        # parsing each candidate line only once
        rowdat = None
        if section == 'FOOTER':
            rowdat = getfloats(line)
            if None not in rowdat:
                section = 'DATA'
        elif section == 'DATA':
            rowdat = getfloats(line)
            if None in rowdat:
                section = 'HEADER'

        # act on current section:
        if section == 'FOOTER':
            footers.append(line)
        elif section == 'HEADER':
            headers.append(line)
        elif section == 'DATA':
            if ncol is None:
                ncol = len(rowdat)
            elif ncol > len(rowdat):
                # short row: pad to the current width
                rowdat.extend([np.nan] * (ncol - len(rowdat)))
            elif ncol < len(rowdat):
                # wider row: pad all rows collected so far
                npad = len(rowdat) - ncol
                for row in data:
                    row.extend([np.nan] * npad)
                ncol = len(rowdat)
            data.append(rowdat)

    # restore file order, convert data to an array of shape (ncols, nrows)
    footers.reverse()
    headers.reverse()
    data.reverse()
    data = np.array(data).transpose()

    # try to parse attributes from header text
    header_attrs = {}
    for hline in headers:
        hline = hline.strip().replace('\t', ' ')
        if len(hline) < 1:
            continue
        if hline[0] in COMMENTCHARS:
            hline = hline[1:].strip()
        words, keywds = [], []
        # keywords in 'x: 22' take precedence over 'x = 22'
        for sep in (':', '='):
            if sep in hline:
                words = hline.split(sep, 1)
                keywds = words[0].split()
                break
        if len(keywds) == 1:
            key = colname(keywds[0])
            if key.startswith('_'):
                key = key[1:]
            if len(words) > 1:
                header_attrs[key] = words[1].strip()

    # ncol is None when no data rows were found; comparing an int with
    # None raises TypeError on Python 3
    if sort and ncol is not None and 0 <= sort_column < ncol:
        data = data[:, np.argsort(data[sort_column])]

    fname = os.path.basename(filename)
    group = Group(name='ascii_file %s' % filename,
                  path=filename, filename=fname,
                  header=headers, data=data)
    if len(footers) > 0:
        group.footer = footers

    group.attrs = Group(name='header attributes from %s' % filename)
    for key, val in header_attrs.items():
        setattr(group.attrs, key, val)

    if isinstance(labels, str):
        # a comma separates labels, so it becomes whitespace rather than
        # being deleted (which would fuse 'a,b' into the single label 'ab')
        labels = labels.replace(',', ' ')
        for bchar in '#@%|:*':
            labels = labels.replace(bchar, '')
        labels = labels.split()

    if labels is None and not simple_labels:
        bldat = guess_beamline(headers)(headers)
        labels = bldat.get_array_labels()
        # copy beamline metadata only when it differs from the defaults
        if getattr(bldat, 'energy_units', 'eV') != 'eV':
            group.energy_units = bldat.energy_units
        if getattr(bldat, 'energy_column', 1) != 1:
            group.energy_column = bldat.energy_column
        if getattr(bldat, 'mono_dspace', -1) > 0:
            group.mono_dspace = bldat.mono_dspace

    set_array_labels(group, labels=labels, simple_labels=simple_labels)
    return group
def read_ascii(filename, labels=None, simple_labels=False,
               sort=False, sort_column=0, _larch=None):
    """read a column ascii column file, returning a group containing the data
    extracted from the file.

      read_ascii(filename, labels=None, simple_labels=False, sort=False, sort_column=0)

    Arguments
    ---------
     filename (str)              name of file to read
     labels (list, str or None)  labels to use for column labels; a string is
                                 split on commas, tabs and whitespace [None]
     simple_labels (bool)        whether to force simple column labels (note 1) [False]
     sort (bool)                 whether to sort row data (note 2) [False]
     sort_column (int)           column to use for sorting (note 2) [0]
     _larch                      not used by this function

    Returns
    --------
      group containing data read from file

    Raises
    ------
      OSError     if the file does not exist or is larger than MAX_FILESIZE
      ValueError  if no block of numerical data is found

    Notes
    -----
      1. column labels.  If `labels` is left the default value of `None`,
         column labels will be tried to be created from the line
         immediately preceding the data, or using 'col1', 'col2', etc if
         column labels cannot be figured out.  The label line is split on
         tabs or commas when that yields more than about half as many
         labels as columns, and on whitespace otherwise.  The labels will
         be used as names for the 1-d arrays for each column.  If
         `simple_labels` is `True`, the names 'col1', 'col2' etc will be
         used regardless of the column labels found in the file.
         Missing labels are filled with 'colN'; extra labels are dropped.

      2. sorting.  Data can be sorted to be in increasing order of any
         column, by giving the column index (starting from 0).

      3. header parsing.  If header lines are of the forms of
            KEY : VAL
            KEY = VAL
         these will be parsed into a 'attrs' dictionary in the returned group.

      4. data rows.  Rows whose number of values differs from that of the
         last data row in the file are skipped.

    The returned group will have a number of members:

       GROUP.filename: text name of the file
       GROUP.array_labels: array labels, names of 1-D arrays
       GROUP.column_labels: same list as GROUP.array_labels
       GROUP.data:  2-dimensional data (ncolumns, nrows)
       GROUP.header: array of text lines of the header.
       GROUP.footer: array of text lines of the footer (text after the block
                     of numerical data); only set when a footer is present
       GROUP.attrs : group of attributes parsed from header lines
    """
    if not os.path.isfile(filename):
        raise OSError("File not found: '%s'" % filename)
    if os.stat(filename).st_size > MAX_FILESIZE:
        raise OSError("File '%s' too big for read_ascii()" % filename)

    with open(filename, 'r') as fh:
        text = fh.read()

    # normalize all line-ending conventions before splitting
    text = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')

    _labelline = None
    ncol = None
    data, footers, headers = [], [], []

    # walk the file from the bottom up: footer, numerical data, header
    section = 'FOOTER'
    for line in reversed(text):
        line = line.strip()
        if len(line) < 1:
            continue
        # look for section transitions (going from bottom to top),
        # parsing each candidate line only once
        rowdat = None
        if section == 'FOOTER':
            rowdat = getfloats(line)
            if None not in rowdat:
                section = 'DATA'
        elif section == 'DATA':
            rowdat = getfloats(line)
            if None in rowdat:
                section = 'HEADER'
                # the line just above the data is the candidate label line
                _labelline = line
                if _labelline[0] in COMMENTCHARS:
                    _labelline = _labelline[1:].strip()

        # act on current section:
        if section == 'FOOTER':
            footers.append(line)
        elif section == 'HEADER':
            headers.append(line)
        elif section == 'DATA':
            if ncol is None:
                ncol = len(rowdat)
            if ncol == len(rowdat):
                data.append(rowdat)

    # restore file order, convert data to an array of shape (ncols, nrows)
    footers.reverse()
    headers.reverse()
    data.reverse()
    data = np.array(data).transpose()
    if data.ndim != 2:
        raise ValueError("no numerical data found in '%s'" % filename)
    ncols = data.shape[0]

    # try to parse attributes from header text
    header_attrs = {}
    for hline in headers:
        hline = hline.strip().replace('\t', ' ')
        if len(hline) < 1:
            continue
        if hline[0] in COMMENTCHARS:
            hline = hline[1:].strip()
        words, keywds = [], []
        # keywords in 'x: 22' take precedence over 'x = 22'
        for sep in (':', '='):
            if sep in hline:
                words = hline.split(sep, 1)
                keywds = words[0].split()
                break
        if len(keywds) == 1:
            key = colname(keywds[0])
            if key.startswith('_'):
                key = key[1:]
            if len(words) > 1:
                header_attrs[key] = words[1].strip()

    # set column labels from label line
    _labels = None
    _clabels = ['col%i' % (i + 1) for i in range(ncols)]
    if labels is not None:
        # a list or tuple of labels is used as given; anything else is
        # treated as a delimited string, as before
        if not isinstance(labels, (list, tuple)):
            labels = labels.replace(',', ' ').replace('\t', ' ').split()
        _labels = [colname(l) for l in labels]
    elif simple_labels or _labelline is None:
        _labels = _clabels
    else:
        _labelline = _labelline.lower()
        for delim in ('\t', ','):
            if delim in _labelline:
                _labs = [colname(l) for l in _labelline.split(delim)]
                # accept the delimiter only if it gives a plausible count
                if len(_labs) > int(1 + ncols / 2.0):
                    _labels = _labs
                    break
        if _labels is None:
            _labelline = _labelline.replace(', ', ' ').replace('\t', ' ')
            _labels = [colname(l) for l in _labelline.split()]

    if _labels is None:
        _labels = _clabels
    if len(_labels) < ncols:
        # pad with default names for the unlabeled columns
        _labels.extend(_clabels[len(_labels):])
    elif len(_labels) > ncols:
        _labels = _labels[:ncols]

    if sort and 0 <= sort_column < ncols:
        data = data[:, np.argsort(data[sort_column])]

    group = Group(name='ascii_file %s' % filename,
                  filename=filename, header=headers, data=data,
                  array_labels=_labels, column_labels=_labels)

    if len(footers) > 0:
        group.footer = footers

    reserved = ('data', 'array_labels', 'filename', 'attrs', 'header', 'footer')
    for label, column in zip(_labels, data):
        nam = label.lower()
        # avoid clobbering the group's own members
        if nam in reserved:
            nam = "%s_" % nam
        setattr(group, nam, column)

    group.attrs = Group(name='header attributes from %s' % filename)
    for key, val in header_attrs.items():
        setattr(group.attrs, key, val)
    return group
def read_ascii(fname, labels=None, sort=False, sort_column=0, _larch=None):
    """read a column ascii column file, returning a group containing the data
    from the file.

      read_ascii(filename, labels=None, sort=False, sort_column=0)

    If header lines are of the forms of
        KEY : VAL
        KEY = VAL
    these will be parsed into a 'attrs' dictionary in the returned group.

    If labels is left the default value of None, column labels will be tried
    to be created from the line immediately preceding the data, or using
    'col1', 'col2', etc if column labels cannot be figured out.  `labels`
    may also be a list of labels or a string of labels separated by commas
    or whitespace.  The labels will be used to create 1-d arrays for each
    column.

    If sort is True, the data are sorted to be in increasing order of the
    column with index sort_column (starting from 0).

    The group will have a 'data' component containing the 2-dimensional data
    (ncolumns, nrows), it will also have a 'header' component containing the
    text of the header -- an array of lines.  If a footer (text after the
    block of numerical data) is in the file, the array of lines for this text
    will be put in the 'footer' component.

    Data rows whose number of values differs from that of the last data row
    in the file are skipped.  The `_larch` argument is not used.

    Raises OSError if the file does not exist or is larger than
    MAX_FILESIZE, and ValueError if no block of numerical data is found.
    """
    if not os.path.isfile(fname):
        raise OSError("File not found: '%s'" % fname)
    if os.stat(fname).st_size > MAX_FILESIZE:
        raise OSError("File '%s' too big for read_ascii()" % fname)

    # the context manager closes the file even if reading raises
    with open(fname, 'r') as finp:
        text = finp.readlines()

    _labelline, ncol = None, None
    data, footers, headers = [], [], []

    # walk the file from the bottom up: footer, numerical data, header
    section = 'FOOTER'
    for line in reversed(text):
        # strip() already removes the newline; slicing off the last character
        # first would truncate a final line that has no trailing newline
        line = line.strip()
        if len(line) < 1:
            continue
        # look for section transitions (going from bottom to top),
        # parsing each candidate line only once
        rowdat = None
        if section == 'FOOTER':
            rowdat = getfloats(line)
            if rowdat is not None:
                section = 'DATA'
        elif section == 'DATA':
            rowdat = getfloats(line)
            if rowdat is None:
                section = 'HEADER'
                _labelline = line
        # act on current section:
        if section == 'FOOTER':
            footers.append(line)
        elif section == 'HEADER':
            headers.append(line)
        elif section == 'DATA':
            if ncol is None:
                ncol = len(rowdat)
            if ncol == len(rowdat):
                data.append(rowdat)

    # restore file order, convert data to an array of shape (ncols, nrows)
    footers.reverse()
    headers.reverse()
    data.reverse()
    data = np.array(data).transpose()
    if data.ndim != 2:
        raise ValueError("no numerical data found in '%s'" % fname)
    ncols = data.shape[0]

    # try to parse attributes from header text
    header_attrs = {}
    for hline in headers:
        hline = hline.strip().replace('\t', ' ')
        if len(hline) < 1:
            continue
        if hline[0] in COMMENTCHARS:
            hline = hline[1:].strip()
        words, keywds = [], []
        # keywords in 'x: 22' take precedence over 'x = 22'
        for sep in (':', '='):
            if sep in hline:
                words = hline.split(sep, 1)
                keywds = words[0].split()
                break
        if len(keywds) == 1:
            key = colname(keywds[0])
            if key.startswith('_'):
                key = key[1:]
            if len(words) > 1:
                header_attrs[key] = words[1].strip()

    # set column labels
    _labels = ['col%i' % (i + 1) for i in range(ncols)]
    if labels is None:
        if _labelline is None:
            _labelline = ' '.join(_labels)
        if _labelline[0] in COMMENTCHARS:
            _labelline = _labelline[1:].strip()
        _labelline = _labelline.lower()
        try:
            labels = [colname(l) for l in _labelline.split()]
        except Exception:
            # best effort: fall back to the default 'colN' names
            labels = []
    elif isinstance(labels, str):
        labels = labels.replace(',', ' ')
        labels = [colname(l) for l in labels.split()]

    # labels beyond the number of columns are ignored
    for i, lab in zip(range(ncols), labels):
        _labels[i] = lab

    # sort_column indexes the columns (first axis of data)
    if sort and 0 <= sort_column < ncols:
        data = data[:, np.argsort(data[sort_column])]

    group = Group(name='ascii_file %s' % fname,
                  filename=fname, header=headers,
                  column_labels=_labels, array_labels=_labels,
                  data=data)
    if len(footers) > 0:
        group.footer = footers
    for nam, column in zip(_labels, data):
        setattr(group, nam.lower(), column)
    group.attrs = Group(name='header attributes from %s' % fname)
    for key, val in header_attrs.items():
        setattr(group.attrs, key, val)
    return group
def read_ascii(fname, labels=None, sort=False, ilabels=False,
               sort_column=0, _larch=None):
    """read a column ascii column file, returning a group containing the data
    from the file.

      read_ascii(filename, labels=None, sort=False, ilabels=False, sort_column=0)

    If header lines are of the forms of
        KEY : VAL
        KEY = VAL
    these will be parsed into a 'attrs' dictionary in the returned group.

    If labels is left the default value of None, column labels will be tried
    to be created from the line immediately preceding the data, or using
    'col1', 'col2', etc if column labels cannot be figured out.  `labels`
    may also be a list of labels or a string of labels separated by commas
    or whitespace.  The labels will be used to create 1-d arrays for each
    column.  If ilabels is True, the names 'col1', 'col2' etc will be used
    regardless of the column labels found in the file.

    If sort is True, the data are sorted to be in increasing order of the
    column with index sort_column (starting from 0).

    The group will have a 'data' component containing the 2-dimensional data
    (ncolumns, nrows), it will also have a 'header' component containing the
    text of the header -- an array of lines.  If a footer (text after the
    block of numerical data) is in the file, the array of lines for this text
    will be put in the 'footer' component.

    Data rows whose number of values differs from that of the last data row
    in the file are skipped.  The `_larch` argument is not used.

    Raises OSError if the file does not exist or is larger than
    MAX_FILESIZE, and ValueError if no block of numerical data is found.
    """
    if not os.path.isfile(fname):
        raise OSError("File not found: '%s'" % fname)
    if os.stat(fname).st_size > MAX_FILESIZE:
        raise OSError("File '%s' too big for read_ascii()" % fname)

    # the context manager closes the file even if reading raises
    with open(fname, 'r') as finp:
        text = finp.readlines()

    _labelline, ncol = None, None
    data, footers, headers = [], [], []

    # walk the file from the bottom up: footer, numerical data, header
    section = 'FOOTER'
    for line in reversed(text):
        # strip() already removes the newline; slicing off the last character
        # first would truncate a final line that has no trailing newline
        line = line.strip()
        if len(line) < 1:
            continue
        # look for section transitions (going from bottom to top),
        # parsing each candidate line only once
        rowdat = None
        if section == 'FOOTER':
            rowdat = getfloats(line)
            if rowdat is not None:
                section = 'DATA'
        elif section == 'DATA':
            rowdat = getfloats(line)
            if rowdat is None:
                section = 'HEADER'
                _labelline = line
        # act on current section:
        if section == 'FOOTER':
            footers.append(line)
        elif section == 'HEADER':
            headers.append(line)
        elif section == 'DATA':
            if ncol is None:
                ncol = len(rowdat)
            if ncol == len(rowdat):
                data.append(rowdat)

    if ilabels:
        # discard the label line so the default 'colN' names are used
        _labelline = None

    # restore file order, convert data to an array of shape (ncols, nrows)
    footers.reverse()
    headers.reverse()
    data.reverse()
    data = np.array(data).transpose()
    if data.ndim != 2:
        raise ValueError("no numerical data found in '%s'" % fname)
    ncols = data.shape[0]

    # try to parse attributes from header text
    header_attrs = {}
    for hline in headers:
        hline = hline.strip().replace('\t', ' ')
        if len(hline) < 1:
            continue
        if hline[0] in COMMENTCHARS:
            hline = hline[1:].strip()
        words, keywds = [], []
        # keywords in 'x: 22' take precedence over 'x = 22'
        for sep in (':', '='):
            if sep in hline:
                words = hline.split(sep, 1)
                keywds = words[0].split()
                break
        if len(keywds) == 1:
            key = colname(keywds[0])
            if key.startswith('_'):
                key = key[1:]
            if len(words) > 1:
                header_attrs[key] = words[1].strip()

    # set column labels
    _labels = ['col%i' % (i + 1) for i in range(ncols)]
    if labels is None:
        if _labelline is None:
            _labelline = ' '.join(_labels)
        if _labelline[0] in COMMENTCHARS:
            _labelline = _labelline[1:].strip()
        _labelline = _labelline.lower()
        try:
            labels = [colname(l) for l in _labelline.split()]
        except Exception:
            # best effort: fall back to the default 'colN' names
            labels = []
    elif isinstance(labels, str):
        labels = labels.replace(',', ' ')
        labels = [colname(l) for l in labels.split()]

    # labels beyond the number of columns are ignored
    for i, lab in zip(range(ncols), labels):
        _labels[i] = lab

    # sort_column indexes the columns (first axis of data)
    if sort and 0 <= sort_column < ncols:
        data = data[:, np.argsort(data[sort_column])]

    group = Group(name='ascii_file %s' % fname,
                  filename=fname, header=headers,
                  column_labels=_labels, array_labels=_labels,
                  data=data)
    if len(footers) > 0:
        group.footer = footers
    for nam, column in zip(_labels, data):
        setattr(group, nam.lower(), column)
    group.attrs = Group(name='header attributes from %s' % fname)
    for key, val in header_attrs.items():
        setattr(group.attrs, key, val)
    return group
def read_ascii(filename, labels=None, simple_labels=False,
               sort=False, sort_column=0, delimeter=None, _larch=None):
    """read a column ascii column file, returning a group containing the data
    extracted from the file.

      read_ascii(filename, labels=None, simple_labels=False, sort=False, sort_column=0)

    Arguments
    ---------
     filename (str)              name of file to read
     labels (list, str or None)  labels to use for column labels; a string is
                                 used in place of the file's label line [None]
     simple_labels (bool)        whether to force simple column labels (note 1) [False]
     delimeter (str)             string to use to split label line; passed on
                                 to set_array_labels() [None]
     sort (bool)                 whether to sort row data (note 2) [False]
     sort_column (int)           column to use for sorting (note 2) [0]
     _larch                      not used by this function

    Returns
    --------
      group containing data read from file

    Raises
    ------
      OSError   if the file does not exist or is larger than MAX_FILESIZE

    Notes
    -----
      1. column labels.  The line immediately preceding the data (with a
         leading comment character removed) is taken as the label line.
         `labels`, `simple_labels`, the label line and `delimeter` are
         handed to `set_array_labels()`, which assigns the array labels
         and the names of the 1-d arrays for each column.

      2. sorting.  Data can be sorted to be in increasing order of any
         column, by giving the column index (starting from 0).

      3. header parsing.  If header lines are of the forms of
            KEY : VAL
            KEY = VAL
         these will be parsed into a 'attrs' dictionary in the returned group.

      4. ragged rows.  Data rows with fewer values than the widest row are
         padded with NaN.

    The returned group will have a number of members:

       GROUP.path: file name as given by `filename`
       GROUP.filename: final path component of `filename`
       GROUP.data:  2-dimensional data (ncolumns, nrows)
       GROUP.header: array of text lines of the header.
       GROUP.footer: array of text lines of the footer (text after the block
                     of numerical data); only set when a footer is present
       GROUP.attrs : group of attributes parsed from header lines
    """
    if not os.path.isfile(filename):
        raise OSError("File not found: '%s'" % filename)
    if os.stat(filename).st_size > MAX_FILESIZE:
        raise OSError("File '%s' too big for read_ascii()" % filename)

    with open(filename, 'r') as fh:
        text = fh.read()

    # normalize all line-ending conventions before splitting
    text = text.replace('\r\n', '\n').replace('\r', '\n').split('\n')

    labelline = None
    ncol = None
    data, footers, headers = [], [], []

    # walk the file from the bottom up: footer, numerical data, header
    section = 'FOOTER'
    for line in reversed(text):
        line = line.strip()
        if len(line) < 1:
            continue
        # look for section transitions (going from bottom to top),
        # parsing each candidate line only once
        rowdat = None
        if section == 'FOOTER':
            rowdat = getfloats(line)
            if None not in rowdat:
                section = 'DATA'
        elif section == 'DATA':
            rowdat = getfloats(line)
            if None in rowdat:
                section = 'HEADER'
                # the line just above the data is the candidate label line
                labelline = line
                if labelline[0] in COMMENTCHARS:
                    labelline = labelline[1:].strip()

        # act on current section:
        if section == 'FOOTER':
            footers.append(line)
        elif section == 'HEADER':
            headers.append(line)
        elif section == 'DATA':
            if ncol is None:
                ncol = len(rowdat)
            elif ncol > len(rowdat):
                # short row: pad to the current width
                rowdat.extend([np.nan] * (ncol - len(rowdat)))
            elif ncol < len(rowdat):
                # wider row: pad all rows collected so far
                npad = len(rowdat) - ncol
                for row in data:
                    row.extend([np.nan] * npad)
                ncol = len(rowdat)
            data.append(rowdat)

    # restore file order, convert data to an array of shape (ncols, nrows)
    footers.reverse()
    headers.reverse()
    data.reverse()
    data = np.array(data).transpose()

    # try to parse attributes from header text
    header_attrs = {}
    for hline in headers:
        hline = hline.strip().replace('\t', ' ')
        if len(hline) < 1:
            continue
        if hline[0] in COMMENTCHARS:
            hline = hline[1:].strip()
        words, keywds = [], []
        # keywords in 'x: 22' take precedence over 'x = 22'
        for sep in (':', '='):
            if sep in hline:
                words = hline.split(sep, 1)
                keywds = words[0].split()
                break
        if len(keywds) == 1:
            key = colname(keywds[0])
            if key.startswith('_'):
                key = key[1:]
            if len(words) > 1:
                header_attrs[key] = words[1].strip()

    # ncol is None when no data rows were found; comparing an int with
    # None raises TypeError on Python 3
    if sort and ncol is not None and 0 <= sort_column < ncol:
        data = data[:, np.argsort(data[sort_column])]

    fname = os.path.basename(filename)
    group = Group(name='ascii_file %s' % filename,
                  path=filename, filename=fname,
                  header=headers, data=data)
    if len(footers) > 0:
        group.footer = footers

    group.attrs = Group(name='header attributes from %s' % filename)
    for key, val in header_attrs.items():
        setattr(group.attrs, key, val)

    # a string of labels replaces the label line found in the file
    if isinstance(labels, six.string_types):
        labelline = labels
        labels = None

    set_array_labels(group, labels=labels, simple_labels=simple_labels,
                     labelline=labelline, delimeter=delimeter)
    return group