def getData(url):
    """
    Get tabular data from either a text file or HDF5 table, and return it as a
    numpy array.

    The URL is resolved to a local path with `getFromURL`. If PyTables is
    available (`use_hdf5`) and the path is an HDF5 file, the single table it
    contains is copied into a float array of shape (nrows, ncolumns).
    Otherwise, if the path is a regular file, the URL is read as
    whitespace-separated text; blank lines and lines whose first
    non-whitespace character is '#' are ignored.

    Raises:
        AssertionError: the HDF5 file does not contain exactly one table.
        Exception: the path is neither an HDF5 file nor a regular file.
    """
    # NOTE(review): another definition of getData appears later in this file
    # and supersedes this one at import time; consider removing one of them.
    path = getFromURL(url)
    if use_hdf5 and tables.isHDF5File(path):
        h5file = tables.openFile(path, mode="r")
        try:
            table_nodes = h5file.listNodes(h5file.root, classname="Table")
            # The check also fails when the file contains no table at all.
            assert len(table_nodes) == 1, "File contains more than one table."
            table = table_nodes[0]
            colnames = table.colnames
            arr = numpy.zeros((table.nrows, len(colnames)), "float")
            for i, row in enumerate(table):
                for j, colname in enumerate(colnames):
                    arr[i, j] = row[colname]
        finally:
            # Release the file handle even if reading the table fails.
            h5file.close()
    elif os.path.isfile(path):
        f = srblib.urlopen(url)
        try:
            lines = f.readlines()
        finally:
            f.close()
        rows = []
        for line in lines:
            stripped = line.strip()
            # A blank line would yield an empty row and make the array ragged,
            # so skip it together with comment lines.
            if not stripped or stripped[:1] == "#":
                continue
            rows.append(stripped.split())
        arr = numpy.array(rows).astype("float")
    else:
        raise Exception("Not a valid file.")
    return arr
def getData(url):
    """
    Get tabular data from either a text file or HDF5 table, and return it as a
    numpy array.

    The URL is resolved to a local path with `getFromURL`. If PyTables is
    available (`use_hdf5`) and the path is an HDF5 file, the single table it
    contains is copied into a float array of shape (nrows, ncolumns).
    Otherwise, if the path is a regular file, the URL is read as
    whitespace-separated text; blank lines and lines whose first
    non-whitespace character is '#' are ignored.

    Raises:
        AssertionError: the HDF5 file does not contain exactly one table.
        Exception: the path is neither an HDF5 file nor a regular file.
    """
    # NOTE(review): this redefines the getData declared earlier in this file;
    # consider removing one of the two definitions.
    path = getFromURL(url)
    if use_hdf5 and tables.isHDF5File(path):
        h5file = tables.openFile(path, mode='r')
        try:
            table_nodes = h5file.listNodes(h5file.root, classname="Table")
            # The check also fails when the file contains no table at all.
            assert len(table_nodes) == 1, "File contains more than one table."
            table = table_nodes[0]
            colnames = table.colnames
            arr = numpy.zeros((table.nrows, len(colnames)), 'float')
            for i, row in enumerate(table):
                for j, colname in enumerate(colnames):
                    arr[i, j] = row[colname]
        finally:
            # Release the file handle even if reading the table fails.
            h5file.close()
    elif os.path.isfile(path):
        f = srblib.urlopen(url)
        try:
            lines = f.readlines()
        finally:
            f.close()
        rows = []
        for line in lines:
            stripped = line.strip()
            # A blank line would yield an empty row and make the array ragged,
            # so skip it together with comment lines.
            if not stripped or stripped[:1] == '#':
                continue
            rows.append(stripped.split())
        arr = numpy.array(rows).astype('float')
    else:
        raise Exception("Not a valid file.")
    return arr
def txt_to_hdf5(input_url, output_url):
    """
    Convert a data file in ascii format to a simple HDF5 file containing a
    single table.

    The data file should contain numerical data in columns separated by spaces
    and/or tabs. Lines beginning with '#' are taken to be comments; blank
    lines are ignored. Column headings can be specified as a comment on the
    first line of the file. Headings containing spaces must be enclosed in
    double quotes; a trailing parenthesised part of a heading, e.g.
    "time (ms)", is stored as the table attribute `units_<column>`.
    If the number of headings does not match the number of fields in the
    first data row, the columns are named col1, col2, ...

    Example:
        # time voltage
        0.0  -65.0
        0.1  -64.8

    The output is written to the path of `output_url` with its extension
    replaced by '.h5'; the table is named after the file's base name and all
    columns are stored as Float32.

    Raises:
        Exception: PyTables is not available (`use_hdf5` is false).
        ValueError: the input contains no data rows, or a field is not a
            number.
        AssertionError: `output_url` does not use the 'file' protocol.
    """
    # NOTE(review): another definition of txt_to_hdf5 appears later in this
    # file and supersedes this one at import time; consider removing one.
    if not use_hdf5:
        raise Exception("PyTables not installed.")
    f = srblib.urlopen(input_url)
    try:
        contents = f.readlines()
    finally:
        f.close()

    # Keep only real data rows. Slicing off the first line unconditionally
    # would lose a row when the file has no header comment, and blank or
    # comment lines further down must not be converted to numbers.
    data_lines = []
    for raw_line in contents:
        stripped = raw_line.strip()
        if stripped and stripped[:1] != "#":
            data_lines.append(stripped)
    if not data_lines:
        raise ValueError("No data found in %s" % input_url)

    # extract header information
    firstline = contents[0]
    headers = []
    udict = {}
    if firstline[0] == "#":
        if '"' in firstline:
            headers = [h.strip() for h in firstline[1:].split('"')
                       if h and h.strip()]
        else:
            headers = firstline[1:].split()
        for h, colname in enumerate(headers):
            # check for units
            p = colname.find("(")
            if p > 0:
                units = colname[p:]
                colname = colname[:p].strip().replace(" ", "_")
                udict[colname] = units
            headers[h] = colname.strip().replace(" ", "_")
    nfields = len(data_lines[0].split())
    if len(headers) != nfields:
        headers = ["col%d" % i for i in range(1, nfields + 1)]

    # create row format
    # A plain dict description replaces the former exec()-generated class:
    # header text comes from the input file and must not be executed, and a
    # name bound by exec() inside a function is not visible as a local in
    # Python 3.
    description = {}
    for pos, colname in enumerate(headers):
        description[colname] = tables.Float32Col(pos=pos)

    # create HDF5 file
    protocol, path = output_url.split("://")
    path = os.path.splitext(path)[0]
    filename = os.path.basename(path)
    assert protocol == "file", "For now, we only support writing to local files."
    h5file = tables.openFile("%s.h5" % path, mode="w", title=filename)
    try:
        # create and fill table
        table = h5file.createTable(h5file.root, filename, description,
                                   "Converted from %s" % input_url)
        for k, v in udict.items():
            setattr(table.attrs, "units_%s" % k, v)
        row = table.row
        for line in data_lines:
            data = [float(x) for x in line.split()]
            for col, val in zip(headers, data):
                row[col] = val
            row.append()
        table.flush()
    finally:
        # Close the output file even if a row fails to convert.
        h5file.close()
def txt_to_hdf5(input_url, output_url):
    """
    Convert a data file in ascii format to a simple HDF5 file containing a
    single table.

    The data file should contain numerical data in columns separated by spaces
    and/or tabs. Lines beginning with '#' are taken to be comments; blank
    lines are ignored. Column headings can be specified as a comment on the
    first line of the file. Headings containing spaces must be enclosed in
    double quotes; a trailing parenthesised part of a heading, e.g.
    "time (ms)", is stored as the table attribute `units_<column>`.
    If the number of headings does not match the number of fields in the
    first data row, the columns are named col1, col2, ...

    Example:
        # time voltage
        0.0  -65.0
        0.1  -64.8

    The output is written to the path of `output_url` with its extension
    replaced by '.h5'; the table is named after the file's base name and all
    columns are stored as Float32.

    Raises:
        Exception: PyTables is not available (`use_hdf5` is false).
        ValueError: the input contains no data rows, or a field is not a
            number.
        AssertionError: `output_url` does not use the 'file' protocol.
    """
    # NOTE(review): this redefines the txt_to_hdf5 declared earlier in this
    # file; consider removing one of the two definitions.
    if not use_hdf5:
        raise Exception("PyTables not installed.")
    f = srblib.urlopen(input_url)
    try:
        contents = f.readlines()
    finally:
        f.close()

    # Keep only real data rows. Slicing off the first line unconditionally
    # would lose a row when the file has no header comment, and blank or
    # comment lines further down must not be converted to numbers.
    data_lines = []
    for raw_line in contents:
        stripped = raw_line.strip()
        if stripped and stripped[:1] != '#':
            data_lines.append(stripped)
    if not data_lines:
        raise ValueError("No data found in %s" % input_url)

    # extract header information
    firstline = contents[0]
    headers = []
    udict = {}
    if firstline[0] == '#':
        if '"' in firstline:
            headers = [
                h.strip() for h in firstline[1:].split('"')
                if h and h.strip()
            ]
        else:
            headers = firstline[1:].split()
        for h, colname in enumerate(headers):
            # check for units
            p = colname.find("(")
            if p > 0:
                units = colname[p:]
                colname = colname[:p].strip().replace(" ", "_")
                udict[colname] = units
            headers[h] = colname.strip().replace(" ", "_")
    nfields = len(data_lines[0].split())
    if len(headers) != nfields:
        headers = ['col%d' % i for i in range(1, nfields + 1)]

    # create row format
    # A plain dict description replaces the former exec()-generated class:
    # header text comes from the input file and must not be executed, and a
    # name bound by exec() inside a function is not visible as a local in
    # Python 3.
    description = {}
    for pos, colname in enumerate(headers):
        description[colname] = tables.Float32Col(pos=pos)

    # create HDF5 file
    protocol, path = output_url.split('://')
    path = os.path.splitext(path)[0]
    filename = os.path.basename(path)
    assert protocol == 'file', "For now, we only support writing to local files."
    h5file = tables.openFile('%s.h5' % path, mode='w', title=filename)
    try:
        # create and fill table
        table = h5file.createTable(h5file.root, filename, description,
                                   "Converted from %s" % input_url)
        for k, v in udict.items():
            setattr(table.attrs, "units_%s" % k, v)
        row = table.row
        for line in data_lines:
            data = [float(x) for x in line.split()]
            for col, val in zip(headers, data):
                row[col] = val
            row.append()
        table.flush()
    finally:
        # Close the output file even if a row fails to convert.
        h5file.close()