def fromtextfile(fname, delimitor=None, commentchar="#", missingchar="",
                 dates_column=None, varnames=None, vartypes=None,
                 dates=None):
    """Creates a multitimeseries from data stored in the file `fname`.

    The first line that splits into more than one field (once any trailing
    comment has been removed) is read as the header. Every following line
    that is longer than one character and does not start with `commentchar`
    is read as a data row.

:Parameters:
    - `fname` : file name/handle
      Passed as is to `openfile`.
    - `delimitor` : Character *None*
      Alphanumeric character used to separate columns in the file.
      If None, any (group of) white space(s) will be used.
    - `commentchar` : String *['#']*
      Alphanumeric character used to mark the start of a comment.
    - `missingchar` : String *['']*
      String indicating missing data, and used to create the masks.
    - `dates_column` : Integer or sequence of integers *[None]*
      Position of the column(s) storing dates. If None, the columns whose
      name is "dates" or "_dates" (case-insensitive) are used. When several
      columns are selected, all of them are removed from the data but only
      the last one is converted to dates.
    - `varnames` : Sequence *[None]*
      Sequence of the variable names. If None, the names are taken from the
      header line of the file.
    - `vartypes` : Sequence *[None]*
      Sequence of the variables dtypes. If None, the dtypes are estimated
      from the first data row.
    - `dates` : *[None]*
      Currently not used by this function.

:Raises:
    ValueError if the end of the file is reached before a header line is
    found, or if an integer `dates_column` is not a valid column index.

    Ultra simple: the varnames are in the header, one line"""
    # Try to open the file ......................
    # NOTE(review): the handle is never closed here; confirm whether
    # `openfile` may return a caller-owned handle before adding a close().
    f = openfile(fname)
    # Get the first non-empty line as the varnames
    while True:
        line = f.readline()
        if not line:
            # readline() returns '' forever at EOF: bail out instead of
            # spinning in an endless loop.
            raise ValueError("No header line with more than one field "
                             "could be found in the file.")
        # Strip the trailing comment only when there is one: find() returns
        # -1 otherwise, which would chop the last character of the line.
        comment_start = line.find(commentchar)
        if comment_start >= 0:
            line = line[:comment_start]
        _varnames = line.strip().split(delimitor)
        if len(_varnames) > 1:
            break
    if varnames is None:
        varnames = _varnames
    # Get the data ..............................
    _variables = MA.asarray([line.strip().split(delimitor) for line in f
                             if line[0] != commentchar and len(line) > 1])
    (_, nfields) = _variables.shape
    # Check if we need to get the dates..........
    if dates_column is None:
        dates_column = [i for (i, n) in enumerate(list(varnames))
                        if n.lower() in ["_dates", "dates"]]
    elif isinstance(dates_column, (int, float)):
        # Valid column indices are 0 .. nfields-1.
        if dates_column >= nfields:
            raise ValueError("Invalid column number: %i >= %i"
                             % (dates_column, nfields))
        dates_column = [dates_column]
    if len(dates_column) > 0:
        # Indices of the columns that hold actual data (dates excluded).
        cols = list(range(nfields))
        for i in dates_column:
            cols.remove(i)
        newdates = date_array(_variables[:, dates_column[-1]])
        _variables = _variables[:, cols]
        varnames = [varnames[i] for i in cols]
        if vartypes is not None:
            vartypes = [vartypes[i] for i in cols]
        nfields -= len(dates_column)
    else:
        newdates = None
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [numeric.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = [(name, vtype) for (name, vtype) in zip(varnames, vartypes)]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = (_variables.T == missingchar)
    _datalist = [masked_array(a, mask=m, dtype=t)
                 for (a, m, t) in zip(_variables.T, _mask, vartypes)]
    return MultiTimeSeries(_datalist, dates=newdates, dtype=mdescr)
def fromarrays(arraylist, dates=None, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Creates a MultiTimeSeries from a (flat) list of masked arrays.

:Parameters:
    - `arraylist` : Sequence
      A list of (masked) arrays. Each element of the sequence is first
      converted to a masked array if needed. If a 2D array is passed as
      argument, it is processed line by line.
    - `dates` : *[None]*
      Dates forwarded to the `MultiTimeSeries` constructor.
    - `dtype` : numeric.dtype
      Data type descriptor. When given, it takes precedence over `formats`,
      `names`, `titles`, `aligned` and `byteorder`.
    - `shape` : Integer or tuple *[None]*
      Number of records. If None (or 0), `shape` is defined from the shape
      of the first array in the list.
    - `formats` : *[None]*
      Field formats, forwarded to `format_parser`. If both `formats` and
      `dtype` are None, the formats are derived from the arrays.
    - `names` : *[None]*
      Field names, forwarded to `format_parser`.
    - `titles` : *[None]*
      Field titles, forwarded to `format_parser`.
    - `aligned` : Boolean *[False]*
      Forwarded to `format_parser`.
    - `byteorder` : *[None]*
      Forwarded to `format_parser`.

:Raises:
    ValueError if the number of fields of the descriptor differs from the
    number of arrays, or if an array does not have the expected shape.
    """
    arraylist = [MA.asarray(x) for x in arraylist]
    # Define/check the shape.....................
    if shape is None or shape == 0:
        shape = arraylist[0].shape
    if isinstance(shape, int):
        shape = (shape,)
    # Define formats from scratch ...............
    if formats is None and dtype is None:
        formats = _getformats(arraylist)
    # Define the dtype ..........................
    if dtype is not None:
        descr = numeric.dtype(dtype)
    else:
        parsed = format_parser(formats, names, titles, aligned, byteorder)
        descr = parsed._descr
    # Determine shape from data-type.............
    if len(descr) != len(arraylist):
        msg = ("Mismatch between the number of fields (%i) and the number of "
               "arrays (%i)")
        raise ValueError(msg % (len(descr), len(arraylist)))
    # Trailing dimensions that belong to the first field's own (sub-array)
    # shape are not record dimensions: drop them from the expected shape.
    nn = len(descr[0].shape)
    if nn > 0:
        shape = shape[:-nn]
    # Make sure the shape is the correct one ....
    for k, obj in enumerate(arraylist):
        nn = len(descr[k].shape)
        testshape = obj.shape[:len(obj.shape) - nn]
        if testshape != shape:
            raise ValueError("Array-shape mismatch in array %d" % k)
    # `dates` used to be silently dropped: forward it to the constructor,
    # as `fromtextfile` does (the default None is what that function passes
    # when no date column is found).
    return MultiTimeSeries(arraylist, dates=dates, dtype=descr)