Exemplo n.º 1
0
def fromtextfile(
    fname, delimitor=None, commentchar="#", missingchar="", dates_column=None, varnames=None, vartypes=None, dates=None
):
    """Creates a multitimeseries from data stored in the file `filename`.

:Parameters:
    - `filename` : file name/handle
      Handle of an opened file.  
    - `delimitor` : Character *None*
      Alphanumeric character used to separate columns in the file.
      If None, any (group of) white spacestring(s) will be used.
    - `commentchar` : String *['#']*
      Alphanumeric character used to mark the start of a comment.
    - `missingchar` : String *['']*
      String indicating missing data, and used to create the masks.
    - `datescol` : Integer *[None]*
      Position of the columns storing dates. If None, a position will be 
      estimated from the variable names.
    - `varnames` : Sequence *[None]*
      Sequence of the variable names. If None, a list will be created from
      the first non empty line of the file.
    - `vartypes` : Sequence *[None]*
      Sequence of the variables dtypes. If None, the sequence will be estimated
      from the first non-commented line.  
    
    
    Ultra simple: the varnames are in the header, one line"""
    # Try to open the file ......................
    f = openfile(fname)
    # Get the first non-empty line as the varnames
    while True:
        line = f.readline()
        firstline = line[: line.find(commentchar)].strip()
        _varnames = firstline.split(delimitor)
        if len(_varnames) > 1:
            break
    if varnames is None:
        varnames = _varnames
    # Get the data ..............................
    _variables = MA.asarray([line.strip().split(delimitor) for line in f if line[0] != commentchar and len(line) > 1])
    (nvars, nfields) = _variables.shape
    # Check if we need to get the dates..........
    if dates_column is None:
        dates_column = [i for (i, n) in enumerate(list(varnames)) if n.lower() in ["_dates", "dates"]]
    elif isinstance(dates_column, (int, float)):
        if dates_column > nfields:
            raise ValueError, "Invalid column number: %i > %i" % (dates_column, nfields)
        dates_column = [dates_column]
    if len(dates_column) > 0:
        cols = range(nfields)
        [cols.remove(i) for i in dates_column]
        newdates = date_array(_variables[:, dates_column[-1]])
        _variables = _variables[:, cols]
        varnames = [varnames[i] for i in cols]
        if vartypes is not None:
            vartypes = [vartypes[i] for i in cols]
        nfields -= len(dates_column)
    else:
        newdates = None
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [numeric.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = [(n, f) for (n, f) in zip(varnames, vartypes)]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = _variables.T == missingchar
    _datalist = [masked_array(a, mask=m, dtype=t) for (a, m, t) in zip(_variables.T, _mask, vartypes)]
    #
    newdates = __getdates(dates=dates, newdates=newdates, length=nvars, freq=None, start_date=None)
    return MultiTimeSeries(_datalist, dates=newdates, dtype=mdescr)
Exemplo n.º 2
0
def fromarrays(
    arraylist, dates=None, dtype=None, shape=None, formats=None, names=None, titles=None, aligned=False, byteorder=None
):
    """Creates a mrecarray from a (flat) list of masked arrays.

:Parameters:
    - `arraylist` : Sequence
      A list of (masked) arrays. Each element of the sequence is first converted
      to a masked array if needed. If a 2D array is passed as argument, it is
      processed line by line
    - `dtype` : numeric.dtype
      Data type descriptor.
    - `shape` : Integer *[None]*
      Number of records. If None, `shape` is defined from the shape of the first
      array in the list.
    - `formats` :
      (Description to write)
    - `names` : 
      (description to write)
    - `titles`:
      (Description to write)
    - `aligned`: Boolen *[False]*
      (Description to write, not used anyway)   
    - `byteorder`: Boolen *[None]*
      (Description to write, not used anyway)
       

    """
    arraylist = [MA.asarray(x) for x in arraylist]
    # Define/check the shape.....................
    if shape is None or shape == 0:
        shape = arraylist[0].shape
    if isinstance(shape, int):
        shape = (shape,)
    # Define formats from scratch ...............
    if formats is None and dtype is None:
        formats = _getformats(arraylist)
    # Define the dtype ..........................
    if dtype is not None:
        descr = numeric.dtype(dtype)
        _names = descr.names
    else:
        parsed = format_parser(formats, names, titles, aligned, byteorder)
        _names = parsed._names
        descr = parsed._descr
    # Determine shape from data-type.............
    if len(descr) != len(arraylist):
        msg = "Mismatch between the number of fields (%i) and the number of " "arrays (%i)"
        raise ValueError, msg % (len(descr), len(arraylist))
    d0 = descr[0].shape
    nn = len(d0)
    if nn > 0:
        shape = shape[:-nn]
    # Make sure the shape is the correct one ....
    for k, obj in enumerate(arraylist):
        nn = len(descr[k].shape)
        testshape = obj.shape[: len(obj.shape) - nn]
        if testshape != shape:
            raise ValueError, "Array-shape mismatch in array %d" % k
    # Reconstruct the descriptor, by creating a _data and _mask version
    return MultiTimeSeries(arraylist, dtype=descr)