Пример #1
0
def read_swans(fileglob, ndays=None, int_freq=True, int_dir=False, dirorder=True, ntimes=None):
    """Read multiple swan files into single Dataset.

    Args:
        - fileglob (str, list): glob pattern specifying files to read.
        - ndays (float): number of days to keep from each file, choose None to
          keep entire period.
        - int_freq (ndarray, bool): frequency array for interpolating onto:
            - ndarray: 1d array specifying frequencies to interpolate onto.
            - True: logarithm array is constructed such that fmin=0.0418 Hz,
              fmax=0.71856 Hz, df=0.1f.
            - False: No interpolation performed in frequency space.
        - int_dir (ndarray, bool): direction array for interpolating onto:
            - ndarray: 1d array specifying directions to interpolate onto.
            - True: circular array is constructed such that dd=10 degrees.
            - False: No interpolation performed in direction space.
        - dirorder (bool): if True ensures directions are sorted.
        - ntimes (int): use it to read only specific number of times, useful
          for checking headers only.
        
    Returns:
        - dset (SpecDataset): spectra dataset object read from file with
          different sites and cycles concatenated along the 'site' and 'time'
          dimensions.

    Note:
        - If multiple cycles are provided, 'time' coordinate is replaced by
          'cycletime' multi-index coordinate.
        - If more than one cycle is prescribed from fileglob, each cycle must
          have same number of sites.
        - Either all or none of the spectra in fileglob must have tabfile
          associated to provide wind/depth data.
        - Concatenation is done with numpy arrays for efficiency.

    """
    swans = sorted(fileglob) if isinstance(fileglob, list) else sorted(glob.glob(fileglob))
    assert swans, 'No SWAN file identified with fileglob %s' % (fileglob)

    # Default spectral basis for interpolating
    if int_freq == True:
        int_freq = [0.04118 * 1.1**n for n in range(31)]
    elif int_freq == False:
        int_freq = None
    if int_dir == True:
        int_dir = np.arange(0, 360, 10)
    elif int_dir == False:
        int_dir = None

    cycles    = list()
    dsets     = SortedDict()
    tabs      = SortedDict()
    all_times = list()
    all_sites = SortedDict()
    all_lons  = SortedDict()
    all_lats  = SortedDict()
    deps      = SortedDict()
    wspds     = SortedDict()
    wdirs     = SortedDict()

    for filename in swans:
        swanfile = SwanSpecFile(filename, dirorder=dirorder)
        times = swanfile.times
        lons = list(swanfile.x)
        lats = list(swanfile.y)
        sites = [os.path.splitext(os.path.basename(filename))[0]] if len(lons)==1 else np.arange(len(lons))+1
        freqs = swanfile.freqs
        dirs = swanfile.dirs

        if ntimes is None:
            spec_list = [s for s in swanfile.readall()]
        else:
            spec_list = [swanfile.read() for itime in range(ntimes)]

        # Read tab files for winds / depth
        if swanfile.is_tab:
            try:
                tab = read_tab(swanfile.tabfile).rename(columns={'dep': attrs.DEPNAME})
                if len(swanfile.times) == tab.index.size:
                    if 'X-wsp' in tab and 'Y-wsp' in tab:
                        tab[attrs.WSPDNAME], tab[attrs.WDIRNAME] = uv_to_spddir(tab['X-wsp'], tab['Y-wsp'], coming_from=True)
                else:
                    warnings.warn(
                        "Times in {} and {} not consistent, not appending winds and depth"
                        .format(swanfile.filename, swanfile.tabfile)
                    )
                    tab = pd.DataFrame()
                tab = tab[list(set(tab.columns).intersection((attrs.DEPNAME, attrs.WSPDNAME, attrs.WDIRNAME)))]
            except Exception as exc:
                warnings.warn(
                    "Cannot parse depth and winds from {}:\n{}".format(swanfile.tabfile, exc)
                )
        else:
            tab = pd.DataFrame()

        # Shrinking times
        if ndays is not None:
            tend = times[0] + datetime.timedelta(days=ndays)
            if tend > times[-1]:
                raise IOError('Times in %s does not extend for %0.2f days' % (filename, ndays))
            iend = times.index(min(times, key=lambda d: abs(d - tend)))
            times = times[0:iend+1]
            spec_list = spec_list[0:iend+1]
            tab = tab.loc[times[0]:tend] if tab is not None else tab

        spec_list = flatten_list(spec_list, [])

        # Interpolate spectra
        if int_freq is not None or int_dir is not None:
            spec_list = [interp_spec(spec, freqs, dirs, int_freq, int_dir) for spec in spec_list]
            freqs = int_freq if int_freq is not None else freqs
            dirs = int_dir if int_dir is not None else dirs

        # Appending
        try:
            arr = np.array(spec_list).reshape(len(times), len(sites), len(freqs), len(dirs))
            cycle = times[0]
            if cycle not in dsets:
                dsets[cycle] = [arr]
                tabs[cycle] = [tab]
                all_sites[cycle] = sites
                all_lons[cycle] = lons
                all_lats[cycle] = lats
                all_times.append(times)
                nsites = 1
            else:
                dsets[cycle].append(arr)
                tabs[cycle].append(tab)
                all_sites[cycle].extend(sites)
                all_lons[cycle].extend(lons)
                all_lats[cycle].extend(lats)
                nsites += 1
        except:
            if len(spec_list) != arr.shape[0]:
                raise IOError('Time length in %s (%i) does not match previous files (%i), cannot concatenate',
                    (filename, len(spec_list), arr.shape[0]))
            else:
                raise
        swanfile.close()

    cycles = dsets.keys()

    # Ensuring sites are consistent across cycles
    sites = all_sites[cycle]
    lons = all_lons[cycle]
    lats = all_lats[cycle]
    for site, lon, lat in zip(all_sites.values(), all_lons.values(), all_lats.values()):
        if (list(site) != list(sites)) or (list(lon) != list(lons)) or (list(lat) != list(lats)):
            raise IOError('Inconsistent sites across cycles in glob pattern provided')

    # Ensuring consistent tabs
    cols = set([frozenset(tabs[cycle][n].columns) for cycle in cycles for n in range(len(tabs[cycle]))])
    if len(cols) > 1:
        raise IOError('Inconsistent tab files, ensure either all or none of the spectra have associated tabfiles and columns are consistent')

    # Concat sites
    for cycle in cycles:
        dsets[cycle] = np.concatenate(dsets[cycle], axis=1)
        deps[cycle] = np.vstack([tab[attrs.DEPNAME].values for tab in tabs[cycle]]).T if attrs.DEPNAME in tabs[cycle][0] else None
        wspds[cycle] = np.vstack([tab[attrs.WSPDNAME].values for tab in tabs[cycle]]).T if attrs.WSPDNAME in tabs[cycle][0] else None
        wdirs[cycle] = np.vstack([tab[attrs.WDIRNAME].values for tab in tabs[cycle]]).T if attrs.WDIRNAME in tabs[cycle][0] else None

    time_sizes = [dsets[cycle].shape[0] for cycle in cycles]

    # Concat cycles
    if len(dsets) > 1:
        dsets = np.concatenate(dsets.values(), axis=0)
        deps = np.concatenate(deps.values(), axis=0) if attrs.DEPNAME in tabs[cycle][0] else None
        wspds = np.concatenate(wspds.values(), axis=0) if attrs.WSPDNAME in tabs[cycle][0] else None
        wdirs = np.concatenate(wdirs.values(), axis=0) if attrs.WDIRNAME in tabs[cycle][0] else None
    else:
        dsets = dsets[cycle]
        deps = deps[cycle] if attrs.DEPNAME in tabs[cycle][0] else None
        wspds = wspds[cycle] if attrs.WSPDNAME in tabs[cycle][0] else None
        wdirs = wdirs[cycle] if attrs.WDIRNAME in tabs[cycle][0] else None

    # Creating dataset
    times = flatten_list(all_times, [])
    dsets = xr.DataArray(
        data=dsets,
        coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites), (attrs.FREQNAME, freqs), (attrs.DIRNAME, dirs))),
        dims=(attrs.TIMENAME, attrs.SITENAME, attrs.FREQNAME, attrs.DIRNAME),
        name=attrs.SPECNAME,
    ).to_dataset()

    dsets[attrs.LATNAME] = xr.DataArray(data=lats, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME])
    dsets[attrs.LONNAME] = xr.DataArray(data=lons, coords={attrs.SITENAME: sites}, dims=[attrs.SITENAME])

    if wspds is not None:
        dsets[attrs.WSPDNAME] = xr.DataArray(data=wspds, dims=[attrs.TIMENAME, attrs.SITENAME],
                                       coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))))
        dsets[attrs.WDIRNAME] = xr.DataArray(data=wdirs, dims=[attrs.TIMENAME, attrs.SITENAME],
                                       coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))))
    if deps is not None:
        dsets[attrs.DEPNAME] = xr.DataArray(data=deps, dims=[attrs.TIMENAME, attrs.SITENAME],
                                      coords=OrderedDict(((attrs.TIMENAME, times), (attrs.SITENAME, sites))))

    # Setting multi-index
    if len(cycles) > 1:
        dsets = dsets.rename({attrs.TIMENAME: 'cycletime'})
        cycletime = zip(
            [item for sublist in [[c]*t for c,t in zip(cycles, time_sizes)] for item in sublist],
            dsets.cycletime.values
        )
        dsets['cycletime'] = pd.MultiIndex.from_tuples(cycletime, names=[attrs.CYCLENAME, attrs.TIMENAME])
        dsets['cycletime'].attrs = attrs.ATTRS[attrs.TIMENAME]

    set_spec_attributes(dsets)
    if 'dir' in dsets and len(dsets.dir)>1:
        dsets[attrs.SPECNAME].attrs.update({'_units': 'm^{2}.s.degree^{-1}', '_variable_name': 'VaDens'})
    else:
        dsets[attrs.SPECNAME].attrs.update({'units': 'm^{2}.s', '_units': 'm^{2}.s', '_variable_name': 'VaDens'})

    return dsets