def stream(self):
    """Generator that yields the lines of the file."""
    try:
        fp = open(self.filepath, 'r')  # 'Ur' universal-newline mode was removed in Python 3
    except Exception as exc:
        message = 'Unable to open file {filepath}: {exc}'.format(
            filepath=self.filepath, exc=exc)
        raise OpenFileError(message)
    # Yielding the lines (and closing the handle on exhaustion) completes
    # the generator described in the docstring.
    with fp:
        for line in fp:
            yield line
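Every example on this page raises OpenFileError without showing where it is defined; in pydap it is imported from pydap.exceptions. A minimal stand-in, if you want to run a snippet outside pydap, might look like this:

# Minimal stand-in for the real class (assumption: the originals use
# `from pydap.exceptions import OpenFileError`).
class OpenFileError(Exception):
    """Raised when a handler cannot open its source file."""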
    def __init__(self, filepath):
        BaseHandler.__init__(self)

        try:
            self.fp = h5py.File(filepath, 'r')
        except Exception as exc:
            message = 'Unable to open file %s: %s' % (filepath, exc)
            raise OpenFileError(message)
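The same open-or-raise pattern reads slightly better with Python 3 exception chaining, which keeps the original traceback attached. A sketch, not pydap's actual code:

    def __init__(self, filepath):
        BaseHandler.__init__(self)
        try:
            self.fp = h5py.File(filepath, 'r')
        except Exception as exc:
            # `from exc` preserves the underlying h5py error as __cause__
            raise OpenFileError(
                'Unable to open file {}: {}'.format(filepath, exc)) from exc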
    def __init__(self, filepath):
        BaseHandler.__init__(self)

        try:
            with open(filepath, 'r', newline='') as fp:
                reader = csv.reader(fp, quoting=csv.QUOTE_NONNUMERIC)
                vars = next(reader)
        except Exception as exc:
            message = 'Unable to open file {filepath}: {exc}'.format(
                filepath=filepath, exc=exc)
            raise OpenFileError(message)
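As a quick illustration of what csv.QUOTE_NONNUMERIC does on the read side (hypothetical data): unquoted fields come back as floats, quoted fields stay strings.

import csv
import io

buf = io.StringIO('"station",1.5,2.0\n')
row = next(csv.reader(buf, quoting=csv.QUOTE_NONNUMERIC))
print(row)  # ['station', 1.5, 2.0]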
Example #4
    def parse_constraints(self, environ):
        buf_size = int(environ.get('pydap.handlers.netcdf.buf_size', 10000))

        try:
            fp = nc(self.filepath)
        except Exception as exc:
            message = 'Unable to open file %s: %s' % (self.filepath, exc)
            raise OpenFileError(message)

        last_modified = formatdate(
            time.mktime(time.localtime(os.stat(self.filepath)[ST_MTIME])))
        environ['pydap.headers'].append(('Last-modified', last_modified))

        dataset = DatasetType(name=os.path.split(self.filepath)[1],
                              attributes={'NC_GLOBAL': var_attrs(fp)})
        for dim in fp.dimensions:
            if fp.dimensions[dim] is None:
                dataset.attributes['DODS_EXTRA'] = {'Unlimited_Dimension': dim}
                break

        fields, queries = environ['pydap.ce']
        fields = fields or [[(quote(name), ())] for name in fp.variables]
        for var in fields:
            target = dataset
            while var:
                name, slice_ = var.pop(0)
                ncname = urllib.parse.unquote(name)
                if (ncname in fp.dimensions
                        or not fp.variables[ncname].dimensions
                        or target is not dataset):
                    target[name] = get_var(ncname, fp, slice_, buf_size)
                elif var:
                    attrs = var_attrs(fp.variables[ncname])
                    target.setdefault(
                        name, StructureType(name=name, attributes=attrs))
                    target = target[name]
                else:  # return grid
                    attrs = var_attrs(fp.variables[ncname])
                    grid = target[name] = GridType(name=name, attributes=attrs)
                    grid[name] = get_var(ncname, fp, slice_, buf_size)
                    slice_ = list(slice_) + [slice(None)] * (
                        len(grid.array.shape) - len(slice_))
                    for dim, dimslice in zip(fp.variables[ncname].dimensions,
                                             slice_):
                        axis = get_var(dim, fp, dimslice, buf_size)
                        grid[axis.name] = axis

        dataset._set_id()
        dataset.close = fp.close
        return dataset
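The shape of environ['pydap.ce'] is easiest to see with a concrete value. Based on how the loop above consumes it (an illustration, not output captured from pydap), a request like ?temp[0:1:9],site.lat would arrive roughly as:

# Each field is a list of (name, slices) steps; nested names appear as
# successive steps, exactly as the while-loop above pops them.
fields = [
    [('temp', (slice(0, 10, 1),))],   # top-level variable with an index slice
    [('site', ()), ('lat', ())],      # structure member: 'site' then 'lat'
]
queries = []                          # selection clauses, unused here
environ = {'pydap.ce': (fields, queries)}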
Example #5
def get_dataset_info(self):
    try:
        with open(self.filepath) as fp:
            config = yaml.safe_load(fp)
    except Exception as exc:
        message = "Unable to open file '%s': %s" % (self.filepath, exc)
        raise OpenFileError(message)


    ds_obj = config["dataset"]

    if "name" in ds_obj:
        name = ds_obj["name"]
    else:
        name = os.path.split(self.filepath)[1]

    if "external_dataset_id" in ds_obj:
        ds_id = ds_obj["external_dataset_id"]
    else:
        raise OpenFileError("ExternalDataset ID not specified")

    if "buffer_size" in ds_obj:
        buf = ds_obj["buffer_size"]
    else:
        buf = 10000

    if "url" in ds_obj:
        ds_url = ds_obj["url"]
    else:
        raise OpenFileError("Dataset url not specified")

    if ds_url.startswith("&"):
        full = os.getcwd() + ds_url.replace("&", "")
        ds_url = os.path.abspath(full)

    return name, ds_id, ds_url, buf
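A config file that satisfies get_dataset_info looks like the following (the key names are taken from the lookups above; the values are invented for illustration):

import yaml

sample = yaml.safe_load("""
dataset:
  name: example_dataset          # optional, defaults to the file name
  external_dataset_id: ds-0001   # required
  buffer_size: 20000             # optional, defaults to 10000
  url: http://example.com/data.nc   # required
""")
assert sample["dataset"]["external_dataset_id"] == "ds-0001"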
Example #6
    def __init__(self, filepath):
        BaseHandler.__init__(self)

        if filepath is None:
            self.config = {}
            self.dataset = None
        else:
            try:
                with open(filepath, 'r') as fp:
                    config = yaml.load(fp, Loader=yaml.FullLoader)
            except Exception as exc:
                raise OpenFileError(
                    'Unable to open file {filepath}: {exc}'.format(
                        filepath=filepath, exc=exc))

            self.config = config
            self.dataset = dataset_model(config)
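yaml.load(..., Loader=yaml.FullLoader) avoids the arbitrary-object construction of the legacy default loader; if the config only contains plain mappings and scalars, yaml.safe_load is a stricter equivalent:

# Stricter alternative (assumes the config is plain YAML, no custom tags):
with open(filepath) as fp:
    config = yaml.safe_load(fp)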
Example #7
    def __init__(self, filepath):
        BaseHandler.__init__(self)

        self.filepath = filepath
        try:
            with netcdf_file(self.filepath, 'r') as source:
                self.additional_headers.append(('Last-modified', (formatdate(
                    time.mktime(time.localtime(
                        os.stat(filepath)[ST_MTIME]))))))

                # shortcuts
                vars = source.variables
                dims = source.dimensions

                # build dataset
                name = os.path.split(filepath)[1]
                self.dataset = DatasetType(
                    name, attributes=dict(NC_GLOBAL=attrs(source)))
                for dim in dims:
                    if dims[dim] is None:
                        self.dataset.attributes['DODS_EXTRA'] = {
                            'Unlimited_Dimension': dim,
                        }
                        break

                # add grids
                grids = [var for var in vars if var not in dims]
                for grid in grids:
                    self.dataset[grid] = GridType(grid, attrs(vars[grid]))
                    # add array
                    self.dataset[grid][grid] = BaseType(
                        grid, LazyVariable(source, grid, grid, self.filepath),
                        vars[grid].dimensions, attrs(vars[grid]))
                    # add maps
                    for dim in vars[grid].dimensions:
                        self.dataset[grid][dim] = BaseType(
                            dim, vars[dim][:], None, attrs(vars[dim]))

                # add dims
                for dim in dims:
                    self.dataset[dim] = BaseType(dim, vars[dim][:], None,
                                                 attrs(vars[dim]))
        except Exception as exc:
            message = 'Unable to open file %s: %s' % (filepath, exc)
            raise OpenFileError(message)
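Hypothetical usage of a handler built this way (the class name and file path are made up; pydap handlers expose the parsed file as .dataset):

handler = NetCDFHandler('/path/to/data.nc')   # illustrative name and path
dataset = handler.dataset
print(dataset.name)                  # the file name, per os.path.split above
for name in dataset.keys():          # grids first, then bare dimensions
    print(name, dataset[name].attributes)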
Example #8
def get_file(request, param, var_conf, time, lat, lon, verbose=False):

    ntime = len(time)
    ncoord = len(lat) * len(lon)

    var_list = [
        ((FORMAT_STR if vartype == "surface" else FORMAT_STR_PL).format(
            var=var, **param)) for var, vartype in var_conf.items()
    ]

    if verbose:
        print(request + ",".join(var_list))

    try:
        dataset = open_dods(request + ",".join(var_list))
    except Exception as exc:
        raise OpenFileError(
            "file '{}' not available".format(request[:-1])) from exc

    var_data = [
        var.data.reshape((ntime, -1, ncoord)) for var in dataset.values()
    ]
    var_names = [
        "{}{}".format(var, n) for idx, var in enumerate(dataset)
        for n in range(var_data[idx].shape[1])
    ]

    index = pd.MultiIndex.from_product((lat, lon), names=["lat", "lon"])
    columns = pd.MultiIndex.from_product((time, var_names),
                                         names=["time", "var"])

    return pd.DataFrame(
        (np.concatenate(var_data, axis=1).transpose(2, 0, 1).reshape(
            ncoord, -1)),
        index=index,
        columns=columns,
    )
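The resulting frame is indexed by (lat, lon) pairs on the rows and (time, var) pairs on the columns. A tiny self-contained illustration of that layout, with made-up values:

import numpy as np
import pandas as pd

index = pd.MultiIndex.from_product(([40.0, 41.0], [-4.0, -3.0]),
                                   names=["lat", "lon"])
columns = pd.MultiIndex.from_product(([0, 3], ["TMP0"]),
                                     names=["time", "var"])
df = pd.DataFrame(np.zeros((4, 2)), index=index, columns=columns)
print(df.loc[(40.0, -3.0), (3, "TMP0")])   # one value per (lat, lon, time, var)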
Example #9
def save_dataset(
    fname,
    date,
    hour,
    var_conf,
    res,
    step,
    time_tuple,
    lev_idx,
    lat_tuple,
    lon_tuple,
    verbose=False,
):

    request = URL.format(
        date=date,
        hour=hour,
        res="{0:.2f}".format(res).replace(".", "p"),
        step="" if step == 3 else "_{:1d}hr".format(step),
    )

    if verbose:
        print(request + "lat,lon")

    try:
        coord = open_dods(request + "lat,lon")
    except Exception as exc:
        raise OpenFileError(
            "file '{}' not available".format(request[:-1])) from exc

    # We don't fetch the time array from the server, since it comes in seconds
    # from a reference date. Instead we compute the times in hours ourselves.
    time = range1(*time_tuple, step=step)
    # TODO: there is a possible problem here if the division is not exact
    time_idx = (int(time_tuple[0] / step), int(time_tuple[1] / step))

    # Slicing [:] downloads the data from the server
    lat, lon = coord["lat"][:].data, coord["lon"][:].data

    # Transform longitudes from range 0..360 to -180..180
    lon = np.where(lon > 180, lon - 360, lon)
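    # e.g. 350.0 -> -10.0, while 180.0 and 90.0 are unchanged (only values
    # strictly above 180 are shifted)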

    # Transform into python lists to use the index() method
    # TODO: change to find the closest lat/lon with argmin and not an exact match
    lat_list, lon_list = lat.tolist(), lon.tolist()

    try:
        lat_idx = (lat_list.index(lat_tuple[0]), lat_list.index(lat_tuple[1]))
    except ValueError:
        raise ValueError("Latitude not in the grid", lat_tuple)

    lat = lat[range1(*lat_idx)].tolist()

    if lon_tuple[0] < 0 and lon_tuple[1] > 0:
        try:
            lon_idx_w = (lon_list.index(lon_tuple[0]), len(lon_list) - 1)
            lon_idx_e = (0, lon_list.index(lon_tuple[1]))
        except ValueError:
            raise ValueError("Longitude not in the grid", lon_tuple)

        lon_w = lon[range1(*lon_idx_w)].tolist()
        lon_e = lon[range1(*lon_idx_e)].tolist()

        param_w = {
            "lat": lat_idx,
            "lon": lon_idx_w,
            "time": time_idx,
            "lev": lev_idx
        }
        param_e = {
            "lat": lat_idx,
            "lon": lon_idx_e,
            "time": time_idx,
            "lev": lev_idx
        }
        data_w = get_file(request,
                          param_w,
                          var_conf,
                          time,
                          lat,
                          lon_w,
                          verbose=verbose)
        data_e = get_file(request,
                          param_e,
                          var_conf,
                          time,
                          lat,
                          lon_e,
                          verbose=verbose)
        data = pd.concat((data_w, data_e), axis=0)

    else:
        try:
            lon_idx = (lon_list.index(lon_tuple[0]),
                       lon_list.index(lon_tuple[1]))
        except ValueError:
            raise ValueError("Longitude not in the grid", lon_tuple)

        lon = lon[range1(*lon_idx)].tolist()

        param = {
            "lat": lat_idx,
            "lon": lon_idx,
            "time": time_idx,
            "lev": lev_idx
        }
        data = get_file(request,
                        param,
                        var_conf,
                        time,
                        lat,
                        lon,
                        verbose=verbose)

    data.to_csv(fname, sep=" ", float_format="%.3f")
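An illustrative call, with every value invented (URL, FORMAT_STR, FORMAT_STR_PL, range1 and open_dods must already be defined in the module):

save_dataset(
    "gfs_subset.csv",
    date="20240101",            # forecast date; exact format is an assumption
    hour="00",                  # model run hour; exact format is an assumption
    var_conf={"tmp2m": "surface"},
    res=0.25,                   # degrees; rendered as "0p25" in the URL
    step=3,                     # hours between forecast steps
    time_tuple=(0, 24),
    lev_idx=(0, 0),
    lat_tuple=(40.0, 41.0),
    lon_tuple=(-4.0, -3.0),
    verbose=True,
)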