Пример #1
0
def main():
    """Run every ready method on each of the five IndoSum folds.

    Features are extracted once from the "dev", "train" and "test" datasets
    opened with fold 1, then passed to ``get_data`` for every fold. For each
    fold and each entry of ``methods_ready`` the experiment is run and its
    predictions are scored with the label (Acc/P/R) and ROUGE evaluations.
    Progress is reported through ``log``.
    """
    # Get precomputed files / compute the features once, up front.
    data = [open_dataset(split, 1) for split in ("dev", "train", "test")]
    log.write("Feature extraction...")
    feature_data = feature_extraction(flatten(data))

    for fold in range(1, 6):
        log.write("Get fold {} of IndoSum dataset".format(fold))
        train_data, val_data, test_data = get_data(fold, feature_data)

        # Bind the name before the loop so the cleanup below cannot raise
        # NameError when ``methods_ready`` is empty.
        predicted_labels = None
        for method in methods_ready:
            log.write("==================================")
            log.write("Prediction using {}".format(method))
            log.write("==================================")
            predicted_labels = run_experiment(train_data, val_data, test_data,
                                              method)
            log.write("Acc/P/R evaluation")
            label_evaluation(test_data, predicted_labels)
            log.write("ROUGE evaluation")
            rouge_evaluation(test_data, predicted_labels)

        # Release the per-fold data before the next fold is loaded.
        del predicted_labels, train_data, val_data, test_data
        gc.collect()
Пример #2
0
def get_scale(dataset, variable, depth, time, projection, extent):
    """Return the (min, max) value range of ``variable`` over ``extent``.

    The extent is sampled on a 50x50 grid in ``projection`` coordinates,
    which is inverse-projected to lon/lat before the dataset is queried.
    When two comma-separated variables are given, the range of
    sqrt(v0**2 + v1**2) is returned. Data whose unit starts with "Kelvin"
    is shifted by -273.15. If any variable carries an ``_anom`` suffix, the
    climatology at index ``month - 1`` of the selected timestamp is
    subtracted and a range symmetric around zero is returned.
    """
    grid_x, grid_y = np.meshgrid(
        np.linspace(extent[0], extent[2], 50),
        np.linspace(extent[1], extent[3], 50),
    )
    projector = Proj(init=projection)
    lon, lat = projector(grid_x, grid_y, inverse=True)

    requested = variable.split(",")
    variables = [re.sub('_anom$', '', name) for name in requested]
    is_vector = len(variables) > 1

    with open_dataset(get_dataset_url(dataset)) as ds:
        timestamp = ds.timestamps[time]

        values = ds.get_area(np.array([lat, lon]), depth, time, variables[0])
        if is_vector:
            # Two components: work with their magnitude.
            second = ds.get_area(np.array([lat, lon]), depth, time,
                                 variables[1])
            values = np.sqrt(values ** 2 + second ** 2)

        unit = get_variable_unit(dataset, ds.variables[variables[0]])
        if unit.startswith("Kelvin"):
            unit = "Celsius"
            values = np.add(values, -273.15)

    # No "_anom" suffix was stripped: plain range of the data.
    if variables == requested:
        return values.min(), values.max()

    with open_dataset(get_dataset_climatology(dataset), 'r') as ds:
        climatology = ds.get_area(np.array([lat, lon]), depth,
                                  timestamp.month - 1, variables[0])
        if is_vector:
            second = ds.get_area(np.array([lat, lon]), depth,
                                 timestamp.month - 1, variables[1])
            climatology = np.sqrt(climatology ** 2 + second ** 2)

        anomaly = values - climatology

        # Anomaly ranges are symmetric around zero.
        bound = max(abs(anomaly.min()), abs(anomaly.max()))
        return -bound, bound
def demo():
    """Open the "dev", "train" and "test" datasets (second argument 1),
    concatenate their items into one list and save it with
    ``save_preprocessed_data``.
    """
    splits = [open_dataset(name, 1) for name in ("dev", "train", "test")]

    merged = []
    for split in splits:
        merged.extend(split)

    save_preprocessed_data(merged)
Пример #4
0
    def test_open_dataset_with_null_url_raises(self, patch_get_dataset_config):
        """open_dataset raises ValueError for a dataset whose url is None."""
        giops_entry = {"url": None, "variables": {}}
        patch_get_dataset_config.return_value = {"giops": giops_entry}

        # Build the config outside the assertion so that only
        # open_dataset() is expected to raise.
        giops_config = DatasetConfig("giops")

        with self.assertRaises(ValueError):
            open_dataset(giops_config)
Пример #5
0
def get_data(fold, feature_data=None, preprocessed_data=None):
    """Open the train/dev/test splits of ``fold`` and attach cached attributes.

    Args:
        fold: Fold identifier passed to ``open_dataset``.
        feature_data: Optional mapping from document id to a mapping that
            holds every attribute named in ``feature_attr_name``. When it is
            None, no feature attributes are attached.
        preprocessed_data: Optional mapping from document id to a mapping
            that holds every attribute named in ``preprocessing_attr``.
            Skipped when falsy.

    Returns:
        Tuple ``(train_data, val_data, test_data)``; the documents are
        updated in place.
    """
    train_data = open_dataset("train", fold)
    val_data = open_dataset("dev", fold)
    test_data = open_dataset("test", fold)
    for data_split in (train_data, val_data, test_data):
        for doc in data_split:
            # ``feature_data`` defaults to None, so it must not be
            # subscripted unconditionally.
            if feature_data is not None:
                for attr in feature_attr_name:
                    doc[attr] = feature_data[doc["id"]][attr]
            if preprocessed_data:
                for attr in preprocessing_attr:
                    doc[attr] = preprocessed_data[doc["id"]][attr]
    return train_data, val_data, test_data
Пример #6
0
    def test_open_dataset_no_model_class_raises(self,
                                                patch_get_dataset_config):
        """open_dataset raises ValueError for the patched "giops" config,
        which only defines a url and an empty variables mapping.
        """
        giops_entry = {
            "url": "tests/testdata/mercator_test.nc",
            "variables": {},
        }
        patch_get_dataset_config.return_value = {"giops": giops_entry}

        # Build the config outside the assertion so that only
        # open_dataset() is expected to raise.
        giops_config = DatasetConfig("giops")

        with self.assertRaises(ValueError):
            open_dataset(giops_config)
Пример #7
0
def timestamp_for_date(old_dataset, date, new_dataset):
    """Map a time index of ``old_dataset`` to an index of ``new_dataset``.

    Looks up the timestamp at index ``date`` in ``old_dataset`` and finds the
    largest index in ``new_dataset`` whose timestamp is not later than it
    (0 when there is none).

    Returns:
        A JSON ``Response`` whose body is that index.
    """
    with open_dataset(get_dataset_url(old_dataset)) as ds:
        timestamp = ds.timestamps[date]

    with open_dataset(get_dataset_url(new_dataset)) as ds:
        timestamps = ds.timestamps

    # ``otypes`` lets np.vectorize handle an empty timestamp array too.
    to_seconds = np.vectorize(lambda x: x.total_seconds(), otypes=[float])
    diffs = to_seconds(timestamps - timestamp)
    idx = np.where(diffs <= 0)[0]

    # Convert to a builtin int: json.dumps cannot serialise NumPy integers.
    res = int(idx.max()) if len(idx) > 0 else 0

    return Response(json.dumps(res), status=200, mimetype='application/json')
Пример #8
0
def get_point_data(dataset, variable, time, depth, location):
    """Return the value(s) of ``variable`` at a single point.

    Args:
        dataset: Dataset key, resolved through ``get_dataset_url``.
        variable: Comma-separated variable keys. An ``_anom`` suffix on any
            of them makes the climatology value be subtracted from every
            variable.
        time: Index into the dataset's timestamps.
        depth: Depth passed through to ``get_point``.
        location: Sequence whose first two items are passed to ``get_point``.

    Returns:
        dict with the list-valued keys ``value`` (each number rendered with
        4 significant digits, as a string), ``location`` (rounded to 4
        decimals), ``name`` and ``units``.
    """
    variables_anom = variable.split(",")
    variables = [re.sub('_anom$', '', v) for v in variables_anom]

    data = []
    names = []
    units = []
    with open_dataset(get_dataset_url(dataset)) as ds:
        timestamp = ds.timestamps[time]

        for v in variables:
            d = ds.get_point(
                location[0],
                location[1],
                depth,
                time,
                v
            )
            variable_name = get_variable_name(dataset, ds.variables[v])
            variable_unit = get_variable_unit(dataset, ds.variables[v])

            if variable_unit.startswith("Kelvin"):
                variable_unit = "Celsius"
                d = np.add(d, -273.15)

            data.append(d)
            names.append(variable_name)
            units.append(variable_unit)

    if variables != variables_anom:
        with open_dataset(get_dataset_climatology(dataset)) as ds:
            for idx, v in enumerate(variables):
                # NOTE(review): get_scale indexes the climatology with
                # ``timestamp.month - 1``; confirm whether the 1-based
                # month used here is intended.
                d = ds.get_point(
                    location[0],
                    location[1],
                    depth,
                    timestamp.month,
                    v
                )

                data[idx] = data[idx] - d
                names[idx] = names[idx] + " Anomaly"

    # Build real lists: on Python 3 ``map`` returns a lazy iterator, which
    # is neither indexable nor JSON-serialisable.
    result = {
        'value': ['%s' % float('%.4g' % f) for f in data],
        'location': [round(f, 4) for f in location],
        'name': names,
        'units': units,
    }
    return result
Пример #9
0
def get_scale(dataset, variable, depth, time, projection, extent, interp,
              radius, neighbours):
    """Return the (min, max) value range of ``variable`` over ``extent``.

    The extent is sampled on a 50x50 grid in ``projection`` coordinates,
    which is inverse-projected to lon/lat before the dataset is queried.
    ``interp``, ``radius`` and ``neighbours`` are passed through to
    ``get_area``. When two comma-separated variables are given, the range of
    sqrt(v0**2 + v1**2) is returned. Data whose unit starts with "Kelvin" is
    shifted by -273.15. If any variable carries an ``_anom`` suffix, the
    climatology at index ``month - 1`` of the selected timestamp is
    subtracted and a range symmetric around zero is returned.

    NaN values are ignored in both cases.
    """
    x = np.linspace(extent[0], extent[2], 50)
    y = np.linspace(extent[1], extent[3], 50)
    xx, yy = np.meshgrid(x, y)
    dest = Proj(init=projection)
    lon, lat = dest(xx, yy, inverse=True)

    variables_anom = variable.split(",")
    variables = [re.sub('_anom$', '', v) for v in variables_anom]

    with open_dataset(get_dataset_url(dataset)) as ds:
        timestamp = ds.timestamps[time]

        d = ds.get_area(np.array([lat, lon]), depth, time, variables[0],
                        interp, radius, neighbours)

        if len(variables) > 1:
            d0 = d
            d1 = ds.get_area(np.array([lat, lon]), depth, time, variables[1],
                             interp, radius, neighbours)
            d = np.sqrt(d0**2 + d1**2)

        variable_unit = get_variable_unit(dataset, ds.variables[variables[0]])
        if variable_unit.startswith("Kelvin"):
            variable_unit = "Celsius"
            d = np.add(d, -273.15)

    if variables != variables_anom:
        with open_dataset(get_dataset_climatology(dataset), 'r') as ds:
            c = ds.get_area(np.array([lat, lon]), depth, timestamp.month - 1,
                            variables[0], interp, radius, neighbours)

            if len(variables) > 1:
                c0 = c
                c1 = ds.get_area(np.array([lat, lon]), depth,
                                 timestamp.month - 1, variables[1], interp,
                                 radius, neighbours)
                c = np.sqrt(c0**2 + c1**2)

            d = d - c

            # Arrays have no ``nanmin``/``nanmax`` methods; use the NumPy
            # functions, exactly as the non-anomaly path below does.
            m = max(abs(np.nanmin(d)), abs(np.nanmax(d)))
            return -m, m

    # Return min and max values of selected variable, while ignoring
    # nan values
    return np.nanmin(d), np.nanmax(d)
def get_data_v1_0():
    """
    Returns a geojson representation of requested model data.

    API Format: GET /api/v1.0/data?...

    Required params:
    * dataset: dataset key (e.g. giops_day)
    * variable: variable key (e.g. votemper)
    * time: time index (e.g. 0)
    * depth: depth index (e.g. 49)
    * geometry_type: the "shape" of the data being requested

    Aborts with HTTP 400 when the request arguments fail schema validation.
    """

    try:
        params = GetDataSchema().load(request.args)
    except ValidationError as e:
        abort(400, str(e))

    config = DatasetConfig(params['dataset'])
    variable_key = params['variable']
    time_index = params['time']

    with open_dataset(config, variable=variable_key,
                      timestamp=time_index) as ds:
        data_variable = ds.nc_data.get_dataset_variable(variable_key)
        # Full horizontal slice at the requested time and depth.
        data_slice = data_variable[time_index, params['depth'], :, :]
        geo = data_array_to_geojson(data_slice, config.lat_var_key,
                                    config.lon_var_key)
        return jsonify(geojson.dumps(geo, allow_nan=True))
Пример #11
0
def timestamp_outOfBounds(dataset: str, time: int):
    """Return True when ``time`` is not a valid index into the timestamps of
    ``dataset``, i.e. it lies outside ``[0, len(timestamps))``.
    """
    with open_dataset(DatasetConfig(dataset)) as ds:
        timestamp_count = len(ds.timestamps)

    in_bounds = 0 <= time < timestamp_count
    return not in_bounds
Пример #12
0
def get_point_data(dataset, variable, time, depth, location):
    """Return the value(s) of ``variable`` at a single point.

    ``variable`` is a comma-separated list of variable keys. Each one is
    queried with ``get_point`` and its name and unit are read from the
    dataset configuration.

    Returns:
        dict with the list-valued keys ``value`` (formatted with 4
        significant digits), ``location`` (rounded to 4 decimals), ``name``
        and ``units``.
    """
    keys = variable.split(",")
    config = DatasetConfig(dataset)

    values, names, units = [], [], []
    with open_dataset(config) as ds:
        for key in keys:
            point_value = ds.get_point(location[0], location[1], depth, key,
                                       time)
            info = config.variable[ds.variables[key]]

            values.append(point_value)
            names.append(info.name)
            units.append(info.unit)

    return {
        'value': [format(float(v), '.4g') for v in values],
        'location': [round(coord, 4) for coord in location],
        'name': names,
        'units': units,
    }
Пример #13
0
def time_query():
    """Return the timestamps of the requested dataset as a JSON response.

    Reads ``dataset`` and optional ``quantum`` from the request arguments.
    Each entry is ``{'id': index, 'value': ISO-formatted UTC date}``. For the
    ``month`` quantum every date is moved to the 15th of its month. Without a
    ``dataset`` argument an empty list is returned.
    """

    class DateTimeEncoder(json.JSONEncoder):
        """JSON encoder that renders datetimes via ``isoformat()``."""

        def default(self, o):
            if isinstance(o, datetime.datetime):
                return o.isoformat()

            return json.JSONEncoder.default(self, o)

    entries = []
    if 'dataset' in request.args:
        dataset = request.args['dataset']
        quantum = request.args.get('quantum')
        with open_dataset(get_dataset_url(dataset)) as ds:
            for index, stamp in enumerate(ds.timestamps):
                if quantum == 'month':
                    stamp = datetime.datetime(stamp.year, stamp.month, 15)
                entries.append({
                    'id': index,
                    'value': stamp.replace(tzinfo=pytz.UTC),
                })

    entries.sort(key=lambda entry: entry['id'])

    body = json.dumps(entries, cls=DateTimeEncoder)
    return Response(body, status=200, mimetype='application/json')
Пример #14
0
    def load_data(self):
        """Load the data for ``self.variables`` at the selected time.

        A negative ``self.time`` is offset by the number of timestamps and
        the resulting index is clipped to the valid range. Sets
        ``self.data``, ``self.depths`` and ``self.timestamp``.

        Raises:
            ClientError: if ``load_misc`` raises IndexError.
        """
        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            if self.time < 0:
                # Negative indices count back from the last timestamp.
                self.time += len(dataset.timestamps)
            last_index = len(dataset.timestamps) - 1
            time_index = np.clip(self.time, 0, last_index)
            selected_timestamp = dataset.timestamps[time_index]

            try:
                self.load_misc(dataset, self.variables)
            except IndexError as e:
                raise ClientError(
                    gettext(
                        "The selected variable(s) were not found in the dataset. \
                Most likely, this variable is a derived product from existing dataset variables. \
                Please select another variable. ") + str(e))

            values, depths = self.get_data(dataset, self.variables,
                                           time_index)
            values = self.apply_scale_factors(values)

            converted = self.kelvin_to_celsius(self.variable_units, values)
            self.variable_units, values = converted

        self.data = self.subtract_other(values)
        self.depths = depths
        self.timestamp = selected_timestamp
Пример #15
0
    def load_data(self):
        """Load data along ``self.points`` for the configured time range.

        Resolves the depth label/unit, queries ``get_path`` for every
        variable (several variables are combined into their magnitude) and
        stores the path points, distances, unit-converted and scaled data,
        variable name, colormap and timestamps on ``self``.
        """
        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            _latvar, _lonvar = utils.get_latlon_vars(dataset)

            if not self.depth:
                self.depth_value, self.depth_unit = 0, "m"
            elif self.depth == 'bottom':
                self.depth_value, self.depth_unit = 'Bottom', ''
            else:
                # Clamp the requested depth index to the available depths.
                self.depth = np.clip(int(self.depth), 0,
                                     len(dataset.depths) - 1)
                self.depth_value = np.round(dataset.depths[self.depth])
                self.depth_unit = "m"

            self.fix_startend_times(dataset)

            time = range(self.starttime, self.endtime + 1)
            if len(self.variables) > 1:
                squares = []
                for name in self.variables:
                    pts, distance, _, component = dataset.get_path(
                        self.points, self.depth, time, name)
                    squares.append(component ** 2)

                # Magnitude across all components.
                value = np.sqrt(np.ma.sum(squares, axis=0))
            else:
                pts, distance, _, value = dataset.get_path(
                    self.points, self.depth, time, self.variables[0])

            self.path_points = pts
            self.distance = distance

            names = self.get_variable_names(dataset, self.variables)
            units = self.get_variable_units(dataset, self.variables)
            factors = self.get_variable_scale_factors(dataset,
                                                      self.variables)

            # Only the first variable's unit, scale factor and name are used.
            self.variable_unit, self.data = self.kelvin_to_celsius(units[0],
                                                                   value)
            self.data = np.multiply(self.data, factors[0])
            self.variable_name = names[0]
            self.data = self.data.transpose()

            if self.cmap is None:
                self.cmap = colormap.find_colormap(self.variable_name)

            self.times = dataset.timestamps[self.starttime:self.endtime + 1]
Пример #16
0
def subset_query_v1_0():
    """Create a subset of a dataset and send it as a file attachment.

    Arguments come from the query string (GET) or the form body (other
    methods). When ``area`` is given, the polygons of that predefined area
    are added to the arguments. ``time`` must be "start,end" and
    ``variables`` a comma-separated list.
    """
    args = request.args if request.method == "GET" else request.form

    if "area" in args:
        # Predefined area selected
        area = args.get("area")
        area_group = area.split("/", 1)[0]

        known_areas = utils.misc.list_areas(area_group, simplify=False)
        matching = [entry for entry in known_areas
                    if entry.get("key") == area]

        args = args.to_dict()
        args["polygons"] = matching[0]["polygons"]

    config = DatasetConfig(args.get("dataset_name"))
    time_range = args["time"].split(",")
    variables = args["variables"].split(",")

    with open_dataset(config,
                      variable=variables,
                      timestamp=int(time_range[0]),
                      endtime=int(time_range[1])) as dataset:
        working_dir, subset_filename = dataset.nc_data.subset(args)

    return send_from_directory(working_dir, subset_filename,
                               as_attachment=True)
Пример #17
0
def subset_query_v1_0():
    """Build a dataset subset and return it as a downloadable attachment.

    Reads the arguments from ``request.args`` for GET requests and from
    ``request.form`` otherwise. If an ``area`` key is present, the matching
    predefined area's polygons are stored under ``polygons``. The ``time``
    argument is split into start and end, ``variables`` into a list.
    """
    if request.method == 'GET':
        args = request.args
    else:
        args = request.form

    if 'area' in args:
        # Predefined area selected
        area = args.get('area')
        group, *_ = area.split('/', 1)

        areas = utils.misc.list_areas(group, simplify=False)
        selected = [a for a in areas if a.get('key') == area]

        args = args.to_dict()
        args['polygons'] = selected[0]['polygons']

    config = DatasetConfig(args.get('dataset_name'))
    time_range = args['time'].split(',')
    variables = args['variables'].split(',')

    open_kwargs = {
        'variable': variables,
        'timestamp': int(time_range[0]),
        'endtime': int(time_range[1]),
    }
    with open_dataset(config, **open_kwargs) as dataset:
        working_dir, subset_filename = dataset.nc_data.subset(args)

    return send_from_directory(
        working_dir, subset_filename, as_attachment=True)
Пример #18
0
def plot_v1_0():
    """Convert the query's time fields to timestamps and delegate to
    ``plot_impl``.

    The JSON ``query`` argument is read from the query string (GET) or the
    form body. Either ``time`` or both ``starttime`` and ``endtime`` are
    converted with the dataset's ``convert_to_timestamp``. When ``data`` is
    present in the query string, a JSON response holding the string form,
    shape and mask of the result is returned instead of the result itself.
    """
    args = request.args if request.method == 'GET' else request.form
    query = json.loads(args.get('query'))

    with open_dataset(get_dataset_url(query.get('dataset'))) as dataset:
        time_keys = ('time',) if 'time' in query else ('starttime', 'endtime')
        for key in time_keys:
            query[key] = dataset.convert_to_timestamp(query.get(key))

        resp = routes.routes_impl.plot_impl(args, query)

        # The digest is computed but not used further in this function.
        m = hashlib.md5(str(resp).encode())

        if 'data' not in request.args:
            return resp

        payload = json.dumps({
            'data': str(resp),
            'shape': resp.shape,
            'mask': str(resp.mask),
        })
        return Response(payload, status=200, mimetype='application/json')
Пример #19
0
def range_query_v1_0(dataset, variable, interp, radius, neighbours, projection,
                     extent, depth, time):
    """Convert ``time`` with the dataset's ``convert_to_timestamp`` and
    return the result of ``range_query_impl`` for the converted value.
    """
    with open_dataset(get_dataset_url(dataset)) as opened:
        converted_time = opened.convert_to_timestamp(time)
        # The implementation is called while the dataset is still open.
        return routes.routes_impl.range_query_impl(
            interp, radius, neighbours, dataset, projection, extent,
            variable, depth, converted_time)
Пример #20
0
def tile_v1_0(projection, interp, radius, neighbours, dataset, variable, time,
              depth, scale, zoom, x, y):
    """Convert ``time`` with the dataset's ``convert_to_timestamp`` and
    return the result of ``tile_impl`` for the converted value.
    """
    with open_dataset(get_dataset_url(dataset)) as opened:
        converted_time = opened.convert_to_timestamp(time)
        # The implementation is called while the dataset is still open.
        return routes.routes_impl.tile_impl(
            projection, interp, radius, neighbours, dataset, variable,
            converted_time, depth, scale, zoom, x, y)
Пример #21
0
def get_scale(dataset, variable, depth, timestamp, projection, extent, interp,
              radius, neighbours):
    """
    Calculates and returns the range (min, max values) of a selected variable,
    given the current map extents.

    The extent is sampled on a 50x50 grid that is inverse-projected to
    lon/lat. When two comma-separated variables are given, their magnitude
    is used. The data is passed to ``normalize_scale`` together with the
    variable's configuration entry.
    """
    grid_x, grid_y = np.meshgrid(
        np.linspace(extent[0], extent[2], 50),
        np.linspace(extent[1], extent[3], 50),
    )
    projector = Proj(init=projection)
    lon, lat = projector(grid_x, grid_y, inverse=True)

    variables = variable.split(",")
    config = DatasetConfig(dataset)

    with open_dataset(config, variable=variables, timestamp=timestamp) as ds:
        # At most the first two variables are queried.
        components = [
            ds.get_area(np.array([lat, lon]), depth, timestamp, name,
                        interp, radius, neighbours)
            for name in variables[:2]
        ]

        if len(components) > 1:
            data = __magnitude(components[0], components[1])
        else:
            data = components[0]

        return normalize_scale(data, config.variable[",".join(variables)])
Пример #22
0
def timestamps():
    """
    Returns all timestamps available for a given variable in a dataset.
    The result depends on the variable because a dataset can have several
    "quantums": surface 2D variables may be hourly while 3D variables may be
    daily.

    API Format: /api/v1.0/timestamps/?dataset=''&variable=''

    Required Arguments:
    * dataset : Dataset key - Can be found using /api/v1.0/datasets
    * variable : Variable key - Can be found using /api/v1.0/variables/?dataset='...'...

    Raises:
        APIError: if dataset or variable is not specified in the request

    Returns:
        Response object containing one ``{'id': raw_timestamp, 'value': date}``
        entry per timestamp of the given dataset and variable, sorted by id.
    """

    args = request.args

    if "dataset" not in args:
        raise APIError("Please specify a dataset via ?dataset=dataset_name")
    config = DatasetConfig(args.get("dataset"))

    if "variable" not in args:
        raise APIError("Please specify a variable via ?variable=variable_name")
    variable = args.get("variable")

    # Handle possible list of URLs for staggered grid velocity field datasets
    url = config.url
    if isinstance(url, list):
        url = url[0]

    if not url.endswith(".sqlite3"):
        with open_dataset(config, variable=variable) as ds:
            vals = [int(v) for v in ds.nc_data.time_variable.values]
    else:
        with SQLiteDatabase(url) as db:
            if variable in config.calculated_variables:
                # Calculated variable: use the timestamps of the first data
                # variable referenced by its equation.
                equation = config.calculated_variables[variable]['equation']
                known_keys = [v.key for v in db.get_data_variables()]
                data_vars = get_data_vars_from_equation(equation, known_keys)
                vals = db.get_timestamps(data_vars[0])
            else:
                vals = db.get_timestamps(variable)
    converted_vals = time_index_to_datetime(vals, config.time_dim_units)

    result = []
    for idx, date in enumerate(converted_vals):
        is_monthly = (config.quantum == 'month'
                      or config.variable[variable].quantum == 'month')
        if is_monthly:
            # Monthly data is reported on the 15th of the month.
            date = datetime.datetime(date.year, date.month, 15)
        result.append({'id': vals[idx], 'value': date})
    result.sort(key=lambda entry: entry['id'])

    body = json.dumps(result, cls=DateTimeEncoder)
    return Response(body, status=200, mimetype='application/json')
Пример #23
0
def get_data_v1_0(dataset: str, variable: str, time: str, depth: str,
                  location: str):
    """Convert ``time`` with the dataset's ``convert_to_timestamp`` and
    return the result of ``get_data_impl`` for the converted value.
    """
    with open_dataset(DatasetConfig(dataset)) as opened:
        converted_time = opened.convert_to_timestamp(time)
        # The implementation is called while the dataset is still open.
        return routes.routes_impl.get_data_impl(
            dataset, variable, converted_time, depth, location)
Пример #24
0
    def load_data(self):
        """Load time series for every point, variable and depth.

        ``self.depth`` is normalised to a sorted list. Negative start/end
        times are offset by the number of timestamps and both are clipped to
        the valid index range. The collected arrays are nested as
        point -> variable -> depth. Sets ``self.data``, ``self.data_depth``
        and ``self.timestamp``.
        """
        if not isinstance(self.depth, list):
            self.depth = [self.depth]

        self.depth = sorted(self.depth)

        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            if self.starttime < 0:
                self.starttime += len(dataset.timestamps)
            if self.endtime < 0:
                self.endtime += len(dataset.timestamps)
            last_index = len(dataset.timestamps) - 1
            start = np.clip(self.starttime, 0, last_index)
            end = np.clip(self.endtime, 0, last_index)

            timestamp = dataset.timestamps[start:end + 1]

            self.load_misc(dataset, self.variables)

            values_by_point = []
            depths_by_point = []
            for point in self.points:
                values_by_variable = []
                depths_by_variable = []
                for name in self.variables:
                    values_by_level = []
                    depths_by_level = []
                    for level in self.depth:
                        series, series_depth = dataset.get_timeseries_point(
                            float(point[0]),
                            float(point[1]),
                            level,
                            start,
                            end,
                            name,
                            return_depth=True
                        )
                        values_by_level.append(series)
                        depths_by_level.append(series_depth)
                    values_by_variable.append(np.ma.array(values_by_level))
                    depths_by_variable.append(np.ma.array(depths_by_level))
                values_by_point.append(np.ma.array(values_by_variable))
                depths_by_point.append(np.ma.array(depths_by_variable))

            point_data = np.ma.array(values_by_point)
            point_depth = np.ma.array(depths_by_point)

            # NOTE(review): the first axis of point_data is built per point,
            # while scale_factors are enumerated here by position — confirm
            # that indexing the first axis is intended.
            for idx, factor in enumerate(self.scale_factors):
                if factor == 1.0:
                    continue
                point_data[idx] = np.multiply(point_data[idx], factor)

            converted = self.kelvin_to_celsius(self.variable_units,
                                               point_data)
            self.variable_units, point_data = converted

        self.data = self.subtract_climatology(point_data, timestamp)
        self.data_depth = point_depth
        self.timestamp = timestamp
Пример #25
0
    def load_data(self):
        """Resolve the time index, store its timestamp and load the
        temperature/salinity data through ``load_temp_sal``.

        A negative ``self.time`` is offset by the number of timestamps and
        the resulting index is clipped to the valid range.
        """
        with open_dataset(self.dataset_config) as ds:
            if self.time < 0:
                # Negative indices count back from the last timestamp.
                self.time += len(ds.timestamps)
            last_index = len(ds.timestamps) - 1
            time_index = np.clip(self.time, 0, last_index)

            self.timestamp = ds.timestamps[time_index]

            self.load_temp_sal(ds, time_index)
Пример #26
0
def range_query_v1_0(dataset: str, variable: str, interp: str, radius: int,
                     neighbours: int, projection: str, extent: str, depth: str,
                     time: str):
    """Convert ``time`` with the dataset's ``convert_to_timestamp`` and
    return the result of ``range_query_impl`` for the converted value.
    """
    with open_dataset(DatasetConfig(dataset)) as opened:
        converted_time = opened.convert_to_timestamp(time)
        # The implementation is called while the dataset is still open.
        return routes.routes_impl.range_query_impl(
            interp, radius, neighbours, dataset, projection, extent,
            variable, depth, converted_time)
Пример #27
0
def scale(args):
    """Render a vertical colour bar for a variable and return it as PNG data.

    Args:
        args: Mapping providing ``dataset`` (dataset key), ``scale``
            ("min,max" as comma-separated floats) and ``variable`` (one or
            two comma-separated variable keys; a trailing ``_anom`` selects
            the anomaly colormap and label).

    Returns:
        The encoded PNG image as returned by the buffer's ``getvalue()``.
    """
    # Local import: the PNG written by savefig is binary data, which a text
    # StringIO rejects on Python 3.
    from io import BytesIO

    dataset_name = args.get('dataset')
    scale_range = [float(component)
                   for component in args.get('scale').split(',')]

    variable = args.get('variable')
    anom = variable.endswith('_anom')
    if anom:
        variable = variable[0:-5]

    variable = variable.split(',')

    with open_dataset(get_dataset_url(dataset_name)) as dataset:
        variable_unit = get_variable_unit(dataset_name,
                                          dataset.variables[variable[0]])
        variable_name = get_variable_name(dataset_name,
                                          dataset.variables[variable[0]])

    if variable_unit.startswith("Kelvin"):
        variable_unit = "Celsius"

    if anom:
        cmap = colormap.colormaps['anomaly']
        variable_name = gettext("%s Anomaly") % variable_name
    else:
        cmap = colormap.find_colormap(variable_name)

    if len(variable) == 2:
        if not anom:
            cmap = colormap.colormaps.get('speed')

        # Strip the component/direction words from the label.
        variable_name = re.sub(
            r"(?i)( x | y |zonal |meridional |northward |eastward )", " ",
            variable_name)
        variable_name = re.sub(r" +", " ", variable_name)

    fig = plt.figure(figsize=(2, 5), dpi=75)
    try:
        ax = fig.add_axes([0.05, 0.05, 0.25, 0.9])
        norm = matplotlib.colors.Normalize(vmin=scale_range[0],
                                           vmax=scale_range[1])

        formatter = ScalarFormatter()
        formatter.set_powerlimits((-3, 4))
        bar = ColorbarBase(ax, cmap=cmap, norm=norm, orientation='vertical',
                           format=formatter)
        bar.set_label("%s (%s)" % (variable_name.title(),
                                   utils.mathtext(variable_unit)))

        buf = BytesIO()
        try:
            plt.savefig(buf, format='png', dpi='figure', transparent=False,
                        bbox_inches='tight', pad_inches=0.05)
            return buf.getvalue()
        finally:
            buf.close()
    finally:
        # Always release the figure, even when rendering fails.
        plt.close(fig)
Пример #28
0
def scale(args):
    """Render a vertical colour bar for a variable and return it as PNG data.

    Args:
        args: Mapping providing ``dataset`` (dataset key), ``scale``
            ("min,max" as comma-separated floats) and ``variable`` (one or
            two comma-separated variable keys; a trailing ``_anom`` selects
            the anomaly colormap and label).

    Returns:
        The encoded PNG image as returned by the buffer's ``getvalue()``.
    """
    # Local import: the PNG written by savefig is binary data, which a text
    # StringIO rejects on Python 3.
    from io import BytesIO

    dataset_name = args.get('dataset')
    scale_range = [float(component)
                   for component in args.get('scale').split(',')]

    variable = args.get('variable')
    anom = variable.endswith('_anom')
    if anom:
        variable = variable[0:-5]

    variable = variable.split(',')

    with open_dataset(get_dataset_url(dataset_name)) as dataset:
        variable_unit = get_variable_unit(dataset_name,
                                          dataset.variables[variable[0]])
        variable_name = get_variable_name(dataset_name,
                                          dataset.variables[variable[0]])

    if variable_unit.startswith("Kelvin"):
        variable_unit = "Celsius"

    if anom:
        cmap = colormap.colormaps['anomaly']
        variable_name = gettext("%s Anomaly") % variable_name
    else:
        cmap = colormap.find_colormap(variable_name)

    if len(variable) == 2:
        if not anom:
            cmap = colormap.colormaps.get('speed')

        # Strip the component/direction words from the label.
        variable_name = re.sub(
            r"(?i)( x | y |zonal |meridional |northward |eastward )", " ",
            variable_name)
        variable_name = re.sub(r" +", " ", variable_name)

    fig = plt.figure(figsize=(2, 5), dpi=75)
    try:
        ax = fig.add_axes([0.05, 0.05, 0.25, 0.9])
        norm = matplotlib.colors.Normalize(vmin=scale_range[0],
                                           vmax=scale_range[1])

        formatter = ScalarFormatter()
        formatter.set_powerlimits((-3, 4))
        bar = ColorbarBase(ax, cmap=cmap, norm=norm, orientation='vertical',
                           format=formatter)
        bar.set_label("%s (%s)" % (variable_name.title(),
                                   utils.mathtext(variable_unit)))

        buf = BytesIO()
        try:
            plt.savefig(buf, format='png', dpi='figure', transparent=False,
                        bbox_inches='tight', pad_inches=0.05)
            return buf.getvalue()
        finally:
            buf.close()
    finally:
        # Always release the figure, even when rendering fails.
        plt.close(fig)
Пример #29
0
def tile_v1_0(projection: str, interp: str, radius: int, neighbours: int,
              dataset: str, variable: str, time: str, depth: str, scale: str,
              zoom: int, x: int, y: int):
    """Serve a tile for the v1.0 API.

    Opens the dataset named by *dataset*, converts *time* with the
    dataset's ``convert_to_timestamp`` and forwards every argument (with
    the converted time) to ``routes.routes_impl.tile_impl``.

    Returns:
        Whatever ``tile_impl`` returns.
    """
    dataset_config = DatasetConfig(dataset)
    with open_dataset(dataset_config) as opened:
        converted_time = opened.convert_to_timestamp(time)
        tile = routes.routes_impl.tile_impl(
            projection, interp, radius, neighbours, dataset, variable,
            converted_time, depth, scale, zoom, x, y)
    return tile
Пример #30
0
    def load_data(self):
        """Locate the temperature and salinity variables and load them.

        The variable keys are found by matching the configured variables
        against two patterns; the dataset is then opened for exactly those
        two variables at ``self.time``. Sets ``self.iso_timestamp`` and
        delegates the actual loading to ``__load_temp_sal``.
        """
        configured = self.dataset_config.variables
        temperature_key = self.__find_var_key(
            configured, r'^(.*temp.*|thetao.*)$')
        salinity_key = self.__find_var_key(configured, r'^(.*sal.*|so)$')
        wanted = [temperature_key, salinity_key]

        with open_dataset(self.dataset_config, timestamp=self.time,
                          variable=wanted) as ds:
            self.iso_timestamp = ds.nc_data.timestamp_to_iso_8601(self.time)
            self.__load_temp_sal(ds, self.time, temperature_key, salinity_key)
Пример #31
0
    def load_data(self):
        """Load time series for every point, variable and requested depth.

        ``self.depth`` is normalised to a sorted list. Negative
        ``self.starttime`` / ``self.endtime`` are counted from the end of
        the dataset's timestamps, and both are clipped to the valid range.

        Sets ``self.data`` (after ``subtract_other``), ``self.data_depth``
        and ``self.timestamp``; ``self.variable_units`` is replaced by the
        value returned from ``kelvin_to_celsius``.
        """
        if not isinstance(self.depth, list):
            self.depth = [self.depth]

        self.depth = sorted(self.depth)

        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            n_times = len(dataset.timestamps)
            if self.starttime < 0:
                self.starttime += n_times
            if self.endtime < 0:
                self.endtime += n_times
            start = np.clip(self.starttime, 0, n_times - 1)
            end = np.clip(self.endtime, 0, n_times - 1)

            timestamp = dataset.timestamps[start:end + 1]

            self.load_misc(dataset, self.variables)

            # Resulting layout: [point, variable, depth, ...].
            point_data = []
            point_depth = []
            for p in self.points:
                data = []
                depth = []
                for v in self.variables:
                    dd = []
                    jj = []
                    for d in self.depth:
                        da, dp = dataset.get_timeseries_point(
                            float(p[0]),
                            float(p[1]),
                            d,
                            start,
                            end,
                            v,
                            return_depth=True)
                        dd.append(da)
                        jj.append(dp)
                    data.append(np.ma.array(dd))
                    depth.append(np.ma.array(jj))
                point_data.append(np.ma.array(data))
                point_depth.append(np.ma.array(depth))

            point_data = np.ma.array(point_data)
            point_depth = np.ma.array(point_depth)

            # Variables live on axis 1 (axis 0 is the point), so the factor
            # must be applied there; indexing axis 0 scaled whole points.
            # NOTE(review): assumes self.scale_factors holds one entry per
            # variable (presumably filled in by load_misc) - confirm.
            for idx, factor in enumerate(self.scale_factors):
                if factor != 1.0:
                    point_data[:, idx] = np.multiply(
                        point_data[:, idx], factor)

            self.variable_units, point_data = self.kelvin_to_celsius(
                self.variable_units, point_data)

        self.data = self.subtract_other(point_data)
        self.data_depth = point_depth
        self.timestamp = timestamp
Пример #32
0
    def test_open_dataset_returns_nemo_object(self, patch_calculated_data,
                                              patch_get_dataset_config):
        """open_dataset yields a Nemo object for the NEMO test file."""
        patch_get_dataset_config.return_value = {
            "giops": {
                "url": "tests/testdata/nemo_test.nc",
                "variables": {}
            }
        }
        config = DatasetConfig("giops")

        with open_dataset(config) as ds:
            # assertIsInstance reports the actual type when it fails.
            self.assertIsInstance(ds, Nemo)
Пример #33
0
    def test_open_dataset_meta_only_returns_fvcom_object(
            self, patch_calculated_data, patch_get_dataset_config):
        """open_dataset(meta_only=True) yields Fvcom for the FVCOM file."""
        patch_get_dataset_config.return_value = {
            "giops": {
                "url": "tests/testdata/fvcom_test.nc",
                "variables": {}
            }
        }
        config = DatasetConfig("giops")

        with open_dataset(config, meta_only=True) as ds:
            # assertIsInstance reports the actual type when it fails.
            self.assertIsInstance(ds, Fvcom)
Пример #34
0
    def load_data(self):
        """Load temperature/salinity at ``self.time`` and fix the units.

        A negative ``self.time`` counts from the end of the dataset's
        timestamps; the index actually used is clipped to the valid range.
        Sets ``self.timestamp`` and updates ``self.variable_units[0]`` and
        ``self.temperature`` from ``kelvin_to_celsius``.
        """
        with open_dataset(get_dataset_url(self.dataset_name)) as ds:
            n_times = len(ds.timestamps)
            if self.time < 0:
                self.time += n_times
            time_index = np.clip(self.time, 0, n_times - 1)

            self.timestamp = ds.timestamps[time_index]
            self.load_temp_sal(ds, time_index)

            # The conversion is deliberately resolved past
            # point.PointPlotter in the MRO, as in the original call.
            converted = super(point.PointPlotter, self).kelvin_to_celsius(
                self.variable_units[0], self.temperature)
            self.variable_units[0], self.temperature = converted
Пример #35
0
    def load_data(self):
        """Load the data and depths for ``self.variables`` at ``self.time``.

        A negative ``self.time`` counts from the end of the dataset's
        timestamps; the index actually used is clipped to the valid range.
        Sets ``self.data`` (after ``subtract_climatology``),
        ``self.depths``, ``self.timestamp`` and ``self.variable_units``.
        """
        with open_dataset(get_dataset_url(self.dataset_name)) as ds:
            n_times = len(ds.timestamps)
            if self.time < 0:
                self.time += n_times
            time_index = np.clip(self.time, 0, n_times - 1)
            selected_timestamp = ds.timestamps[time_index]

            self.load_misc(ds, self.variables)
            values, value_depths = self.get_data(
                ds, self.variables, time_index)
            values = self.apply_scale_factors(values)

            units, values = self.kelvin_to_celsius(
                self.variable_units, values)
            self.variable_units = units

        self.data = self.subtract_climatology(values, selected_timestamp)
        self.depths = value_depths
        self.timestamp = selected_timestamp
Пример #36
0
    def load_data(self):
        """Load everything the map needs: grid, fields, overlays and masks.

        Steps, in order:
          1. choose a basemap from ``self.projection`` and build the
             sampling grid (500 cells along the longer side);
          2. load the plotted variable(s), plus the optional quiver and
             contour fields, from the dataset;
          3. if an anomaly was requested, subtract the climatology;
          4. load bathymetry and mask the fields;
          5. for csv/odv/txt/geotiff output with ``self.area`` set, mask
             everything outside the requested polygons.

        Sets, among others, ``self.basemap``, ``self.longitude``,
        ``self.latitude``, ``self.data``, ``self.quiver_data``,
        ``self.contour_data``, ``self.timestamp``, ``self.bathymetry`` and
        ``self.depth_value``.
        """
        if self.projection == 'EPSG:32661':
            blat = min(self.bounds[0], self.bounds[2])
            # Round the bounding latitude down to a multiple of 5.
            blat = 5 * np.floor(blat / 5)
            self.basemap = basemap.load_map('npstere', (blat, 0), None, None)
        elif self.projection == 'EPSG:3031':
            blat = max(self.bounds[0], self.bounds[2])
            # Round the bounding latitude up to a multiple of 5.
            blat = 5 * np.ceil(blat / 5)
            self.basemap = basemap.load_map('spstere', (blat, 180), None, None)
        else:
            distance = VincentyDistance()
            # Extent through the centroid, padded by 25%.
            height = distance.measure(
                (self.bounds[0], self.centroid[1]),
                (self.bounds[2], self.centroid[1])
            ) * 1000 * 1.25
            width = distance.measure(
                (self.centroid[0], self.bounds[1]),
                (self.centroid[0], self.bounds[3])
            ) * 1000 * 1.25
            self.basemap = basemap.load_map(
                'lcc', self.centroid, height, width
            )

        # Keep the longer side at 500 cells; scale the other by the aspect.
        if self.basemap.aspect < 1:
            gridx = 500
            gridy = int(500 * self.basemap.aspect)
        else:
            gridy = 500
            gridx = int(500 / self.basemap.aspect)

        self.longitude, self.latitude = self.basemap.makegrid(gridx, gridy)

        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            # Negative time counts from the end; then clip to valid range.
            if self.time < 0:
                self.time += len(dataset.timestamps)
            self.time = np.clip(self.time, 0, len(dataset.timestamps) - 1)

            # Name, unit and scale factor all come from the first variable.
            self.variable_unit = self.get_variable_units(
                dataset, self.variables
            )[0]
            self.variable_name = self.get_variable_names(
                dataset,
                self.variables
            )[0]
            scale_factor = self.get_variable_scale_factors(
                dataset, self.variables
            )[0]

            if self.cmap is None:
                if len(self.variables) == 1:
                    self.cmap = colormap.find_colormap(self.variable_name)
                else:
                    self.cmap = colormap.colormaps.get('speed')

            if len(self.variables) == 2:
                self.variable_name = self.vector_name(self.variable_name)

            if self.depth == 'bottom':
                depth_value = 'Bottom'
            else:
                self.depth = np.clip(
                    int(self.depth), 0, len(dataset.depths) - 1)
                depth_value = dataset.depths[self.depth]

            data = []
            allvars = []
            for v in self.variables:
                var = dataset.variables[v]
                allvars.append(v)
                if self.filetype in ['csv', 'odv', 'txt']:
                    # Data exports also need the depth returned by get_area.
                    d, depth_value = dataset.get_area(
                        np.array([self.latitude, self.longitude]),
                        self.depth,
                        self.time,
                        v,
                        return_depth=True
                    )
                else:
                    d = dataset.get_area(
                        np.array([self.latitude, self.longitude]),
                        self.depth,
                        self.time,
                        v
                    )

                d = np.multiply(d, scale_factor)
                self.variable_unit, d = self.kelvin_to_celsius(
                    self.variable_unit, d)

                data.append(d)
                if self.filetype not in ['csv', 'odv', 'txt']:
                    if len(var.dimensions) == 3:
                        # Three dimensions: no depth label is shown.
                        self.depth_label = ""
                    elif self.depth == 'bottom':
                        self.depth_label = " at Bottom"
                    else:
                        self.depth_label = " at " + \
                            str(int(np.round(depth_value))) + " m"

            if len(data) == 2:
                # Two components: plot their magnitude.
                data[0] = np.sqrt(data[0] ** 2 + data[1] ** 2)

            self.data = data[0]

            quiver_data = []

            if self.quiver is not None and \
                self.quiver['variable'] != '' and \
                    self.quiver['variable'] != 'none':
                for v in self.quiver['variable'].split(','):
                    allvars.append(v)
                    var = dataset.variables[v]
                    quiver_unit = get_variable_unit(self.dataset_name, var)
                    quiver_name = get_variable_name(self.dataset_name, var)
                    # Arrows are sampled on a coarser 50x50 grid.
                    quiver_lon, quiver_lat = self.basemap.makegrid(50, 50)
                    d = dataset.get_area(
                        np.array([quiver_lat, quiver_lon]),
                        self.depth,
                        self.time,
                        v
                    )
                    quiver_data.append(d)

                self.quiver_name = self.vector_name(quiver_name)
                self.quiver_longitude = quiver_lon
                self.quiver_latitude = quiver_lat
                self.quiver_unit = quiver_unit
            self.quiver_data = quiver_data

            # If every variable used so far has three dimensions, fall
            # back to depth index 0.
            if all(map(lambda v: len(dataset.variables[v].dimensions) == 3,
                       allvars)):
                self.depth = 0

            contour_data = []
            if self.contour is not None and \
                self.contour['variable'] != '' and \
                    self.contour['variable'] != 'none':
                d = dataset.get_area(
                    np.array([self.latitude, self.longitude]),
                    self.depth,
                    self.time,
                    self.contour['variable']
                )
                contour_unit = get_variable_unit(
                    self.dataset_name,
                    dataset.variables[self.contour['variable']])
                contour_name = get_variable_name(
                    self.dataset_name,
                    dataset.variables[self.contour['variable']])
                contour_factor = get_variable_scale_factor(
                    self.dataset_name,
                    dataset.variables[self.contour['variable']])
                contour_unit, d = self.kelvin_to_celsius(contour_unit, d)
                d = np.multiply(d, contour_factor)
                contour_data.append(d)
                self.contour_unit = contour_unit
                self.contour_name = contour_name

            self.contour_data = contour_data

            self.timestamp = dataset.timestamps[self.time]

        if self.variables != self.variables_anom:
            # Anomaly requested: subtract the climatology for the month of
            # the selected timestamp.
            self.variable_name += " Anomaly"
            with open_dataset(
                get_dataset_climatology(self.dataset_name)
            ) as dataset:
                data = []
                for v in self.variables:
                    var = dataset.variables[v]
                    d = dataset.get_area(
                        np.array([self.latitude, self.longitude]),
                        self.depth,
                        self.timestamp.month - 1,
                        v
                    )
                    data.append(d)

                if len(data) == 2:
                    data = np.sqrt(data[0] ** 2 + data[1] ** 2)
                else:
                    data = data[0]

                u, data = self.kelvin_to_celsius(
                    dataset.variables[self.variables[0]].unit,
                    data)

                self.data -= data

        # Load bathymetry data
        self.bathymetry = overlays.bathymetry(
            self.basemap,
            self.latitude,
            self.longitude,
            blur=2
        )

        if self.depth != 'bottom' and self.depth != 0:
            # At depth: mask cells where the bathymetry value is smaller
            # than the requested depth.
            if len(quiver_data) > 0:
                quiver_bathymetry = overlays.bathymetry(
                    self.basemap, quiver_lat, quiver_lon)

            self.data[np.where(self.bathymetry < depth_value)] = np.ma.masked
            for d in self.quiver_data:
                d[np.where(quiver_bathymetry < depth_value)] = np.ma.masked
            for d in self.contour_data:
                d[np.where(self.bathymetry < depth_value)] = np.ma.masked
        else:
            # Surface/bottom: mask every cell that maskoceans left unmasked.
            mask = maskoceans(self.longitude, self.latitude, self.data).mask
            self.data[~mask] = np.ma.masked
            for d in self.quiver_data:
                mask = maskoceans(
                    self.quiver_longitude, self.quiver_latitude, d).mask
                d[~mask] = np.ma.masked
            for d in contour_data:
                mask = maskoceans(self.longitude, self.latitude, d).mask
                d[~mask] = np.ma.masked

        if self.area and self.filetype in ['csv', 'odv', 'txt', 'geotiff']:
            area_polys = []
            for a in self.area:
                rings = [LinearRing(p) for p in a['polygons']]
                innerrings = [LinearRing(p) for p in a['innerrings']]

                polygons = []
                for r in rings:
                    # Attach each inner ring to the outer ring containing it.
                    inners = []
                    for ir in innerrings:
                        if r.contains(ir):
                            inners.append(ir)

                    polygons.append(Poly(r, inners))

                area_polys.append(MultiPolygon(polygons))

            points = [Point(p) for p in zip(self.latitude.ravel(),
                                            self.longitude.ravel())]

            indicies = []
            for a in area_polys:
                # Build a real list: on Python 3 ``map`` is lazy and
                # np.where cannot evaluate it element-wise.
                inside = [a.contains(p) for p in points]
                indicies.append(np.where(inside)[0])

            # Areas usually select different numbers of points, so the
            # index arrays are joined rather than stacked into one array.
            indicies = np.unique(np.concatenate(indicies))
            newmask = np.ones(self.data.shape, dtype=bool)
            newmask[np.unravel_index(indicies, newmask.shape)] = False
            self.data.mask |= newmask

        self.depth_value = depth_value
Пример #37
0
def plot(projection, x, y, z, args):
    """Render one 256x256 map tile and return it as PNG data.

    Args:
        projection: projection identifier, used for the tile coordinates
            and to select the bathymetry file.
        x, y, z: tile column, row and zoom level.
        args: mapping exposing ``get``; the keys read are ``'dataset'``,
            ``'variable'`` (one or two comma-separated names, optionally
            ending in ``_anom``), ``'depth'``, ``'scale'`` (``"min,max"``)
            and ``'time'`` (index; missing or empty means the last one).

    Returns:
        The bytes of the encoded PNG image.
    """
    lat, lon = get_latlon_coords(projection, x, y, z)
    if len(lat.shape) == 1:
        lat, lon = np.meshgrid(lat, lon)

    dataset_name = args.get('dataset')
    variable = args.get('variable')
    if variable.endswith('_anom'):
        variable = variable[0:-5]
        anom = True
    else:
        anom = False

    variable = variable.split(',')

    depth = args.get('depth')

    scale = args.get('scale')
    scale = [float(component) for component in scale.split(',')]

    data = []
    with open_dataset(get_dataset_url(dataset_name)) as dataset:
        time_arg = args.get('time')
        if time_arg is None or (isinstance(time_arg, str) and
                                len(time_arg) == 0):
            time = -1
        else:
            time = int(time_arg)

        # Wrap the index into [0, len). Unlike the previous add/subtract
        # loops this raises instead of spinning forever when the dataset
        # has no timestamps.
        time %= len(dataset.timestamps)

        timestamp = dataset.timestamps[time]

        for v in variable:
            data.append(dataset.get_area(
                np.array([lat, lon]),
                depth,
                time,
                v
            ))

        # Name, unit and scale factor all come from the first variable.
        first_variable = dataset.variables[variable[0]]
        variable_name = get_variable_name(dataset_name, first_variable)
        variable_unit = get_variable_unit(dataset_name, first_variable)
        scale_factor = get_variable_scale_factor(dataset_name, first_variable)
        if anom:
            cmap = colormap.colormaps['anomaly']
        else:
            cmap = colormap.find_colormap(variable_name)

        if depth != 'bottom':
            depthm = dataset.depths[depth]
        else:
            depthm = 0

    if scale_factor != 1.0:
        for idx, val in enumerate(data):
            data[idx] = np.multiply(val, scale_factor)

    if variable_unit.startswith("Kelvin"):
        variable_unit = "Celsius"
        for idx, val in enumerate(data):
            data[idx] = np.add(val, -273.15)

    # ``elif``: once the single field is unwrapped, ``len(data)`` is the
    # array's first dimension and must not be tested again.
    if len(data) == 1:
        data = data[0]
    elif len(data) == 2:
        data = np.sqrt(data[0] ** 2 + data[1] ** 2)
        if not anom:
            cmap = colormap.colormaps.get('speed')

    if anom:
        with open_dataset(get_dataset_climatology(dataset_name)) as dataset:
            # NOTE(review): as before, only the last requested variable is
            # read from the climatology, even for two-component fields.
            a = dataset.get_area(
                np.array([lat, lon]),
                depth,
                timestamp.month - 1,
                variable[-1]
            )
            data = data - a

    data = data.transpose()
    xpx = x * 256
    ypx = y * 256

    with Dataset(ETOPO_FILE % (projection, z), 'r') as dataset:
        bathymetry = dataset["z"][ypx:(ypx + 256), xpx:(xpx + 256)]

    bathymetry = gaussian_filter(bathymetry, 0.5)

    data[np.where(bathymetry > -depthm)] = np.ma.masked

    sm = matplotlib.cm.ScalarMappable(
        matplotlib.colors.Normalize(vmin=scale[0], vmax=scale[1]), cmap=cmap)
    img = sm.to_rgba(np.squeeze(data))

    im = Image.fromarray((img * 255.0).astype(np.uint8))

    # The temp file is created as late as possible and always removed,
    # after it has been closed.
    f, fname = tempfile.mkstemp()
    os.close(f)
    try:
        im.save(fname, format='png', optimize=True)
        # PNG is binary data: read it back in binary mode.
        with open(fname, 'rb') as f:
            buf = f.read()
    finally:
        os.remove(fname)

    return buf
Пример #38
0
    def load_data(self):
        """Load the transect, optional surface data and bathymetry.

        Sets ``self.timestamp``, ``self.depth``, ``self.depth_unit``,
        ``self.transect_data``, ``self.bathymetry`` and, when
        ``self.surface`` is set, ``self.surface_data``. ``self.cmap`` is
        filled in if it was ``None``. When an anomaly is requested
        (``self.variables != self.variables_anom``) the climatology for
        the timestamp's month is subtracted from the transect.
        """
        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            # Negative time counts from the end; then clip to valid range.
            if self.time < 0:
                self.time += len(dataset.timestamps)
            time = np.clip(self.time, 0, len(dataset.timestamps) - 1)

            # A requested variable without a depth dimension is replaced by
            # a depth-bearing variable with more than three dimensions that
            # was not itself requested (the last such candidate wins).
            for idx, v in enumerate(self.variables):
                var = dataset.variables[v]
                if not (set(var.dimensions) & set(dataset.depth_dimensions)):
                    for potential in dataset.variables:
                        if potential in self.variables:
                            continue
                        pot = dataset.variables[potential]
                        if (set(pot.dimensions) &
                                set(dataset.depth_dimensions)):
                            if len(pot.shape) > 3:
                                self.variables[idx] = potential
                                self.variables_anom[idx] = potential

            value = parallel = perpendicular = None

            variable_names = self.get_variable_names(dataset, self.variables)
            variable_units = self.get_variable_units(dataset, self.variables)
            scale_factors = self.get_variable_scale_factors(dataset,
                                                            self.variables)

            if len(self.variables) > 1:
                distances, times, lat, lon, bearings = geo.path_to_points(
                    self.points, 100
                )
                transect_pts, distance, x, dep = dataset.get_path_profile(
                    self.points, time, self.variables[0], 100)
                transect_pts, distance, y, dep = dataset.get_path_profile(
                    self.points, time, self.variables[1], 100)

                x = np.multiply(x, scale_factors[0])
                y = np.multiply(y, scale_factors[1])

                # Split the (x, y) vector into components relative to the
                # bearing of the path.
                r = np.radians(np.subtract(90, bearings))
                theta = np.arctan2(y, x) - r
                mag = np.sqrt(x ** 2 + y ** 2)

                parallel = mag * np.cos(theta)
                perpendicular = mag * np.sin(theta)

            else:
                transect_pts, distance, value, dep = dataset.get_path_profile(
                    self.points, time, self.variables[0])

                value = np.multiply(value, scale_factors[0])

            variable_units[0], value = self.kelvin_to_celsius(
                variable_units[0],
                value
            )

            if len(self.variables) == 2:
                variable_names[0] = self.vector_name(variable_names[0])

            if self.cmap is None:
                self.cmap = colormap.find_colormap(variable_names[0])

            self.timestamp = dataset.timestamps[int(time)]

            self.depth = dep
            self.depth_unit = "m"

            self.transect_data = {
                "points": transect_pts,
                "distance": distance,
                "data": value,
                "name": variable_names[0],
                "unit": variable_units[0],
                "parallel": parallel,
                "perpendicular": perpendicular,
            }

            if self.surface is not None:
                surface_pts, surface_dist, t, surface_value = \
                    dataset.get_path(
                        self.points,
                        0,
                        time,
                        self.surface,
                    )
                surface_unit = get_variable_unit(
                    self.dataset_name,
                    dataset.variables[self.surface]
                )
                surface_name = get_variable_name(
                    self.dataset_name,
                    dataset.variables[self.surface]
                )
                surface_factor = get_variable_scale_factor(
                    self.dataset_name,
                    dataset.variables[self.surface]
                )
                surface_value = np.multiply(surface_value, surface_factor)
                surface_unit, surface_value = self.kelvin_to_celsius(
                    surface_unit,
                    surface_value
                )

                self.surface_data = {
                    "points": surface_pts,
                    "distance": surface_dist,
                    "data": surface_value,
                    "name": surface_name,
                    "unit": surface_unit
                }

        if self.variables != self.variables_anom:
            with open_dataset(
                get_dataset_climatology(self.dataset_name)
            ) as dataset:
                if self.variables[0] in dataset.variables:
                    month = self.timestamp.month - 1
                    if len(self.variables) == 1:
                        # get_path_profile returns four values (points,
                        # distance, data, depth), as in the calls above.
                        _, _, climate_data, _ = dataset.get_path_profile(
                            self.points, month, self.variables[0])
                        u, climate_data = self.kelvin_to_celsius(
                            dataset.variables[self.variables[0]].unit,
                            climate_data
                        )
                        # Anomaly = value - climatology. The previous
                        # ``-= - climate_data`` added it instead.
                        self.transect_data['data'] -= climate_data
                    else:
                        _, _, climate_x, _ = dataset.get_path_profile(
                            self.points, month, self.variables[0], 100)
                        # Second component comes from the second variable.
                        _, _, climate_y, _ = dataset.get_path_profile(
                            self.points, month, self.variables[1], 100)

                        climate_distances, ctimes, clat, clon, bearings = \
                            geo.path_to_points(self.points, 100)

                        # Decompose the climatology vector itself, not the
                        # model x/y from above (which made the anomaly a
                        # difference of the model data with itself).
                        r = np.radians(np.subtract(90, bearings))
                        theta = np.arctan2(climate_y, climate_x) - r
                        mag = np.sqrt(climate_x ** 2 + climate_y ** 2)

                        climate_parallel = mag * np.cos(theta)
                        climate_perpendicular = mag * np.sin(theta)

                        self.transect_data['parallel'] -= climate_parallel
                        self.transect_data[
                            'perpendicular'] -= climate_perpendicular

        # Bathymetry
        with Dataset(app.config['BATHYMETRY_FILE'], 'r') as dataset:
            bath_x, bath_y = bathymetry(
                dataset.variables['y'],
                dataset.variables['x'],
                dataset.variables['z'],
                self.points)

        self.bathymetry = {
            'x': bath_x,
            'y': bath_y
        }
Пример #39
0
    def load_data(self):
        """Load the time series for every point and variable.

        With ``self.depth == 'all'`` a full profile series is read per
        point; otherwise a single depth is read. Two variables are
        combined into their magnitude.

        Sets ``self.variable_unit``, ``self.variable_name``,
        ``self.depth_label``, ``self.times``, ``self.data``,
        ``self.depths`` and ``self.depth_unit``; ``self.depth`` is reset
        to 0 when the first variable has no depth dimension.
        """
        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            self.load_misc(dataset, self.variables)
            self.fix_startend_times(dataset)

            self.variable_unit = get_variable_unit(
                self.dataset_name,
                dataset.variables[self.variables[0]]
            )
            self.variable_name = get_variable_name(
                self.dataset_name,
                dataset.variables[self.variables[0]]
            )

            var = self.variables[0]
            if self.depth != 'all' and self.depth != 'bottom' and \
                (set(dataset.variables[var].dimensions) &
                    set(dataset.depth_dimensions)):
                self.depth_label = " at %d m" % (
                    np.round(dataset.depths[self.depth])
                )

            elif self.depth == 'bottom':
                self.depth_label = ' at Bottom'
            else:
                self.depth_label = ''

            # No depth dimension on the first variable: use depth index 0.
            if not (set(dataset.variables[var].dimensions) &
                    set(dataset.depth_dimensions)):
                self.depth = 0

            # Resulting layout: [point, variable, ...].
            point_data = []
            for p in self.points:
                data = []
                for v in self.variables:
                    if self.depth == 'all':
                        d, dep = dataset.get_timeseries_profile(
                            float(p[0]),
                            float(p[1]),
                            self.starttime,
                            self.endtime,
                            v
                        )
                    else:
                        d, dep = dataset.get_timeseries_point(
                            float(p[0]),
                            float(p[1]),
                            self.depth,
                            self.starttime,
                            self.endtime,
                            v,
                            return_depth=True
                        )

                    data.append(d)

                point_data.append(np.ma.array(data))

            point_data = np.ma.array(point_data)
            # Variables live on axis 1 (see the Kelvin conversion below),
            # so the factor must be applied there; indexing axis 0 scaled
            # whole points instead.
            # NOTE(review): assumes self.scale_factors holds one entry per
            # variable (presumably filled in by load_misc) - confirm.
            for idx, factor in enumerate(self.scale_factors):
                if factor != 1.0:
                    point_data[:, idx] = np.multiply(
                        point_data[:, idx], factor)

            times = dataset.timestamps[self.starttime:self.endtime + 1]
            if self.query.get('dataset_quantum') == 'month':
                # Monthly datasets: pin every timestamp to the 1st.
                times = [datetime.date(x.year, x.month, 1) for x in times]

            # depths = dataset.depths
            # Depths as returned by the last series read above.
            depths = dep

        # TODO: pint
        if self.variable_unit.startswith("Kelvin"):
            self.variable_unit = "Celsius"
            for idx, v in enumerate(self.variables):
                point_data[:, idx, :] = point_data[:, idx, :] - 273.15

        if point_data.shape[1] == 2:
            # Two components: keep only their magnitude, but preserve the
            # variable axis with length 1.
            point_data = np.ma.expand_dims(
                np.sqrt(
                    point_data[:, 0, :] ** 2 + point_data[:, 1, :] ** 2
                ), 1
            )

        self.times = times
        self.data = point_data
        self.depths = depths
        self.depth_unit = "m"
Пример #40
0
    def load_data(self):
        """Load observations and the model data to compare them against.

        When ``self.observation`` holds numbers, each one is treated as an
        index into the observation aggregate file and is replaced in place
        by a dict with the keys ``time``, ``longitude``, ``latitude``,
        ``depth``, ``depthunit``, ``datatypes`` and ``data``;
        ``self.points`` is then rebuilt from those dicts.

        For every observation the dataset timestamp closest to the
        observation time is selected. Sets ``self.data``, ``self.depths``,
        ``self.observation_time(s)`` and ``self.timestamp(s)``.
        """
        if isinstance(self.observation[0], numbers.Number):
            self.observation_variable_names = []
            self.observation_variable_units = []
            with Dataset(app.config["OBSERVATION_AGG_URL"], 'r') as ds:
                t = netcdftime.utime(ds['time'].units)
                for idx, o in enumerate(self.observation):
                    observation = {}
                    ts = t.num2date(ds['time'][o]).replace(tzinfo=pytz.UTC)
                    observation['time'] = ts.isoformat()
                    observation['longitude'] = ds['lon'][o]
                    observation['latitude'] = ds['lat'][o]

                    observation['depth'] = ds['z'][:]
                    observation['depthunit'] = ds['z'].units

                    observation['datatypes'] = []
                    data = []
                    for v in sorted(ds.variables):
                        # Skip the coordinate/bookkeeping variables.
                        if v in ['z', 'lat', 'lon', 'profile', 'time']:
                            continue
                        var = ds[v]
                        # Skip single-character string variables.
                        if var.datatype == '|S1':
                            continue

                        observation['datatypes'].append("%s [%s]" % (
                            var.long_name,
                            var.units
                        ))
                        data.append(var[o, :])

                        # Names and units are collected once, from the
                        # first observation only.
                        if idx == 0:
                            self.observation_variable_names.append(
                                var.long_name)
                            self.observation_variable_units.append(var.units)

                    observation['data'] = np.ma.array(data).transpose()
                    self.observation[idx] = observation

                # A real list: on Python 3 ``map`` would leave a one-shot
                # iterator that is exhausted after a single pass.
                self.points = [[o['latitude'], o['longitude']]
                               for o in self.observation]

        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            ts = dataset.timestamps

            observation_times = []
            timestamps = []
            for o in self.observation:
                observation_time = dateutil.parser.parse(o['time'])
                observation_times.append(observation_time)

                deltas = [
                    (x.replace(tzinfo=pytz.UTC) -
                     observation_time).total_seconds()
                    for x in ts]

                # Index of the dataset timestamp closest to the observation.
                time = np.abs(deltas).argmin()
                timestamp = ts[time]
                timestamps.append(timestamp)

            self.load_misc(dataset, self.variables)

            # NOTE(review): ``time`` is the index matched for the LAST
            # observation only; all points are loaded at that time.
            point_data, self.depths = self.get_data(
                dataset, self.variables, time)
            point_data = np.ma.array(point_data)

            point_data = self.apply_scale_factors(point_data)

            self.variable_units, point_data = self.kelvin_to_celsius(
                self.variable_units,
                point_data
            )

        self.data = self.subtract_climatology(point_data, timestamp)
        self.observation_time = observation_time
        self.observation_times = observation_times
        self.timestamps = timestamps
        self.timestamp = timestamp
Пример #41
0
    def load_data(self):
        """Load the drifter observations and the model data along its track.

        Reads the drifter file addressed by ``app.config['DRIFTER_URL']``
        (formatted with ``self.drifter``) and then samples the dataset named
        by ``self.dataset_name`` along the drifter positions.

        Attributes set from the drifter file: ``name``, ``imei``, ``wmo``,
        ``data`` (one array per entry of ``self.buoyvariables``), ``times``,
        ``points`` (rows of latitude, longitude), ``data_names``,
        ``data_units``, ``start`` and ``end`` (indices into ``times``
        selected from ``self.starttime`` / ``self.endtime``).

        Attributes set from the model dataset: ``model_data`` (one row per
        entry of ``self.variables``), ``model_times``, ``variable_names``
        and ``variable_units``.
        """
        ds_url = app.config['DRIFTER_URL']
        data_names = []
        data_units = []
        with Dataset(ds_url % self.drifter, 'r') as ds:
            self.name = ds.buoyid

            self.imei = str(chartostring(ds['imei'][0]))
            self.wmo = str(chartostring(ds['wmo'][0]))

            t = netcdftime.utime(ds['data_date'].units)

            d = []
            for v in self.buoyvariables:
                buoy_var = ds[v]
                attrs = buoy_var.ncattrs()
                d.append(buoy_var[:])
                # Fall back to the raw variable key / no unit when the file
                # does not carry the descriptive attributes.
                data_names.append(
                    buoy_var.long_name if "long_name" in attrs else v)
                data_units.append(
                    buoy_var.units if "units" in attrs else None)

            self.data = d

            self.times = t.num2date(ds['data_date'][:])
            self.points = np.array([
                ds['latitude'][:],
                ds['longitude'][:],
            ]).transpose()

        # Naive timestamps are tagged as UTC so they can be compared with
        # the parsed start/end times below.
        for i, t in enumerate(self.times):
            if t.tzinfo is None:
                self.times[i] = t.replace(tzinfo=pytz.UTC)

        self.data_names = data_names
        self.data_units = data_units

        if self.starttime is not None:
            d = dateutil.parser.parse(self.starttime)
            self.start = np.where(self.times >= d)[0].min()
        else:
            self.start = 0

        if self.endtime is not None:
            d = dateutil.parser.parse(self.endtime)
            self.end = np.where(self.times <= d)[0].max() + 1
        else:
            self.end = len(self.times) - 1

        # Negative indices count from the end; both bounds are then clamped
        # to valid positions in self.times.
        if self.start < 0:
            self.start += len(self.times)
        self.start = np.clip(self.start, 0, len(self.times) - 1)
        if self.end < 0:
            self.end += len(self.times)
        self.end = np.clip(self.end, 0, len(self.times) - 1)

        with open_dataset(get_dataset_url(self.dataset_name)) as dataset:
            depth = int(self.depth)
            last_model_index = len(dataset.timestamps) - 1

            # Last model step at or before the first selected drifter time.
            try:
                model_start = np.where(
                    dataset.timestamps <= self.times[self.start]
                )[0][-1]
            except IndexError:
                model_start = 0

            # Widen the window by one step on each side (presumably so the
            # interpolation below brackets the drifter times -- confirm).
            model_start -= 1
            model_start = np.clip(model_start, 0, last_model_index)

            # First model step at or after the last selected drifter time.
            try:
                model_end = np.where(
                    dataset.timestamps >= self.times[self.end]
                )[0][0]
            except IndexError:
                model_end = last_model_index

            model_end += 1
            model_end = np.clip(model_end, model_start, last_model_index)

            # Materialised as lists: ``output_times`` is handed to
            # ``get_path`` once per variable and ``model_times`` to
            # ``interp1d``; a one-shot ``map`` iterator would be exhausted
            # after the first use.
            # NOTE(review): time.mktime interprets the tuple as local time
            # while utcfromtimestamp below decodes as UTC; the round trip
            # is only exact when the process time zone is UTC -- confirm.
            model_times = [
                time.mktime(stamp.timetuple())
                for stamp in dataset.timestamps[model_start:model_end + 1]
            ]
            output_times = [
                time.mktime(stamp.timetuple())
                for stamp in self.times[self.start:self.end + 1]
            ]

            d = []
            # Defined up front so an empty ``self.variables`` yields empty
            # model times instead of a NameError after the loop.
            mt = []
            for v in self.variables:
                _pts, _dist, mt, md = dataset.get_path(
                    self.points[self.start:self.end + 1],
                    depth,
                    range(model_start, model_end + 1),
                    v,
                    times=output_times
                )

                f = interp1d(
                    model_times,
                    md,
                    assume_sorted=True,
                    bounds_error=False,
                )

                # Keeps the diagonal of the interpolated result; assumes
                # f(mt) is indexed by (time, point) in matching order --
                # TODO confirm against get_path.
                d.append(np.diag(f(mt)))

            model_data = np.ma.array(d)

            variable_names = []
            variable_units = []
            scale_factors = []

            for v in self.variables:
                model_var = dataset.variables[v]
                variable_units.append(
                    get_variable_unit(self.dataset_name, model_var))
                variable_names.append(
                    get_variable_name(self.dataset_name, model_var))
                scale_factors.append(
                    get_variable_scale_factor(self.dataset_name, model_var))

            for idx, sf in enumerate(scale_factors):
                model_data[idx, :] = np.multiply(model_data[idx, :], sf)

            for idx, u in enumerate(variable_units):
                variable_units[idx], model_data[idx, :] = \
                    self.kelvin_to_celsius(u, model_data[idx, :])

            self.model_data = model_data
            self.model_times = [
                datetime.datetime.utcfromtimestamp(stamp) for stamp in mt
            ]
            self.variable_names = variable_names
            self.variable_units = variable_units
Пример #42
0
def stats(dataset_name, query):
    """Return per-area summary statistics for the requested variables.

    ``query`` is read through ``.get`` for these keys:

    * ``variable`` -- a list of variable keys or one comma-separated
      string; a trailing ``_anom`` is stripped from each key.
    * ``area`` -- a list whose items are either area dicts (with
      ``polygons``, ``innerrings`` and optionally ``name``) or string keys
      that are resolved through ``list_areas``.  String items are replaced
      in place by the resolved dict.
    * ``time`` -- a time index; missing or empty means the last timestamp,
      negative values count from the end.
    * ``depth`` -- a depth index or ``'bottom'``; missing means index 0.

    The dataset is sampled on a 50x50 grid spanning the bounding box of all
    areas, and for every area the samples falling inside it are reduced to
    min / max / mean / median / stddev / count.

    Returns a JSON string: a list of ``{'name', 'variables'}`` objects
    sorted by ``name``.
    """
    variables = query.get('variable')
    if isinstance(variables, (str, unicode)):
        variables = variables.split(',')

    variables = [re.sub('_anom$', '', v) for v in variables]

    area = query.get('area')

    # Area lists are cached per group (the part of the key before the first
    # '/'), so keys from different groups each resolve against their own
    # list instead of whichever group happened to be loaded first.
    areas_by_group = {}
    envelopes = []
    for idx, a in enumerate(area):
        if isinstance(a, (str, unicode)):
            a = a.encode("utf-8")
            group = a.split('/', 1)[0]
            if group not in areas_by_group:
                areas_by_group[group] = list_areas(group)

            matches = [x for x in areas_by_group[group] if x.get('key') == a]
            a = matches[0]
            area[idx] = a

        rings = [LinearRing(p) for p in a['polygons']]
        if len(rings) > 1:
            u = cascaded_union(rings)
        else:
            u = rings[0]
        envelopes.append(u.envelope)

    if len(envelopes) > 1:
        combined = cascaded_union(envelopes)
    else:
        combined = envelopes[0]

    bounds = combined.envelope.bounds

    area_polys = []
    output = []
    for a in area:
        rings = [LinearRing(p) for p in a['polygons']]
        innerrings = [LinearRing(p) for p in a['innerrings']]

        polygons = []
        for r in rings:
            # Test containment against the filled polygon: a bare
            # LinearRing is a curve, so a hole lying inside the enclosed
            # area is in its exterior and would never be matched.
            shell = Polygon(r)
            holes = [ir for ir in innerrings if shell.contains(ir)]
            polygons.append(Polygon(r, holes))

        area_polys.append(MultiPolygon(polygons))

        output.append({
            'name': a.get('name'),
            'variables': [],
        })

    with open_dataset(get_dataset_url(dataset_name)) as dataset:
        # A missing or empty (str or unicode) time selects the last step.
        requested_time = query.get('time')
        if requested_time is None or requested_time == '':
            time_index = -1
        else:
            time_index = int(requested_time)

        if time_index < 0:
            time_index += len(dataset.timestamps)
        time_index = np.clip(time_index, 0, len(dataset.timestamps) - 1)

        depth = 0
        depthm = 0

        requested_depth = query.get('depth')
        if requested_depth:
            if requested_depth == 'bottom':
                depth = 'bottom'
                depthm = 'Bottom'
            else:
                depth = int(requested_depth)

                depth = np.clip(depth, 0, len(dataset.depths) - 1)
                depthm = dataset.depths[depth]

        # Request grid, built once and reused unchanged for every variable.
        grid_lat, grid_lon = np.meshgrid(
            np.linspace(bounds[0], bounds[2], 50),
            np.linspace(bounds[1], bounds[3], 50)
        )
        grid_lat = grid_lat.ravel()
        grid_lon = grid_lon.ravel()

        output_fmtstr = "%6.5g"
        for v in variables:
            var = dataset.variables[v]

            variable_name = get_variable_name(dataset_name, var)
            variable_unit = get_variable_unit(dataset_name, var)
            scale_factor = get_variable_scale_factor(dataset_name, var)

            # The returned coordinates are kept separate from the request
            # grid so they do not feed into the next variable's query.
            lat, lon, d = dataset.get_raw_point(
                grid_lat,
                grid_lon,
                depth,
                time_index,
                v
            )

            if scale_factor != 1.0:
                d = np.multiply(d, scale_factor)

            if variable_unit.startswith("Kelvin"):
                variable_unit = "Celsius"
                d = d - 273.15

            # Longitudes above 180 are shifted down by 360 before the
            # point-in-polygon tests.
            lon[np.where(lon > 180)] -= 360

            if len(var.dimensions) == 3:
                variable_depth = ""
            elif depth == 'bottom':
                variable_depth = "(@ Bottom)"
            else:
                variable_depth = "(@%d m)" % np.round(depthm)

            label = ("%s %s" % (variable_name, variable_depth)).strip()

            points = [Point(p) for p in zip(lat.ravel(), lon.ravel())]
            values = d.ravel()
            for i, poly in enumerate(area_polys):
                inside = np.where([poly.contains(p) for p in points])

                selection = np.ma.array(values[inside])
                if len(selection) > 0 and not selection.mask.all():
                    output[i]['variables'].append({
                        'name': label,
                        'unit': variable_unit,
                        'min': output_fmtstr % (
                            np.ma.amin(selection).astype(float)
                        ),
                        'max': output_fmtstr % (
                            np.ma.amax(selection).astype(float)
                        ),
                        'mean': output_fmtstr % (
                            np.ma.mean(selection).astype(float)
                        ),
                        'median': output_fmtstr % (
                            np.ma.median(selection).astype(float)
                        ),
                        'stddev': output_fmtstr % (
                            np.ma.std(selection).astype(float)
                        ),
                        'num': "%d" % selection.count(),
                    })
                else:
                    output[i]['variables'].append({
                        'name': label,
                        'unit': variable_unit,
                        'min': gettext("No Data"),
                        'max': gettext("No Data"),
                        'mean': gettext("No Data"),
                        'median': gettext("No Data"),
                        'stddev': gettext("No Data"),
                        'num': "0",
                    })

    return json.dumps(sorted(output, key=itemgetter('name')))