# Example #1 (0)
# File: Input.py -- project: dsiuta/verif
class Comps(Input):
    """Input reader for the legacy COMPS NetCDF verification format.

    Variables named in _dimensionMetrics describe the dimensions of the
    data; every other variable in the file is treated as a score.
    Probability variables are stored in the file with names such as 'pm01'
    and translated to/from verif's 'p-0.1' naming by _toPvarComps and
    _toPvarVerif.
    """
    # Variables that describe dimensions rather than verification scores
    _dimensionMetrics = ["Date", "Offset", "Location", "Lat", "Lon", "Elev"]
    _description = Util.formatArgument(
        "netcdf", "Undocumented legacy " +
        "NetCDF format, to be phased out. A new NetCDF based format will " +
        "be defined.")

    def __init__(self, filename):
        Input.__init__(self, filename)
        # The dataset stays open for the lifetime of this object; all
        # accessors read straight from it
        self._file = netcdf(filename, 'r')

    def getName(self):
        # NOTE(review): this returns the variables dictionary, not a name
        # string -- looks like a bug, but callers may depend on it, so it
        # is left unchanged. Confirm intended behavior before fixing.
        return self._file.variables

    def getStations(self):
        """Return a list of Station objects, one per location in the file."""
        lat = Util.clean(self._file.variables["Lat"])
        lon = Util.clean(self._file.variables["Lon"])
        ids = Util.clean(self._file.variables["Location"])
        elev = Util.clean(self._file.variables["Elev"])
        stations = list()
        for i in range(0, lat.shape[0]):
            stations.append(Station.Station(ids[i], lat[i], lon[i], elev[i]))
        return stations

    def getScores(self, metric):
        """Return the (cleaned) data for a metric given in verif naming."""
        metric = self._toPvarComps(metric)
        return Util.clean(self._file.variables[metric])

    def _toPvarVerif(self, metric):
        """Translate a probability variable name from file naming
        (e.g. 'pm01') to verif naming (e.g. 'p-0.1')."""
        if (metric[0] == "p" and metric != "pit"):
            metric = metric.replace("m", "-")
            # 'p0' alone is a threshold of exactly 0; only insert a decimal
            # point for names such as 'p01' -> 'p0.1'
            if (metric != "p0"):
                metric = metric.replace("p0", "p0.")
            metric = metric.replace("p-0", "p-0.")
        return metric

    def _toPvarComps(self, metric):
        """Translate a probability variable name from verif naming
        (e.g. 'p-0.1') to file naming (e.g. 'pm01').
        Inverse of _toPvarVerif."""
        if (metric[0] == "p" and metric != "pit"):
            metric = metric.replace("-", "m")
            metric = metric.replace(".", "")
        return metric

    def getDims(self, metric):
        """Return the dimension names of a metric (verif naming)."""
        metric = self._toPvarComps(metric)
        return self._file.variables[metric].dimensions

    def getDates(self):
        return Util.clean(self._file.variables["Date"])

    def getOffsets(self):
        return Util.clean(self._file.variables["Offset"])

    def getThresholds(self):
        """Return the thresholds of all probability variables in the file."""
        thresholds = list()
        # Iterating the variables dict yields its keys; the values were
        # never used by the original iteritems() loop
        for metric in self._file.variables:
            if (metric not in self._dimensionMetrics):
                if (metric[0] == "p" and metric != "pit"):
                    metric = self._toPvarVerif(metric)
                    thresholds.append(float(metric[1:]))
        return thresholds

    def getQuantiles(self):
        """Return the quantiles of all quantile ('q...') variables."""
        quantiles = list()
        for metric in self._file.variables:
            if (metric not in self._dimensionMetrics):
                if (metric[0] == "q"):
                    quantiles.append(float(metric[1:]))
        return quantiles

    def getMetrics(self):
        """Return all score variables (i.e. non-dimension variables)."""
        metrics = list()
        for metric in self._file.variables:
            if (metric not in self._dimensionMetrics):
                metrics.append(metric)
        return metrics

    def getVariables(self):
        """Return all variable names, probability names in verif naming."""
        metrics = list()
        for metric in self._file.variables:
            metrics.append(self._toPvarVerif(metric))
        return metrics

    def getUnits(self):
        if (hasattr(self._file, "Units")):
            if (self._file.Units == ""):
                return "No units"
            elif (self._file.Units == "%"):
                return "%"
            else:
                # Wrapped in '$' -- presumably so plotting renders the
                # units as mathtext; confirm against the plotting code
                return "$" + self._file.Units + "$"
        else:
            return "No units"

    def getVariable(self):
        return self._file.Variable

    @staticmethod
    def isValid(filename):
        """Return True if 'filename' can be opened as a NetCDF file."""
        try:
            file = netcdf(filename, 'r')
        except Exception:
            # Narrowed from a bare 'except': any open failure still means
            # "not this format", but KeyboardInterrupt/SystemExit are no
            # longer swallowed
            return False
        # Close the probe handle instead of leaking it
        file.close()
        return True
# Example #2 (0)
# File: Driver.py -- project: dsiuta/verif
def showDescription(data=None):
    """Print verif's full command-line help text to stdout.

    Parameters:
       data: optional data object; when given, data.getMetrics() is called
             to list the raw scores from the file that can also be plotted.
    """
    desc = "Program to compute verification scores for weather forecasts. Can be\
   used to compare forecasts from different files. In that case only dates,\
   offsets, and locations that are common to all forecast files are used."

    print textwrap.fill(desc, Util.getTextWidth())
    print ""
    print "usage: verif files -m metric [options]"
    print "       verif files [--list-thresholds] [--list-quantiles] [--list-locations]"
    print "       verif --version"
    print ""
    print Util.green("Arguments:")
    print Util.formatArgument(
        "files",
        "One or more verification files in NetCDF or text format (see 'File Formats' below)."
    )
    print Util.formatArgument(
        "-m metric", "Which verification metric to use? See 'Metrics' below.")
    print Util.formatArgument("--list-thresholds",
                              "What thresholds are available in the files?")
    print Util.formatArgument("--list-quantiles",
                              "What quantiles are available in the files?")
    print Util.formatArgument("--list-locations",
                              "What locations are available in the files?")
    print Util.formatArgument("--version", "What version of verif is this?")
    print ""
    print Util.green("Options:")
    print "Note: vectors can be entered using commas, or MATLAB syntax (i.e 3:5 is 3,4,5 and 3:2:7 is 3,5,7)"
    # Dimensions
    print Util.green("  Dimensions and subset:")
    print Util.formatArgument(
        "-d dates",
        "A vector of dates in YYYYMMDD format, e.g.  20130101:20130201.")
    print Util.formatArgument("-l locations",
                              "Limit the verification to these location IDs.")
    print Util.formatArgument(
        "-llrange range",
        "Limit the verification to locations within minlon,maxlon,minlat,maxlat."
    )
    print Util.formatArgument(
        "-o offsets", "Limit the verification to these offsets (in hours).")
    print Util.formatArgument(
        "-r thresholds",
        "Compute scores for these thresholds (only used by some metrics).")
    print Util.formatArgument(
        "-t period",
        "Allow this many days of training, i.e. remove this many days from the beginning of the verification."
    )
    print Util.formatArgument(
        "-x dim",
        "Plot this dimension on the x-axis: date, offset, year, month, location, locationId, locationElev, locationLat, locationLon, threshold, or none. Not supported by all metrics. If not specified, then a default is used based on the metric. 'none' collapses all dimensions and computes one value."
    )

    # Data manipulation
    print Util.green("  Data manipulation:")
    print Util.formatArgument(
        "-acc", "Plot accumulated values. Only works for non-derived metrics")
    print Util.formatArgument(
        "-b type",
        "One of 'below', 'within', or 'above'. For threshold plots (ets, hit, within, etc) 'below/above' computes frequency below/above the threshold, and 'within' computes the frequency between consecutive thresholds."
    )
    print Util.formatArgument(
        "-c file",
        "File containing climatology data. Subtract all forecasts and obs with climatology values."
    )
    print Util.formatArgument(
        "-C file",
        "File containing climatology data. Divide all forecasts and obs by climatology values."
    )
    print Util.formatArgument(
        "-ct type",
        "Collapsing type: 'min', 'mean', 'median', 'max', 'std', 'range', or a number between 0 and 1. Some metrics computes a value for each value on the x-axis. Which function should be used to do the collapsing? Default is 'mean'. Only supported by some metrics. A number between 0 and 1 returns a specific quantile (e.g.  0.5 is the median)"
    )
    print Util.formatArgument(
        "-hist",
        "Plot values as histogram. Only works for non-derived metrics")
    print Util.formatArgument(
        "-sort", "Plot values sorted. Only works for non-derived metrics")

    # Plot options
    print Util.green("  Plotting options:")
    print Util.formatArgument(
        "-bot value", "Bottom boundary location for saved figure [range 0-1]")
    print Util.formatArgument(
        "-clim limits", "Force colorbar limits to the two values lower,upper")
    print Util.formatArgument(
        "-dpi value", "Resolution of image in dots per inch (default 100)")
    print Util.formatArgument("-f file", "Save image to this filename")
    print Util.formatArgument(
        "-fs size", "Set figure size width,height (in inches). Default 8x6.")
    print Util.formatArgument(
        "-leg titles",
        "Comma-separated list of legend titles. Use '_' to represent space.")
    print Util.formatArgument("-lw width", "How wide should lines be?")
    print Util.formatArgument("-labfs size", "Font size for axis labels")
    print Util.formatArgument(
        "-left value", "Left boundary location for saved figure [range 0-1]")
    print Util.formatArgument(
        "-legfs size", "Font size for legend. Set to 0 to hide legend.")
    print Util.formatArgument("-majlth length", "Length of major tick marks")
    print Util.formatArgument("-majtwid width",
                              "Adjust the thickness of the major tick marks")
    print Util.formatArgument("-minlth length", "Length of minor tick marks")
    print Util.formatArgument("-ms size", "How big should markers be?")
    # NOTE(review): the trailing "not x[i] <= T." in the help string below
    # looks like stray pasted text -- confirm intended wording upstream
    # (the string is runtime output, so it is left untouched here)
    print Util.formatArgument(
        "-nomargin", "Remove margins (whitespace) in the plot not x[i] <= T.")
    print Util.formatArgument(
        "-right value", "Right boundary location for saved figure [range 0-1]")
    print Util.formatArgument("-sp",
                              "Show a line indicating the perfect score")
    print Util.formatArgument("-tickfs size", "Font size for axis ticks")
    print Util.formatArgument("-title text", "Custom title to chart top")
    print Util.formatArgument(
        "-top value", "Top boundary location for saved figure [range 0-1]")
    print Util.formatArgument(
        "-type type", "One of 'plot' (default), 'text', 'map', or 'maprank'.")
    print Util.formatArgument("-xlabel text", "Custom x-axis label")
    print Util.formatArgument(
        "-xlim limits", "Force x-axis limits to the two values lower,upper")
    print Util.formatArgument("-xrot value",
                              "Rotation angle for x-axis labels")
    print Util.formatArgument("-ylabel text", "Custom y-axis label")
    print Util.formatArgument(
        "-ylim limits", "Force y-axis limits to the two values lower,upper")
    print ""
    # List all metrics and outputs together, alphabetically
    # (case-insensitive); only entries whose isValid() is true are shown
    metrics = Metric.getAllMetrics()
    outputs = Output.getAllOutputs()
    print Util.green("Metrics (-m):")
    metricOutputs = metrics + outputs
    metricOutputs.sort(key=lambda x: x[0].lower(), reverse=False)
    for m in metricOutputs:
        name = m[0].lower()
        if (m[1].isValid()):
            desc = m[1].summary()
            print Util.formatArgument(name, desc)
            # print "   %-14s%s" % (name, textwrap.fill(desc, 80).replace('\n', '\n                 ')),
            # print ""
    if (data is not None):
        print ""
        print "  Or one of the following, which plots the raw score from the file:"
        print " ",
        metrics = data.getMetrics()
        for metric in metrics:
            print metric,
    print ""
    print ""
    print Util.green("File formats:")
    print Input.Text.description()
    print Input.Comps.description()
# Example #3 (0)
# File: Input.py -- project: dsiuta/verif
class Text(Input):
    """Input reader for space-delimited text verification files.

    Each non-comment row holds one forecast/observation pair; a header row
    names the columns (date, offset, id, lat, lon, elev, obs, fcst, pit,
    p<threshold>, q<quantile>). The parsed values are gridded into numpy
    arrays indexed by (date, offset, location) with a trailing
    threshold/quantile axis where applicable; missing combinations are NaN.
    """
    _description = Util.formatArgument("text", "Data organized in rows and columns with space as a delimiter. Each row represents one forecast/obs pair, and each column represents one attribute of the data. Here is an example:") + "\n"\
    + Util.formatArgument("", "") + "\n"\
    + Util.formatArgument("", "# variable: Temperature") + "\n"\
    + Util.formatArgument("", "# units: $^oC$") + "\n"\
    + Util.formatArgument("", "date     offset id      lat     lon      elev obs fcst      p10") + "\n"\
    + Util.formatArgument("", "20150101 0      214     49.2    -122.1   92 3.4 2.1     0.91") + "\n"\
    + Util.formatArgument("", "20150101 1      214     49.2    -122.1   92 4.7 4.2      0.85") + "\n"\
    + Util.formatArgument("", "20150101 0      180     50.3    -120.3   150 0.2 -1.2 0.99") + "\n"\
    + Util.formatArgument("", "") + "\n"\
    + Util.formatArgument("", " Any lines starting with '#' can be metadata (currently variable: and units: are recognized). After that is a header line that must describe the data columns below. The following attributes are recognized: date (in YYYYMMDD), offset (in hours), id (station identifier), lat (in degrees), lon (in degrees), obs (observations), fcst (deterministic forecast), p<number> (cumulative probability at a threshold of 10). obs and fcst are required columns: a value of 0 is used for any missing column. The columns can be in any order. If 'id' is not provided, then they are assigned sequentially starting at 0.")

    def __init__(self, filename):
        """Parse 'filename' and build the internal score arrays."""
        Input.__init__(self, filename)
        # Renamed from 'file' to avoid shadowing the builtin
        f = open(filename, 'r')
        self._units = "Unknown units"
        self._variable = "Unknown"
        self._pit = None

        self._dates = set()
        self._offsets = set()
        self._stations = set()
        self._quantiles = set()
        self._thresholds = set()
        obs = dict()
        fcst = dict()
        cdf = dict()
        pit = dict()
        x = dict()
        indices = dict()
        header = None
        quantileFields = list()
        thresholdFields = list()

        # Default values used when a column is not present in the file
        offset = 0
        date = 0
        lat = 0
        lon = 0
        elev = 0

        # Read the data into dictionaries with (date,offset,lat,lon,elev)
        # as key and obs/fcst/etc as values
        for rowstr in f:
            if (rowstr[0] == "#"):
                # Metadata lines: '# variable: <name>' and '# units: <units>'
                curr = rowstr[1:].split()
                if (curr[0] == "variable:"):
                    self._variable = curr[1]
                elif (curr[0] == "units:"):
                    self._units = curr[1]
                else:
                    Util.warning("Ignoring line '" + rowstr.strip() +
                                 "' in file '" + filename + "'")
            else:
                row = rowstr.split()
                if (header is None):
                    # First non-comment row is the header; record the
                    # column position of every attribute
                    header = row
                    for i in range(0, len(header)):
                        indices[header[i]] = i

                    # Ensure we have required columns
                    requiredColumns = ["obs", "fcst"]
                    for col in requiredColumns:
                        if (col not in indices):
                            msg = "Could not parse %s: Missing column '%s'" % (
                                filename, col)
                            Util.error(msg)

                    # These depend only on the header, so compute them once
                    # here instead of once per data row (hoisted from the
                    # original per-row loop)
                    quantileFields = self._getQuantileFields(header)
                    thresholdFields = self._getThresholdFields(header)
                else:
                    # Bug fix: was 'is not', which compares identity and is
                    # unreliable for ints outside CPython's small-int cache
                    if (len(row) != len(header)):
                        Util.error(
                            "Incorrect number of columns (expecting %d) in row '%s'"
                            % (len(header), rowstr.strip()))
                    if ("date" in indices):
                        date = self._clean(row[indices["date"]])
                    self._dates.add(date)
                    if ("offset" in indices):
                        offset = self._clean(row[indices["offset"]])
                    self._offsets.add(offset)
                    # 'stationId' avoids shadowing the builtin 'id'
                    if ("id" in indices):
                        stationId = self._clean(row[indices["id"]])
                    else:
                        # NaN ids are replaced with sequential ids below
                        stationId = np.nan
                    if ("lat" in indices):
                        lat = self._clean(row[indices["lat"]])
                    if ("lon" in indices):
                        lon = self._clean(row[indices["lon"]])
                    if ("elev" in indices):
                        elev = self._clean(row[indices["elev"]])
                    station = Station.Station(stationId, lat, lon, elev)
                    self._stations.add(station)
                    key = (date, offset, lat, lon, elev)
                    obs[key] = self._clean(row[indices["obs"]])
                    fcst[key] = self._clean(row[indices["fcst"]])
                    if ("pit" in indices):
                        pit[key] = self._clean(row[indices["pit"]])
                    for field in quantileFields:
                        quantile = float(field[1:])
                        self._quantiles.add(quantile)
                        qkey = (date, offset, lat, lon, elev, quantile)
                        x[qkey] = self._clean(row[indices[field]])
                    for field in thresholdFields:
                        threshold = float(field[1:])
                        self._thresholds.add(threshold)
                        tkey = (date, offset, lat, lon, elev, threshold)
                        cdf[tkey] = self._clean(row[indices[field]])
        f.close()
        self._dates = list(self._dates)
        self._offsets = list(self._offsets)
        self._stations = list(self._stations)
        self._quantiles = list(self._quantiles)
        self._thresholds = np.array(list(self._thresholds))
        Ndates = len(self._dates)
        Noffsets = len(self._offsets)
        Nlocations = len(self._stations)
        Nquantiles = len(self._quantiles)
        Nthresholds = len(self._thresholds)

        # Grid the dictionaries into dense arrays; combinations not present
        # in the file remain NaN
        self._obs = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
        self._fcst = np.zeros([Ndates, Noffsets, Nlocations], 'float') * np.nan
        if (len(pit) != 0):
            self._pit = np.zeros([Ndates, Noffsets, Nlocations],
                                 'float') * np.nan
        self._cdf = np.zeros([Ndates, Noffsets, Nlocations, Nthresholds],
                             'float') * np.nan
        self._x = np.zeros([Ndates, Noffsets, Nlocations, Nquantiles],
                           'float') * np.nan
        for d in range(0, Ndates):
            date = self._dates[d]
            for o in range(0, Noffsets):
                offset = self._offsets[o]
                for s in range(0, Nlocations):
                    station = self._stations[s]
                    lat = station.lat()
                    lon = station.lon()
                    elev = station.elev()
                    key = (date, offset, lat, lon, elev)
                    if (key in obs):
                        self._obs[d][o][s] = obs[key]
                    if (key in fcst):
                        self._fcst[d][o][s] = fcst[key]
                    if (key in pit):
                        self._pit[d][o][s] = pit[key]
                    for q in range(0, Nquantiles):
                        qkey = (date, offset, lat, lon, elev,
                                self._quantiles[q])
                        if (qkey in x):
                            self._x[d, o, s, q] = x[qkey]
                    for t in range(0, Nthresholds):
                        tkey = (date, offset, lat, lon, elev,
                                self._thresholds[t])
                        if (tkey in cdf):
                            self._cdf[d, o, s, t] = cdf[tkey]

        # Find the highest station id present, so that stations without an
        # id can be assigned fresh sequential ids above it
        maxStationId = np.nan
        for station in self._stations:
            if (np.isnan(maxStationId)):
                maxStationId = station.id()
            elif (station.id() > maxStationId):
                maxStationId = station.id()

        counter = 0
        if (not np.isnan(maxStationId)):
            counter = maxStationId + 1

        for station in self._stations:
            if (np.isnan(station.id())):
                station.id(counter)
                counter = counter + 1
        self._dates = np.array(self._dates)
        self._offsets = np.array(self._offsets)

    def _clean(self, value):
        """Parse a string into a float, mapping the -999 missing-value
        marker to NaN."""
        fvalue = float(value)
        if (fvalue == -999):
            fvalue = np.nan
        return fvalue

    def _getQuantileFields(self, fields):
        """Return the header fields that denote quantiles ('q...')."""
        return [att for att in fields if att[0] == "q"]

    def _getThresholdFields(self, fields):
        """Return the header fields that denote thresholds ('p...'),
        excluding the special 'pit' column."""
        return [att for att in fields if att[0] == "p" and att != "pit"]

    def getThresholds(self):
        return self._thresholds

    def getQuantiles(self):
        # NOTE: the original class defined getQuantiles twice with
        # identical bodies; the duplicate definition has been removed
        return self._quantiles

    def getName(self):
        # Text files carry no dataset name
        return "Unknown"

    def getStations(self):
        return self._stations

    def getScores(self, metric):
        """Return the data for a metric: 'obs', 'fcst', 'pit', 'p<thr>',
        'q<quantile>', or one of the dimension variables. Errors via
        Util.error when the metric is unknown or not in the file."""
        if (metric == "obs"):
            return self._obs
        elif (metric == "fcst"):
            return self._fcst
        elif (metric == "pit"):
            if (self._pit is None):
                Util.error("File does not contain 'pit'")
            return self._pit
        elif (metric[0] == "p"):
            threshold = float(metric[1:])
            # Tolerant float match against the available thresholds
            I = np.where(abs(self._thresholds - threshold) < 0.0001)[0]
            if (len(I) == 0):
                Util.error("Cannot find " + metric)
            elif (len(I) > 1):
                Util.error("Could not find unique threshold: " +
                           str(threshold))
            return self._cdf[:, :, :, I[0]]
        elif (metric[0] == "q"):
            quantile = float(metric[1:])
            # Bug fix: self._quantiles is a plain list, so subtracting a
            # float from it directly raised TypeError; go through numpy
            I = np.where(abs(np.array(self._quantiles) - quantile) <
                         0.0001)[0]
            if (len(I) == 0):
                Util.error("Cannot find " + metric)
            elif (len(I) > 1):
                Util.error("Could not find unique quantile: " + str(quantile))
            return self._x[:, :, :, I[0]]
        elif (metric == "Offset"):
            return self._offsets
        elif (metric == "Date"):
            return self._dates
        elif (metric == "Location"):
            stations = np.zeros(len(self._stations), 'float')
            for i in range(0, len(self._stations)):
                stations[i] = self._stations[i].id()
            return stations
        elif (metric in ["Lat", "Lon", "Elev"]):
            values = np.zeros(len(self._stations), 'float')
            for i in range(0, len(self._stations)):
                station = self._stations[i]
                if (metric == "Lat"):
                    values[i] = station.lat()
                elif (metric == "Lon"):
                    values[i] = station.lon()
                elif (metric == "Elev"):
                    values[i] = station.elev()
            return values
        else:
            Util.error("Cannot find " + metric)

    def getDims(self, metric):
        """Return the dimension names that a metric is indexed by."""
        if (metric in ["Date", "Offset", "Location"]):
            return [metric]
        elif (metric in ["Lat", "Lon", "Elev"]):
            return ["Location"]
        else:
            return ["Date", "Offset", "Location"]

    def getDates(self):
        return self._dates

    def getOffsets(self):
        return self._offsets

    def getMetrics(self):
        """Return the names of all scores available in this file."""
        metrics = ["obs", "fcst"]
        for quantile in self._quantiles:
            metrics.append("q%g" % quantile)
        for threshold in self._thresholds:
            metrics.append("p%g" % threshold)
        if (self._pit is not None):
            metrics.append("pit")
        return metrics

    def getVariables(self):
        """Return all variables: scores plus the dimension variables."""
        return self.getMetrics() + [
            "Date", "Offset", "Location", "Lat", "Lon", "Elev"
        ]

    def getUnits(self):
        return self._units

    def getVariable(self):
        return self._variable

    @staticmethod
    def isValid(filename):
        # Text files have no cheap signature to check, so accept anything
        return True