示例#1
0
def combine(average, weight, new, new_weight, first_year, last_year,
            min_overlap):
    """Run the GISTEMP combining algorithm.  This combines the data
    in the *new* array into the *average* array, in place.  *new* has
    weight *new_weight*, *average* has weights in the *weight* array
    (which is updated in place as well).

    Only data for years in *range(first_year, last_year)* are
    considered and combined; the datum for month *m* of year index
    *y* is element *y * 12 + m* of each array.

    *new_weight* can be either a constant or an array of weights for
    each datum in *new*.

    The number of month records combined is returned.

    Each month of the year is considered separately.  For the set of
    times where both *average* and *new* have data the mean difference
    (a bias, *average* minus *new*) is computed.  If there are fewer
    than *min_overlap* years in common, or no years in common at all,
    the data (for that month of the year) are not combined.
    Otherwise the bias is added to the *new* record and it is
    point-wise combined into *average* according to the weight
    *new_weight* and the existing weights for *average*.
    """

    new_weight = container(new_weight)

    months_combined = 0
    for m in range(12):
        start = first_year * 12 + m
        stop = last_year * 12
        sum_new = 0.0  # Sum of data in new
        sum_average = 0.0  # Sum of data in average
        count = 0  # Number of years where both new and average are valid
        for a, n in itertools.izip(average[start:stop:12],
                                   new[start:stop:12]):
            if invalid(a) or invalid(n):
                continue
            count += 1
            sum_average += a
            sum_new += n
        # Without any common year there is no bias to compute, whatever
        # the value of min_overlap (this also avoids dividing by zero).
        if count == 0 or count < min_overlap:
            continue
        bias = (sum_average - sum_new) / count

        # Update period of valid data, averages and weights
        for i in range(start, stop, 12):
            if invalid(new[i]):
                continue
            new_month_weight = weight[i] + new_weight[i]
            average[i] = (weight[i] * average[i] + new_weight[i] *
                          (new[i] + bias)) / new_month_weight
            weight[i] = new_month_weight
            months_combined += 1
    return months_combined
示例#2
0
def combine(average, weight, new, new_weight,
            first_year, last_year, min_overlap):
    """Run the GISTEMP combining algorithm.  This combines the data
    in the *new* array into the *average* array.  *new* has weight
    *new_weight*, *average* has weights in the *weight* array.  Both
    *average* and *weight* are modified in place.

    Only data for years in *range(first_year, last_year)* are
    considered and combined.

    *new_weight* can be either a constant or an array of weights for
    each datum in *new*.

    The number of month records combined is returned.

    Each month of the year is considered separately.  For the set of
    times where both *average* and *new* have data the mean difference
    (a bias: mean of *average* minus mean of *new*) is computed.  If
    there are fewer than *min_overlap* years in common the data (for
    that month of the year) are not combined; the same applies when
    there is no common year at all, because no bias can be computed
    then.  The bias is added to the *new* record and it is point-wise
    combined into *average* according to the weight *new_weight* and
    the existing weights for *average*.
    """

    new_weight = container(new_weight)

    months_combined = 0
    for m in range(12):
        sum_new = 0.0    # Sum of data in new
        sum_avg = 0.0    # Sum of data in average
        count = 0    # Number of years where both new and average are valid
        for a,n in itertools.izip(average[first_year*12+m: last_year*12: 12],
                                  new[first_year*12+m: last_year*12: 12]):
            if invalid(a) or invalid(n):
                continue
            count += 1
            sum_avg += a
            sum_new += n
        if count < min_overlap:
            continue
        if count == 0:
            # Only reachable when min_overlap <= 0: nothing overlaps, so
            # the bias is undefined (and the division below would fail).
            continue
        bias = (sum_avg-sum_new)/count

        # Update period of valid data, averages and weights
        for i in range(first_year*12+m, last_year*12, 12):
            if invalid(new[i]):
                continue
            new_month_weight = weight[i] + new_weight[i]
            average[i] = (weight[i]*average[i]
                          + new_weight[i]*(new[i]+bias))/new_month_weight
            weight[i] = new_month_weight
            months_combined += 1
    return months_combined
示例#3
0
def combine(composite, weight, new, new_weight, min_overlap):
    """Run the GISTEMP combining algorithm.  This combines the data
    in the *new* array into the *composite* array.  *new* has weight
    *new_weight*; *composite* has weights in the *weight* array.
    *composite* and *weight* are updated in place.

    *new_weight* can be either a constant or an array of weights for
    each datum in *new*.

    For each of the 12 months of the year, track is kept of how many
    new data are combined.  This list of 12 elements is returned.

    Each month of the year is considered separately.  For the set of
    times where both *composite* and *new* have data the mean difference
    (a bias, *composite* minus *new*) is computed.  If there are fewer
    than *min_overlap* years in common, or no years in common at all,
    the data (for that month of the year) are not combined.
    Otherwise the bias is added to the *new* record and it is
    point-wise combined into *composite* according to the weight
    *new_weight* and the existing weights for *composite*.
    """

    new_weight = ensure_array(weight, new_weight)

    # A count (of combined data) for each month.
    data_combined = [0] * 12
    for m in range(12):
        sum_new = 0.0          # Sum of data in new
        sum_composite = 0.0    # Sum of data in composite
        # Number of years where both new and composite are valid.
        count = 0
        for a,n in itertools.izip(composite[m::12],
                                  new[m::12]):
            if invalid(a) or invalid(n):
                continue
            count += 1
            sum_composite += a
            sum_new += n
        # No common years means no bias can be computed; skipping here
        # also keeps the division below safe when min_overlap <= 0.
        if count == 0 or count < min_overlap:
            continue
        bias = (sum_composite-sum_new)/count

        # Update period of valid data, composite and weights.
        for i in range(m, len(new), 12):
            if invalid(new[i]):
                continue
            new_month_weight = weight[i] + new_weight[i]
            composite[i] = (weight[i]*composite[i]
                          + new_weight[i]*(new[i]+bias))/new_month_weight
            weight[i] = new_month_weight
            data_combined[m] += 1
    return data_combined
示例#4
0
def trend2(points, xmid, min):
    """Fit, by regression analysis, a line with a change in slope at
    *xmid* to the valid data in *points*.

    *points* is an iterable of (x, value) pairs; pairs whose value is
    invalid are skipped.  Each x is taken relative to *xmid*: points
    with x greater than *xmid* contribute to the right-hand segment,
    all others (including x equal to *xmid*) to the left-hand one.

    Returned is the 4-tuple (*sl1*, *sl2*, *rms*, *sl*): the left-hand
    slope, the right-hand slope, the error of the two-segment fit, and
    the slope of an overall linear fit.  As computed here *rms* is the
    sum of the squared residuals; no square root or division by the
    number of points is applied.  When either side has fewer than
    *min* valid points all four values are MISSING.
    """

    # Todo: incorporate into getfit.
    n_left = n_right = 0
    sx_left = sx_right = 0
    sxx_left = sxx_right = 0
    sxv_left = sxv_right = 0

    sv = 0.0
    svv = 0.0

    for x, value in points:
        if invalid(value):
            continue
        dx = x - xmid
        sv += value
        svv += value**2
        if dx > 0.0:
            n_right += 1
            sx_right += dx
            sxx_right += dx**2
            sxv_right += dx * value
        else:
            n_left += 1
            sx_left += dx
            sxx_left += dx**2
            sxv_left += dx * value

    if n_left < min or n_right < min:
        return (MISSING,) * 4

    n = n_left + n_right
    denom = (n * sxx_left * sxx_right - sxx_left * sx_right**2 -
             sxx_right * sx_left**2)
    numer_left = (sx_left * (sx_right * sxv_right - sxx_right * sv) +
                  sxv_left * (n * sxx_right - sx_right**2))
    numer_right = (sx_right * (sx_left * sxv_left - sxx_left * sv) +
                   sxv_right * (n * sxx_left - sx_left**2))
    sl1 = numer_left / denom
    sl2 = numer_right / denom

    # Sum of the values once both slope contributions are removed; it
    # gives the fitted value at xmid and is reused in the error term.
    level_sum = sv - sl1 * sx_left - sl2 * sx_right
    ymid = level_sum / n
    rms = (n * ymid**2 + svv - 2 * ymid * level_sum +
           sl1 * sl1 * sxx_left + sl2 * sl2 * sxx_right -
           2 * sl1 * sxv_left - 2 * sl2 * sxv_right)

    # Single straight line through all the valid points.
    sx = sx_left + sx_right
    sxx = sxx_left + sxx_right
    sxv = sxv_left + sxv_right
    sl = (n * sxv - sv * sx) / (n * sxx - sx**2)

    return sl1, sl2, rms, sl
示例#5
0
def merge_ocean(ocean, sst, dates):
    """Generator merging the new monthly sea-surface temperature
    readings in *sst*, which cover the dates *dates*, into the boxed
    iterator *ocean*.

    *dates* is a sequence of (year, month) pairs.  *sst* is indexed as
    ``sst[i][j][t]``, where *i* and *j* are derived from the box
    longitude and latitude limits and *t* counts months from the start
    of the first year in *dates*.

    The first item of *ocean* is the metadata object: its *monm*,
    *monm4* and *title* attributes are updated and it is yielded
    first.  Each following box is padded to *meta.monm* values; for
    every date it receives the mean, over the degree cells it covers,
    of sst minus climatology (MISSING when no cell qualifies); it is
    then trimmed and yielded.  Cells whose sst is below
    *parameters.sea_surface_cutoff_temp* or whose climatology is
    invalid are left out of the mean.
    """

    clim = giss_io.step4_load_clim()

    first_new_year = dates[0][0]
    final_date = dates[-1]
    last_new_year = final_date[0]
    last_new_month = final_date[1]

    boxes = iter(ocean)
    meta = boxes.next()
    meta.monm = 12 * (last_new_year - IYRBEG + 1)
    meta.monm4 = meta.monm + 8
    title_suffix = (" Had: 1880-11/1981, oi2: 12/1981-%2d/%04d" %
                    (last_new_month, last_new_year))
    meta.title = meta.title[:40] + title_suffix
    yield meta

    # Average into Sergej's subbox grid
    for box in boxes:
        box.pad_with_missing(meta.monm)

        # Work out which degree cells are included in this subbox.
        south = int(box.lat_S + 90.01)
        north = int(box.lat_N + 89.99)
        west = int(box.lon_W + 360.01)
        east = int(box.lon_E + 359.99)
        if east >= 360:
            west -= 360
            east -= 360
        lat_cells = range(south, north + 1)
        lon_cells = range(west, east + 1)

        for y, m in dates:
            t = (y - first_new_year) * 12 + m - 1
            month = (m - 1) % 12
            # Anomalies (sst minus climatology) of the usable cells,
            # latitude index outermost.
            anomalies = [
                sst[i][j][t] - clim[i][j][month]
                for j in lat_cells
                for i in lon_cells
                if not (sst[i][j][t] < parameters.sea_surface_cutoff_temp
                        or invalid(clim[i][j][month]))]

            index = (y - IYRBEG) * 12 + m - 1
            box.set_value(index, MISSING)
            if anomalies:
                box.set_value(index, sum(anomalies, 0.0) / len(anomalies))

        box.trim()
        yield box
示例#6
0
def merge_ocean(ocean, sst, dates):
    """Add the array *sst* of new monthly sea-surface temperature
    readings, which has data for the (year, month) pairs in *dates*,
    to the boxed iterator *ocean*.  This is a generator: it yields the
    updated metadata object first and then every box in turn.

    For each box and each date, the value stored is the average of
    sst minus climatology over the degree cells inside the box; cells
    with sst below *parameters.sea_surface_cutoff_temp* or with an
    invalid climatology do not take part, and MISSING is stored when
    no cell takes part.
    """

    clim = giss_io.step4_load_clim()

    year0 = dates[0][0]
    end_year = dates[-1][0]
    end_month = dates[-1][1]

    source = iter(ocean)
    meta = source.next()
    meta.monm = 12 * (end_year - IYRBEG + 1)
    meta.monm4 = meta.monm + 8
    meta.title = (meta.title[:40] +
                  " Had: 1880-11/1981, oi2: 12/1981-%2d/%04d" %
                  (end_month, end_year))
    yield meta

    # Average into Sergej's subbox grid
    for box in source:
        box.pad_with_missing(meta.monm)

        # Index limits of the degree cells covered by this subbox.
        j_lo = int(box.lat_S + 90.01)
        j_hi = int(box.lat_N + 89.99)
        i_lo = int(box.lon_W + 360.01)
        i_hi = int(box.lon_E + 359.99)
        if i_hi >= 360:
            i_lo = i_lo - 360
            i_hi = i_hi - 360

        for y, m in dates:
            t = (y - year0) * 12 + m - 1
            month = (m - 1) % 12
            used = 0
            total = 0.0
            for j in range(j_lo, j_hi + 1):
                for i in range(i_lo, i_hi + 1):
                    reading = sst[i][j][t]
                    if reading < parameters.sea_surface_cutoff_temp:
                        continue
                    normal = clim[i][j][month]
                    if invalid(normal):
                        continue
                    used += 1
                    total += reading - normal

            index = (y - IYRBEG) * 12 + m - 1
            # Reset the slot first; it is overwritten just below when
            # at least one cell contributed.
            box.set_value(index, MISSING)
            if used > 0:
                box.set_value(index, total / used)

        box.trim()
        yield box
示例#7
0
def combine(sums, wgts, begin, years, records, log, new_id=None):
    """Repeatedly fold the best-overlapping record into *sums*/*wgts*.

    While *records* is non-empty, the current average (from *sums*,
    *wgts* and *years*) is compared against the remaining records with
    get_longest_overlap().  The chosen record is removed from
    *records*, added to *sums* and *wgts* shifted by the returned
    difference, and a line is written to *log*.  The loop stops early,
    after logging a message, as soon as the returned difference is
    invalid.  *new_id* is accepted but not used here.
    """
    while records:
        current = average(sums, wgts, years)
        record, rec_id, diff = get_longest_overlap(current, begin, records)
        if invalid(diff):
            log.write("\tno other records okay\n")
            break
        del records[rec_id]
        add(sums, wgts, diff, begin, record)
        summary = (rec_id, record.first_year, record.last_year - 1, diff)
        log.write("\t %s %d %d %f\n" % summary)
示例#8
0
def cmbine(combined, weights, counts, data, first, last, weight):
    """Fold the array *data*, with weight *weight*, into the weighted
    averages held in *combined*.

    *weights* holds the total weight behind each entry of *combined*
    and *counts* the number of values combined into it; all three
    arrays are updated in place.  Before combining, *data* is shifted
    by a bias: the mean of *combined* minus the mean of *data* over
    the entries that are valid in both.

    Only array elements *first - 1* to *last - 1* (inclusive) are
    examined, and only valid values of *data* are combined.  If fewer
    than *parameters.rural_station_min_overlap* entries are valid in
    both arrays nothing is combined at all.

    Note: where *data[i]* is valid and *combined[i]* is not, the
    weighted average is still computed; this relies on *weights[i]*
    being zero for such entries.
    """
    lo, hi = first - 1, last

    n_common = 0
    total_combined = 0.0
    total_data = 0
    for old_value, new_value in itertools.izip(combined[lo:hi], data[lo:hi]):
        if invalid(old_value) or invalid(new_value):
            continue
        n_common += 1
        total_combined += old_value
        total_data += new_value

    if n_common < parameters.rural_station_min_overlap:
        return
    bias = (total_combined - total_data) / float(n_common)

    # update period of valid data, averages and weights
    for n in xrange(lo, hi):
        new_value = data[n]
        if invalid(new_value):
            continue
        previous_weight = weights[n]
        total_weight = previous_weight + weight
        weights[n] = total_weight
        combined[n] = (previous_weight * combined[n] +
                       weight * (new_value + bias)) / total_weight
        counts[n] += 1
示例#9
0
def cmbine(combined, weights, counts, data, first, last, weight):
    """Adds the array *data* with weight *weight* into the array of
    weighted averages *combined*, with total weights *weights* and
    combined counts *counts* (entry *combined[i]* is the result of
    combining *counts[i]* values with total weight *weights[i]*).
    The bias between *combined* and *data* (mean of *combined* minus
    mean of *data* over their common valid entries) is added to
    *data* before combining.

    The elements examined are those with index from *first - 1* up to
    but not including *last*.  Only valid values from *data* are
    combined, and nothing is combined when fewer than
    *parameters.rural_station_min_overlap* entries are valid in both
    arrays.

    Note: an entry that is valid in *data* but not in *combined* still
    goes through the weighted average; the result is only meaningful
    because *weights[i]* is expected to be zero there.
    """
    lo, hi = first - 1, last

    # Pairs of values that are valid in both arrays.
    overlap = [(v_avg, v_new)
               for v_avg, v_new in itertools.izip(combined[lo:hi],
                                                  data[lo:hi])
               if not (invalid(v_avg) or invalid(v_new))]
    ncom = len(overlap)
    if ncom < parameters.rural_station_min_overlap:
        return

    avg_sum = sum([pair[0] for pair in overlap], 0.0)
    sumn = sum([pair[1] for pair in overlap])
    bias = (avg_sum - sumn) / float(ncom)

    # update period of valid data, averages and weights
    for n in xrange(lo, hi):
        v_new = data[n]
        if not invalid(v_new):
            old_wt = weights[n]
            wtnew = old_wt + weight
            weights[n] = wtnew
            shifted = v_new + bias
            combined[n] = (old_wt * combined[n] + weight * shifted) / wtnew
            counts[n] += 1
示例#10
0
def get_longest_overlap(new_data, begin, records):
    """Pick, from the *records* dict, the record whose annual
    anomalies overlap longest with those of *new_data*.

    *begin* is the year of the first annual anomaly of *new_data*.
    A year counts towards the overlap when both series have a valid
    annual anomaly for it.  Records with an overlap shorter than
    *parameters.station_combine_min_overlap* are not eligible.

    Returns (record, record id, diff), where diff is the mean over
    the overlapping years of the record's annual value minus the
    annual value of *new_data*.  When no record is eligible the
    triple (0, 0, MISSING) is returned.
    """

    ann_mean, ann_anoms = monthly_annual(new_data)
    longest = 0
    # :todo: the records are consulted in an essentially arbitrary
    # order (chosen by the implementation of items()), but the order
    # may affect the result.
    # Tie breaks go to the last record consulted.
    for rec_id, record in records.items():
        rec_anoms = record.ann_anoms
        rec_mean = record.ann_mean
        rec_first = record.first_year
        n_years = record.last_year - rec_first + 1
        total = 0
        matched = 0
        for n in range(n_years):
            rec_anom = rec_anoms[n]
            if invalid(rec_anom):
                continue
            anom = ann_anoms[n + rec_first - begin]
            if not invalid(anom):
                matched += 1
                total += (rec_mean + rec_anom) - (ann_mean + anom)
        too_short = matched < parameters.station_combine_min_overlap
        if too_short or matched < longest:
            continue
        longest = matched
        diff = total / matched
        best_id = rec_id
        best_record = record
    if longest < parameters.station_combine_min_overlap:
        return 0, 0, MISSING
    return best_record, best_id, diff
示例#11
0
def pieces_get_longest_overlap(new_data, begin, records):
    """Return (record, record id) for the entry of the *records* dict
    whose annual anomalies overlap longest with those of *new_data*.

    *begin* is the year of the first annual anomaly of *new_data*.  A
    year counts when both series have a valid annual anomaly for it.
    No minimum overlap is required, and ties go to the last record
    consulted.  *records* must not be empty: with no record to
    consider, the names returned at the end are never assigned.
    """
    ann_mean, ann_anoms = monthly_annual(new_data)
    longest = 0
    for rec_id, record in records.items():
        rec_anoms = record.ann_anoms
        rec_first = record.first_year
        n_years = record.last_year - rec_first + 1
        matched = 0
        for n in range(n_years):
            if invalid(rec_anoms[n]):
                continue
            if invalid(ann_anoms[n + rec_first - begin]):
                continue
            matched += 1
        if matched >= longest:
            longest = matched
            best_id = rec_id
            best_record = record
    return best_record, best_id
示例#12
0
def monthly_anomalies(data, reference_period=None, base_year=-9999):
    """Calculate monthly anomalies, by subtracting from every datum
    the mean for its month.  A pair of (monthly_mean, monthly_anom) is
    returned.  *monthly_mean* is a 12-long sequence giving the mean for
    each of the 12 months; *monthly_anom* is a 12-long sequence giving
    the anomalized series for each of the 12 months.

    If *reference_period* is supplied then it should be a pair (*first*,
    *last*) and the mean for a month is taken over the period (an example
    would be reference_period=(1951,1980)).  *base_year* specifies the
    first year of the data.  Any part of the period that lies before
    *base_year* is ignored.  When the period gives no valid mean for a
    month, the mean over the entire series is used for that month.

    The input data is a flat sequence, one datum per month.
    Effectively the data changes shape as it passes through this
    function.
    """

    if reference_period:
        # Clamp at zero: a negative bound would be taken by the slice
        # below as an offset from the end of the row, selecting the
        # wrong years when the data start after the period does.
        base = max(0, reference_period[0] - base_year)
        limit = max(0, reference_period[1] - base_year + 1)
    else:
        # Setting base, limit to (0,0) is a bit of a hack, but it
        # does work.
        base = 0
        limit = 0
    monthly_mean = []
    monthly_anom = []
    for m in range(12):
        row = data[m::12]
        mean = valid_mean(row[base:limit])
        if invalid(mean):
            # Fall back to using entire period
            mean = valid_mean(row)
        monthly_mean.append(mean)
        if valid(mean):

            def asanom(datum):
                """Convert a single datum to anomaly."""
                if valid(datum):
                    return datum - mean
                return MISSING

            monthly_anom.append([asanom(datum) for datum in row])
        else:
            # Same length as the row itself, so that a trailing partial
            # year gives rows of consistent length either way.
            monthly_anom.append([MISSING] * len(row))
    return monthly_mean, monthly_anom
示例#13
0
def add(sums, wgts, diff, begin, record):
    """Accumulate the series of *record* into *sums* and *wgts*.

    Every valid datum of *record.series* has *diff* subtracted and is
    added to the matching element of *sums*; the matching element of
    *wgts* is incremented by one.  *begin* is the year of element 0 of
    *sums* and *wgts*; the record's own first year fixes where its
    data land.  The series must hold exactly 12 values for each year
    from *record.first_year* to *record.last_year* inclusive.
    """

    series = record.series
    n_years = record.last_year - record.first_year + 1
    assert len(series) == 12*n_years
    # Position, within sums/wgts, of the record's first month.
    shift = 12 * (record.first_year - begin)
    for i, datum in enumerate(series):
        if not invalid(datum):
            target = i + shift
            sums[target] += datum - diff
            wgts[target] += 1
示例#14
0
def monthly_anomalies(data, reference_period=None, base_year=-9999):
    """Calculate monthly anomalies, by subtracting from every datum
    the mean for its month.  A pair of (monthly_mean, monthly_anom) is
    returned.  *monthly_mean* is a 12-long sequence giving the mean for
    each of the 12 months; *monthly_anom* is a 12-long sequence giving
    the anomalized series for each of the 12 months.

    If *reference_period* is supplied then it should be a pair (*first*,
    *last*) and the mean for a month is taken over the period (an example
    would be reference_period=(1951,1980)).  *base_year* specifies the
    first year of the data; years of the period earlier than
    *base_year* are simply not available and are left out.  If no
    valid mean results from the period, the mean of the whole series
    for that month is used.

    The input data is a flat sequence, one datum per month.
    Effectively the data changes shape as it passes through this
    function.
    """

    if reference_period:
        first, last = reference_period[0], reference_period[1]
        # Row offsets of the period.  They are kept non-negative
        # because a negative slice bound counts from the end of the
        # row, which would pick unrelated years (or nothing) when the
        # period starts before the data do.
        base = max(first - base_year, 0)
        limit = max(last - base_year + 1, 0)
    else:
        # Setting base, limit to (0,0) is a bit of a hack, but it
        # does work.
        base = 0
        limit = 0
    monthly_mean = []
    monthly_anom = []
    for m in range(12):
        row = data[m::12]
        mean = valid_mean(row[base:limit])
        if invalid(mean):
            # Fall back to using entire period
            mean = valid_mean(row)
        monthly_mean.append(mean)
        anoms = []
        if valid(mean):
            for datum in row:
                if valid(datum):
                    anoms.append(datum - mean)
                else:
                    anoms.append(MISSING)
        else:
            # One MISSING per datum of this month, so the row length
            # matches the input even with a trailing partial year.
            anoms = [MISSING]*len(row)
        monthly_anom.append(anoms)
    return monthly_mean, monthly_anom
示例#15
0
def adjust_helena(stream):
    """Modifies records as specified in config/combine_pieces_helena.in,
    by adding the delta to every valid datum for that station up to
    and including the specified month.

    This is a generator: every record of *stream* is yielded, adjusted
    or not.  Each entry of the configuration is applied once at most;
    it is dropped after the first record with a matching uid.  If the
    specified month lies beyond the end of a record's series, all of
    the record's valid data are adjusted.
    """
    helena_ds = read_config.get_helena_dict()
    for record in stream:
        uid = record.uid
        if uid in helena_ds:
            series = record.series
            this_year, month, summand = helena_ds[uid]
            begin = record.first_year
            # Index of month specified by helena_ds
            M = (this_year - begin)*12 + month
            # All valid data up to and including M get adjusted; the
            # bound is capped so that a series ending before M does
            # not cause an IndexError.
            for i in range(min(M+1, len(series))):
                datum = series[i]
                if invalid(datum):
                    continue
                series[i] += summand
            record.set_series(record.first_month, series)
            del helena_ds[uid]
        yield record
示例#16
0
def trend2(points, xmid, min):
    """Finds a fit to the data *points[]*, using regression analysis,
    by a line with a change in slope at *xmid*.

    *points* yields (x, value) pairs; those with an invalid value are
    ignored.  After subtracting *xmid* from x, a point belongs to the
    right-hand side when the result is greater than zero and to the
    left-hand side otherwise.

    Returned is a 4-tuple (*sl1*, *sl2*, *rms*, *sl*): the left-hand
    slope, the right-hand slope, the error of the two-slope fit, and
    the slope of an overall linear fit.  The error value is the plain
    sum of squared residuals as accumulated below (it is neither
    averaged nor square-rooted in this function).  Four MISSING values
    are returned when either side has fewer than *min* valid points.
    """

    # Todo: incorporate into getfit.
    LEFT, RIGHT = 0, 1
    # Per-side accumulators, indexed by LEFT / RIGHT.
    counts = [0, 0]
    sum_x = [0, 0]
    sum_xx = [0, 0]
    sum_xa = [0, 0]

    sa = 0.0
    saa = 0.0

    for (x, v) in points:
        if invalid(v):
            continue
        x -= xmid
        sa += v
        saa += v ** 2
        if x > 0.0:
            side = RIGHT
        else:
            side = LEFT
        counts[side] += 1
        sum_x[side] += x
        sum_xx[side] += x ** 2
        sum_xa[side] += x * v

    count0, count1 = counts
    if count0 < min or count1 < min:
        return MISSING, MISSING, MISSING, MISSING

    sx0, sx1 = sum_x
    sxx0, sxx1 = sum_xx
    sxa0, sxa1 = sum_xa

    count = count0 + count1
    denom = (count * sxx0 * sxx1
             - sxx0 * sx1 ** 2
             - sxx1 * sx0 ** 2)
    sl1 = (sx0 * (sx1 * sxa1 - sxx1 * sa)
           + sxa0 * (count * sxx1 - sx1 ** 2)) / denom
    sl2 = (sx1 * (sx0 * sxa0 - sxx0 * sa)
           + sxa1 * (count * sxx0 - sx0 ** 2)) / denom

    # Fitted value at xmid, then the squared residuals summed over
    # both segments.
    ymid = (sa - sl1 * sx0 - sl2 * sx1) / count
    rms = (count * ymid ** 2
           + saa
           - 2 * ymid * (sa - sl1 * sx0 - sl2 * sx1)
           + sl1 * sl1 * sxx0
           + sl2 * sl2 * sxx1
           - 2 * sl1 * sxa0
           - 2 * sl2 * sxa1)

    # linear regression
    sx = sx0 + sx1
    sxx = sxx0 + sxx1
    sxa = sxa0 + sxa1
    sl = (count * sxa - sa * sx) / (count * sxx - sx ** 2)

    return sl1, sl2, rms, sl