示例#1
0
def prepare_series(iy1, iyrm, combined, urban_series, counts, iyoff):
    """Prepares for the linearity fitting by returning a series of
    data points *(x,f)*, where *x* is a year number and *f* is the
    difference between the combined rural station anomaly series
    *combined* and the urban station series *urban_series*.  The
    points only include valid years, from the first quorate year to
    the last.  A valid year is one in which both the urban station and
    the combined rural series have valid data.  A quorate year is a
    valid year in which there are at least
    *parameters.urban_adjustment_min_rural_stations* contributing
    (obtained from the *counts* series).

    Returns a 4-tuple: (*p*, *c*, *f*, *l*). *p* is the series of
    points, *c* is a count of the valid quorate years.  *f* is the
    first such year.  *l* is the last such year.
    """
    first = last = i = quorate_count = length = 0
    points = []

    for iy in xrange(iy1 - 1, iyrm):
        if valid(combined[iy]) and valid(urban_series[iy]):
            # Evaluate the quorum test once per year (previously it was
            # computed twice per iteration).
            quorate = (
                counts[iy] >= parameters.urban_adjustment_min_rural_stations)
            if quorate:
                last = iy + 1
                quorate_count += 1
                if first == 0:
                    # 0 is a sentinel meaning "no quorate year seen yet".
                    first = iy + 1
            if quorate_count <= 0:
                # Skip valid years preceding the first quorate year.
                continue

            points.append((iy + iyoff + 1, combined[iy] - urban_series[iy]))
            i += 1
            if quorate:
                # Remember the point count up to the most recent quorate
                # year, so the series can be truncated there on return.
                length = i

    return points[:length], quorate_count, first, last
示例#2
0
def prepare_series(iy1, iyrm, combined, urban_series, counts, iyoff):
    """Build the data series used by the linearity fit.

    Produces points *(x,f)* where *x* is a year number and *f* is the
    difference between the combined rural anomaly series *combined*
    and the urban station series *urban_series*.  Only valid years are
    included, from the first quorate year through the last; a valid
    year has valid data in both series, and a quorate year is a valid
    year with at least
    *parameters.urban_adjustment_min_rural_stations* contributors
    (taken from *counts*).

    Returns a 4-tuple (*p*, *c*, *f*, *l*): the point series, the
    count of quorate years, and the first and last quorate years.
    """
    quorate_count = 0
    first = last = 0
    length = 0
    points = []

    min_stations = parameters.urban_adjustment_min_rural_stations
    for iy in xrange(iy1 - 1, iyrm):
        if not (valid(combined[iy]) and valid(urban_series[iy])):
            continue
        if counts[iy] >= min_stations:
            quorate_count += 1
            last = iy + 1
            if not first:
                first = iy + 1
        if quorate_count:
            points.append((iy + iyoff + 1, combined[iy] - urban_series[iy]))
            if counts[iy] >= min_stations:
                # Points up to here survive the final truncation.
                length = len(points)

    return points[:length], quorate_count, first, last
示例#3
0
def fresh_arrays(record, begin, years):
    """Return a fresh pair of lists (sums, wgts), each of length
    12 * *years*.

    *begin* is the first year covered by the arrays; it must be no
    later than the record's own first year.
    """

    total_months = 12 * years

    data = record.series
    record_months = 12 * (record.last_year - record.first_year + 1)
    assert record_months == record.n
    assert record_months == len(data)
    # The record may start after *begin*, so its data is copied in at
    # a (month) offset.
    offset = 12 * (record.first_year - begin)
    assert offset >= 0

    sums = [0.0] * total_months
    wgts = [0] * total_months
    end = offset + record_months
    # valid(x)*x yields x for valid data and 0 for invalid data.
    sums[offset:end] = [valid(x) * x for x in data]
    # Weight 1 wherever the corresponding sum entry is valid data.
    wgts[offset:end] = [int(valid(x)) for x in data]

    return sums, wgts
示例#4
0
def prepare_series(from_year, combined, urban_series, counts):
    """Build the data series used by the linearity fit.

    Produces points *(x,d)* where *x* is a calendar year and *d* is
    the difference between the combined rural anomaly series
    *combined* and the urban station series *urban_series* (both are
    annual series, one datum per year).

    Only valid years are included, from the first quorate year through
    the last.  A valid year has valid data in both series; a quorate
    year is a valid year with at least
    *parameters.urban_adjustment_min_rural_stations* contributing
    stations (per the *counts* sequence, which gives the number of
    stations contributing to each datum of *combined*).

    Only years from the calendar year *from_year* onwards are
    considered.

    Returns (*points*, *count*): the point series and the number of
    valid quorate years.
    """

    # Calendar year of the first datum in the series.
    base_year = giss_data.BASE_YEAR
    quorate_count = 0
    # Index just past the last quorate point; used for truncation.
    length = 0
    points = []

    assert len(combined) >= len(urban_series)

    min_rural = parameters.urban_adjustment_min_rural_stations
    for iy in xrange(from_year - base_year, len(urban_series)):
        if not (valid(combined[iy]) and valid(urban_series[iy])):
            continue
        is_quorate = counts[iy] >= min_rural
        if is_quorate:
            quorate_count += 1
        if quorate_count:
            points.append((iy + base_year, combined[iy] - urban_series[iy]))
            if is_quorate:
                length = len(points)

    return points[:length], quorate_count
def prepare_series(from_year, combined, urban_series, counts):
    """Prepares for the linearity fitting by returning a series of
    data points *(x,d)*, where *x* is a calendar year number and *d*
    is the difference between the combined rural station anomaly series
    *combined* and the urban station series *urban_series* (each of
    these is an annual series, one datum per year).

    The returned points only include valid years, from the first
    quorate year to the last quorate year.  A valid year is one in
    which both the urban station and the combined rural series have
    valid data.  A quorate year is a valid year in which there are
    at least *parameters.urban_adjustment_min_rural_stations*
    contributing.

    The algorithm is restricted to only considering years starting at
    *from_year* (and ending at the end of the series); it is a
    calendar year.

    The *counts* argument is a sequence that contains the number of
    stations contributing to each datum in *combined*.

    Returns a tuple: (*points*, *count*). *points* is the series of
    points, *count* is a count of the valid quorate years.
    """

    # Calendar year corresponding to first datum in series.
    year_offset = giss_data.BASE_YEAR
    # Number of valid quorate years
    quorate_count = 0
    # Used to truncate the series to the last quorate year, immediately
    # before returning it.
    length = 0
    points = []

    assert len(combined) >= len(urban_series)

    def quorate():
        """True when *iy* corresponds to a quorate year; used in inner
        loop, below."""
        return counts[iy] >= parameters.urban_adjustment_min_rural_stations

    for iy in xrange(from_year - year_offset, len(urban_series)):
        if valid(combined[iy]) and valid(urban_series[iy]):
            # Evaluate the quorum test once per year (previously
            # quorate() was called twice per quorate iteration).
            is_quorate = quorate()
            if is_quorate:
                quorate_count += 1
            if quorate_count == 0:
                continue

            points.append((iy + year_offset, combined[iy] - urban_series[iy]))
            if is_quorate:
                length = len(points)

    return points[:length], quorate_count
示例#6
0
def annual_anomaly(record):
    """Updates the station record *record* with attributes .first,
    .last, and .anomalies.  The algorithm is as follows: compute
    monthly averages, then monthly anomalies, then seasonal anomalies
    (means of monthly anomalies for at least two months) then annual
    anomalies (means of seasonal anomalies for at least three
    seasons).
    """

    series = record.series
    # Long-term mean for each of the 12 calendar months, computed over
    # the valid data only.
    monthly_means = []
    for m in range(12):
        month_data = filter(valid, series[m::12])
        # neglect December of final year, as we do not use its season.
        if m == 11 and valid(series[-1]):
            month_data = month_data[:-1]
        # NOTE(review): this raises ZeroDivisionError if a month has no
        # valid data at all — presumably callers guarantee that cannot
        # happen; confirm.
        monthly_means.append(float(sum(month_data)) / len(month_data))
    annual_anoms = []
    # Year index (into annual_anoms) of the first valid annual
    # anomaly; None until one is found.
    first = None
    for y in range(len(series) / 12):
        # Seasons are Dec-Feb, Mar-May, Jun-Aug, Sep-Nov.
        # (Dec from previous year).
        total = [0.0] * 4  # total monthly anomaly for each season
        count = [0] * 4  # number of valid months in each season
        for m in range(-1, 11):
            index = y * 12 + m
            if index >= 0:  # no Dec value in year -1
                datum = series[index]
                if valid(datum):
                    season = (m + 1) // 3  # season number 0-3
                    # (m + 12) % 12 maps m == -1 (the previous
                    # December) onto calendar month 11.
                    total[season] += datum - monthly_means[(m + 12) % 12]
                    count[season] += 1
        season_anomalies = []  # list of valid seasonal anomalies
        for s in range(4):
            # valid seasonal anomaly requires at least 2 valid months
            if count[s] > 1:
                season_anomalies.append(total[s] / count[s])
        # valid annual anomaly requires at least 3 valid seasons
        if len(season_anomalies) > 2:
            annual_anoms.append(sum(season_anomalies) / len(season_anomalies))
            if first is None:
                first = y
            last = y
        else:
            annual_anoms.append(MISSING)

    if first is None:
        # No valid annual anomaly anywhere in the record.
        record.anomalies = None
    else:
        # Convert year indexes to calendar years and trim the anomaly
        # series to the valid range.
        record.first = first + record.first_year
        record.last = last + record.first_year
        record.anomalies = annual_anoms[first:last + 1]
示例#7
0
def annual_anomaly(record):
    """Updates the station record *record* with attributes .first,
    .last, and .anomalies.  Monthly means are computed first, then
    monthly anomalies, then seasonal anomalies (a season needs at
    least two valid months), then annual anomalies (a year needs at
    least three valid seasons).
    """

    series = record.series
    # Long-term mean for each calendar month.
    means = []
    for month in range(12):
        data = filter(valid, series[month::12])
        # Drop the final December: its season belongs to the
        # (absent) following year.
        if month == 11 and valid(series[-1]):
            data = data[:-1]
        means.append(float(sum(data)) / len(data))

    anomalies = []
    first = None
    last = None
    for year in range(len(series) // 12):
        # Season 0 is Dec(previous year)-Feb, then Mar-May,
        # Jun-Aug, Sep-Nov.
        season_total = [0.0, 0.0, 0.0, 0.0]
        season_count = [0, 0, 0, 0]
        for month in range(-1, 11):
            idx = year * 12 + month
            if idx < 0:
                # There is no December before the first year.
                continue
            value = series[idx]
            if valid(value):
                season = (month + 1) // 3
                # month % 12 maps -1 (previous December) onto 11.
                season_total[season] += value - means[month % 12]
                season_count[season] += 1
        # Seasonal anomalies, keeping only seasons with 2+ months.
        seasonal = [season_total[s] / season_count[s]
                    for s in range(4) if season_count[s] > 1]
        # An annual anomaly needs at least 3 valid seasons.
        if len(seasonal) >= 3:
            anomalies.append(sum(seasonal) / len(seasonal))
            if first is None:
                first = year
            last = year
        else:
            anomalies.append(MISSING)

    if first is None:
        record.anomalies = None
    else:
        record.first = record.first_year + first
        record.last = record.first_year + last
        record.anomalies = anomalies[first:last + 1]
示例#8
0
def combine_neighbors(us, iyrm, iyoff, neighbors):
    """Combines the neighbor stations *neighbors*, weighted according
    to their distances from the urban station *us*, to give a combined
    annual anomaly series.  Returns a tuple: (*counts*,
    *urban_series*, *combined*), where *counts* is a per-year list of
    the number of stations combined, *urban_series* is the series from
    the urban station, re-based at *iyoff*, and *combined* is the
    combined neighbor series, based at *iyoff*.

    NOTE(review): *iyoff* is accepted but never referenced in the
    body — presumably the input series are already based at *iyoff*;
    confirm with the caller.
    """

    weights = [0.0] * iyrm
    counts = [0] * iyrm
    urban_series = [MISSING] * iyrm
    combined = [MISSING] * iyrm

    urban_series[us.first_year - 1:us.last_year] = us.anomalies

    # start with the neighbor with the longest time record
    rs = neighbors[0]
    combined[rs.first_year - 1:rs.last_year] = rs.anomalies
    for m in range(len(rs.anomalies)):
        if valid(rs.anomalies[m]):
            weights[m + rs.first_year - 1] = rs.weight
            counts[m + rs.first_year - 1] = 1

    # add in the remaining stations (the index from the original
    # enumerate() was unused, so it has been dropped)
    for rs in neighbors[1:]:
        dnew = [MISSING] * iyrm
        dnew[rs.first_year - 1: rs.last_year] = rs.anomalies
        cmbine(combined, weights, counts, dnew,
               rs.first_year, rs.last_year, rs.weight)

    return counts, urban_series, combined
示例#9
0
def adjust(first_year, station, series, fit, iy1, iy2,
           iy1a, iy2a, m1, m2, offset):
    """Adjust the monthly *series* in place using the two-part linear
    fit *fit* = (sl1, sl2, knee, sl0), over years *iy1* to *iy2*
    inclusive.  When the two-part fit is not good (per
    good_two_part_fit), the single slope *sl0* is used for both legs.

    *m1* and *m2* are the first and last month indexes eligible for
    adjustment; only valid data within that window is changed.
    *offset* converts from the window-relative month index to an index
    into *series*.  Returns (m1, m2): the first and last month indexes
    actually adjusted.

    *station* is accepted but not used.

    NOTE(review): the adjustment is anchored so that it is zero at
    year *iy2a* — presumably so the most recent data is left
    unchanged; confirm against the calling step.
    """
    (sl1, sl2, knee, sl0) = fit
    if not good_two_part_fit(fit, iy1a, iy2a):
        # Use linear approximation
        sl1, sl2 = sl0, sl0

    base = m1

    m1o, m2o = m1, m2
    # m1 becomes the first month actually adjusted; -100 is a
    # sentinel meaning "no month adjusted yet".
    m1 = -100
    m0 = 12 * (iy1 - first_year)   # Dec of year iy1
    for iy in range(iy1, iy2 + 1):
        # Slope sl1 before the knee, sl2 after it.
        sl = sl1
        if iy > knee:
            sl = sl2
        # Clamp the year used for the fit into [iy1a, iy2a].
        iya = iy
        if iy < iy1a:
            iya = iy1a
        if iy > iy2a:
            iya = iy2a
        # Per-year adjustment; zero when iya == iy2a.
        adj = (iya - knee) * sl - (iy2a - knee) * sl2
        for m in range(m0, m0 + 12):
            mIdx = m - base
            if mIdx < 0:
                continue
            if m >= m1o and m <= m2o and valid(series[mIdx + offset]):
                if m1 < 0:
                    m1 = m
                series[mIdx+offset] = series[mIdx+offset] + adj
                m2 = m

        m0 = m0 + 12

    return m1, m2
示例#10
0
def combine_neighbors(us, iyrm, iyoff, neighbors):
    """Combine the neighbor stations *neighbors*, each weighted
    according to its distance from the urban station *us*, into one
    annual anomaly series.

    Returns (*counts*, *urban_series*, *combined*): the per-year
    number of stations combined, the urban station's series re-based
    at *iyoff*, and the combined neighbor series based at *iyoff*.
    """

    weights = [0.0] * iyrm
    counts = [0] * iyrm
    urban_series = [MISSING] * iyrm
    combined = [MISSING] * iyrm

    urban_series[us.first_year - 1:us.last_year] = us.anomalies

    # Seed the combination with the neighbor that has the longest
    # time record.
    longest = neighbors[0]
    start = longest.first_year - 1
    combined[start:longest.last_year] = longest.anomalies
    for idx, anomaly in enumerate(longest.anomalies):
        if valid(anomaly):
            weights[start + idx] = longest.weight
            counts[start + idx] = 1

    # Fold in each of the remaining stations.
    for station in neighbors[1:]:
        fresh = [MISSING] * iyrm
        fresh[station.first_year - 1:station.last_year] = station.anomalies
        cmbine(combined, weights, counts, fresh, station.first_year,
               station.last_year, station.weight)

    return counts, urban_series, combined
示例#11
0
def adjust(first_year, station, series, fit, iy1, iy2, iy1a, iy2a, m1, m2,
           offset):
    """Adjust the monthly *series* in place, applying the two-part
    linear fit *fit* = (sl1, sl2, knee, sl0) over the years *iy1*
    through *iy2*.  Falls back to the single slope *sl0* when the
    two-part fit is not good (per good_two_part_fit).

    Only valid data between month indexes *m1* and *m2* is changed;
    *offset* converts window-relative month indexes to indexes into
    *series*.  Returns (m1, m2), the first and last month indexes
    actually adjusted.

    *station* is accepted but not used.

    NOTE(review): the adjustment evaluates to zero at year *iy2a*,
    presumably anchoring the series at its recent end; confirm.
    """
    (sl1, sl2, knee, sl0) = fit
    if not good_two_part_fit(fit, iy1a, iy2a):
        # Use linear approximation
        sl1, sl2 = sl0, sl0

    base = m1

    m1o, m2o = m1, m2
    # -100 is a sentinel: no month has been adjusted yet.
    m1 = -100
    m0 = 12 * (iy1 - first_year)  # Dec of year iy1
    for iy in range(iy1, iy2 + 1):
        # Slope sl1 before the knee year, sl2 after it.
        sl = sl1
        if iy > knee:
            sl = sl2
        # Clamp the fitted year into [iy1a, iy2a].
        iya = iy
        if iy < iy1a:
            iya = iy1a
        if iy > iy2a:
            iya = iy2a
        adj = (iya - knee) * sl - (iy2a - knee) * sl2
        for m in range(m0, m0 + 12):
            mIdx = m - base
            if mIdx < 0:
                continue
            if m >= m1o and m <= m2o and valid(series[mIdx + offset]):
                if m1 < 0:
                    m1 = m
                series[mIdx + offset] = series[mIdx + offset] + adj
                m2 = m

        m0 = m0 + 12

    return m1, m2
示例#12
0
def calc_monthly_USHCN_offsets(u_record, g_record):
    """For each of the 12 calendar months, compute the mean difference
    (*u_record* datum minus *g_record* datum) over the most recent
    years — starting no earlier than
    parameters.USHCN_offset_start_year — in which both records have
    valid data, using at most parameters.USHCN_offset_max_months
    pairs per month.  Months with no usable pairs get an offset of
    0.0.  Returns a 12-element list of offsets.
    """
    u_years = u_record.get_set_of_years(parameters.USHCN_offset_start_year,
                                        u_record.last_year)
    g_years = g_record.get_set_of_years(parameters.USHCN_offset_start_year,
                                        u_record.last_year)
    # Most recent years first, so the max-months cutoff below keeps
    # the most recent data.
    reversed_year_pairs = list(reversed(zip(u_years, g_years)))

    diffs = [0.0] * 12
    for month in range(12):
        # Named *total* (not *sum*) to avoid shadowing the builtin.
        total = 0.0
        count = 0
        for u_year, g_year in reversed_year_pairs:
            u_temp, g_temp = u_year[month], g_year[month]
            if valid(u_temp) and valid(g_temp):
                total += u_temp - g_temp
                count += 1
                if count == parameters.USHCN_offset_max_months:
                    break
        if count > 0:
            diffs[month] = total / count
    return diffs
示例#13
0
def calc_monthly_USHCN_offsets(u_record, g_record):
    """Return a 12-element list of per-month offsets between
    *u_record* and *g_record* (record datum minus g datum), averaged
    over the most recent years in which both records are valid,
    bounded by parameters.USHCN_offset_max_months pairs per month and
    starting no earlier than parameters.USHCN_offset_start_year.
    """
    start = parameters.USHCN_offset_start_year
    u_years = u_record.get_set_of_years(start, u_record.last_year)
    g_years = g_record.get_set_of_years(start, u_record.last_year)
    # Newest years first, so the cutoff keeps recent data.
    pairs_newest_first = list(reversed(zip(u_years, g_years)))

    offsets = [0.0] * 12
    max_pairs = parameters.USHCN_offset_max_months
    for month in range(12):
        acc = 0.0
        n = 0
        for u_year, g_year in pairs_newest_first:
            u, g = u_year[month], g_year[month]
            if valid(u) and valid(g):
                acc += u - g
                n += 1
                if n == max_pairs:
                    break
        if n:
            offsets[month] = acc / n
    return offsets
示例#14
0
def valid_mean(seq, min=1):
    """Return the mean of the valid items of *seq* (as judged by the
    valid() function).  When fewer than *min* items are valid, the
    result is MISSING."""

    total = 0.0
    n = 0
    for item in seq:
        if valid(item):
            total += item
            n += 1
    if n < min:
        return MISSING
    return total / float(n)
示例#15
0
def valid_mean(seq, min=1):
    """Takes a sequence, *seq*, and computes the mean of the valid
    items (using the valid() function).  If there are fewer than *min*
    valid items, the mean is MISSING.

    Note: the parameters/locals *min* and *sum* shadow the builtins of
    the same names; they are kept for backward compatibility.
    """

    count = 0
    sum = 0.0
    for x in seq:
        if valid(x):
            sum += x
            count += 1
    # Only report a mean when there are enough valid items.
    if count >= min:
        return sum / float(count)
    else:
        return MISSING
示例#16
0
def monthly_anomalies(data, reference_period=None, base_year=-9999):
    """Compute monthly anomalies by subtracting from every datum the
    mean for its calendar month.  Returns a pair (monthly_mean,
    monthly_anom): *monthly_mean* is a 12-long sequence of per-month
    means; *monthly_anom* is a 12-long sequence of anomalized series,
    one per month.

    When *reference_period* is given it is a pair (*first*, *last*)
    of calendar years — e.g. (1951, 1980) — over which each month's
    mean is taken; *base_year* is the calendar year of the first
    datum.

    *data* is a flat sequence, one datum per month; effectively it
    changes shape as it passes through this function.
    """

    years = len(data) // 12
    if reference_period:
        base = reference_period[0] - base_year
        limit = reference_period[1] - base_year + 1
    else:
        # An empty (0, 0) slice forces the whole-period fallback
        # below; a bit of a hack, but it works.
        base = limit = 0
    monthly_mean = []
    monthly_anom = []
    for m in range(12):
        month_series = data[m::12]
        mean = valid_mean(month_series[base:limit])
        if invalid(mean):
            # Fall back to using the entire period.
            mean = valid_mean(month_series)
        monthly_mean.append(mean)
        if valid(mean):
            anoms = [x - mean if valid(x) else MISSING
                     for x in month_series]
        else:
            anoms = [MISSING] * years
        monthly_anom.append(anoms)
    return monthly_mean, monthly_anom
示例#17
0
def monthly_anomalies(data, reference_period=None, base_year=-9999):
    """Calculate monthly anomalies, by subtracting from every datum
    the mean for its month.  A pair of (monthly_mean, monthly_anom) is
    returned.  *monthly_mean* is a 12-long sequence giving the mean for
    each of the 12 months; *monthly_anom* is a 12-long sequence giving
    the anomalized series for each of the 12 months.

    If *reference_period* is supplied then it should be a pair (*first*,
    *last*) and the mean for a month is taken over the period (an example
    would be reference_period=(1951,1980)).  *base_year* specifies the
    first year of the data.

    The input data is a flat sequence, one datum per month.
    Effectively the data changes shape as it passes through this
    function.
    """

    years = len(data) // 12
    if reference_period:
        # Convert the calendar-year period into indexes into each
        # per-month row.
        base = reference_period[0] - base_year
        limit = reference_period[1] - base_year + 1
    else:
        # Setting base, limit to (0,0) is a bit of a hack, but it
        # does work.
        base = 0
        limit = 0
    monthly_mean = []
    monthly_anom = []
    for m in range(12):
        # All the data for calendar month *m*, one datum per year.
        row = data[m::12]
        mean = valid_mean(row[base:limit])
        if invalid(mean):
            # Fall back to using entire period
            mean = valid_mean(row)
        monthly_mean.append(mean)
        if valid(mean):
            def asanom(datum):
                """Convert a single datum to anomaly."""
                if valid(datum):
                    return datum - mean
                return MISSING
            monthly_anom.append(map(asanom, row))
        else:
            # No valid mean at all: the whole month is MISSING.
            monthly_anom.append([MISSING]*years)
    return monthly_mean, monthly_anom
示例#18
0
def extend_range(series, count, first, last):
    """Extend the adjustment range when possible.  *first* and *last*
    are the calendar years bounding the range of quorate years, and
    *count* is the number of quorate years within it (both computed by
    `prepare_series`).  *series* is the urban station's annual
    anomalies, based at BASE_YEAR.

    Returns a pair of calendar years for the extended range; when no
    extension is possible the original (*first*, *last*) pair comes
    back unchanged.
    """

    # Extra years wanted: the span implied by *count* at the required
    # proportion of good years, minus the current span.
    extra = int(round(count / parameters.urban_adjustment_proportion_good) -
                (last - first + 1))
    if extra == 0:
        # Nothing to extend.
        return first, last
    assert extra > 0

    # Indexes of the years with valid urban data.
    valid_years = [i for i, x in enumerate(series) if valid(x)]
    # Calendar years of the urban record's extremes, widened by one
    # year each way to allow for partial years.
    urban_first = min(valid_years) + giss_data.BASE_YEAR - 1
    urban_last = max(valid_years) + giss_data.BASE_YEAR + 1

    # Always take in all of the recent part of the urban record; any
    # remaining "spare years" push the start earlier, but never before
    # the urban record begins.
    spare = extra - (urban_last - last)
    if spare > 0:
        first = max(first - spare, urban_first)
    return first, urban_last
示例#19
0
def extend_range(series, count, first, last):
    """Extend the range for adjusting, if possible.  *first* and *last*
    are the calendar years that define the range of quorate years.
    *count* gives the total number of quorate years in that range (these
    are computed in `prepare_series`).  *series* is the annual anomalies
    (based at BASE_YEAR) for the urban station.

    Returns a pair of calendar years for the extended range.  If no
    extension is possible, the quorate range *first* to *last* is
    returned.
    """

    # Extra years wanted: the span implied by *count* at the required
    # proportion of good years, minus the current span.
    # NOTE(review): if this came out negative (more quorate years than
    # the proportion requires) the assert below would fire —
    # presumably that cannot happen upstream; confirm.
    iyxtnd = int(round(count / parameters.urban_adjustment_proportion_good)
                 - (last - first + 1))
    if iyxtnd == 0:
        # No extension possible.
        return first, last
    assert iyxtnd > 0

    # The first and last years for which the urban station has a
    # valid annual anomaly.
    valid_years = [i for i,x in enumerate(series) if valid(x)]
    urban_first = min(valid_years)
    urban_last = max(valid_years)
    # Convert to calendar years, and extend by 1 year in each
    # direction to include possible partial years.
    urban_first += giss_data.BASE_YEAR - 1
    urban_last += giss_data.BASE_YEAR + 1

    # When extending, extend to include all of the recent part
    # of the urban record...
    lxend = urban_last - last
    if iyxtnd > lxend:
        # ... and if we have enough "spare years" extend some or
        # all of the earlier part of the urban record.
        first -= (iyxtnd - lxend)
        first = max(first, urban_first)
    last = urban_last
    return first, last
示例#20
0
def alter_discont(data):
    """Modifies records as specified in config/Ts.discont.RS.alter.IN,
    by adding the delta to every datum for that station prior to the
    specified month.  Yes, this is very similar to adjust_helena().

    This is a generator: every record in *data* is yielded, altered
    or not.
    """

    alter_dict = read_config.get_alter_dict()
    for record in data:
        # dict.has_key() is deprecated (and removed in Python 3);
        # the ``in`` operator is equivalent and works everywhere.
        if record.uid in alter_dict:
            series = record.series
            (a_month, a_year, a_num) = alter_dict[record.uid]
            begin = record.first_year
            # Month index of the month in the config file.
            M = (a_year - begin)*12 + a_month - 1
            # Every (valid) month up to and not including the month in
            # question is adjusted.
            for i in range(M):
                if valid(series[i]):
                    series[i] += a_num
            record.set_series(record.first_month, series)

        yield record
示例#21
0
def combine_neighbours(iyrm, neighbours):
    """Combine the annual anomaly series of *neighbours*, each
    weighted by its .weight property (previously computed from the
    distance to the urban station under consideration).

    *iyrm* is the length of the combined series.  Every neighbour
    series is assumed to begin in the same year, which is also the
    first year of the result.

    Returns (*counts*, *combined*): a per-year list of the number of
    stations combined, and the combined neighbour series.
    """

    weights = [0.0] * iyrm
    counts = [0] * iyrm
    combined = [MISSING] * iyrm

    # *neighbours* is generally sorted so that the station with the
    # longest time record (most valid years) comes first; seed the
    # combination with that one ...
    seed = neighbours[0]
    assert len(seed.anomalies) <= iyrm
    combined[:len(seed.anomalies)] = seed.anomalies
    for year, anomaly in enumerate(seed.anomalies):
        if valid(anomaly):
            weights[year] = seed.weight
            counts[year] = 1

    # ... then fold in each of the remaining stations.
    for station in neighbours[1:]:
        cmbine(combined, weights, counts, station.anomalies, station.weight)

    return counts, combined
示例#22
0
def combine_neighbours(iyrm, neighbours):
    """Combines the neighbour stations *neighbours*, weighted according
    to their .weight property (previously computed to be based on distance
    from the urban station being considered), to give a combined
    annual anomaly series.

    *iyrm* is the length of the resulting combined series.

    This function assumes that each of the neighbours annual anomaly
    series begins in the same year; the result series begins in that
    year also.

    Returns a tuple: (*counts*, *combined*), where
    *counts* is a per-year list of the number of stations combined,
    *combined* is the combined neighbour series.
    """

    weights = [0.0] * iyrm
    counts = [0] * iyrm
    combined = [MISSING] * iyrm

    # Generally, *neighbours* has been sorted, so that the first element
    # is the neighbour with the longest time record (most valid years).
    # We start with that one ...
    rs = neighbours[0]
    assert len(rs.anomalies) <= iyrm
    combined[:len(rs.anomalies)] = rs.anomalies
    for i,anom in enumerate(rs.anomalies):
        if valid(anom):
            # The seed station contributes with its own weight
            # wherever it has valid data.
            weights[i] = rs.weight
            counts[i] = 1

    # ... and add in the remaining stations.
    # cmbine() updates combined/weights/counts in place.
    for rs in neighbours[1:]:
        cmbine(combined, weights, counts, rs.anomalies, rs.weight)

    return counts, combined
示例#23
0
 def asanom(datum):
     """Return *datum* expressed as an anomaly relative to *mean*
     (a free variable from the enclosing scope); invalid data maps
     to MISSING."""
     if not valid(datum):
         return MISSING
     return datum - mean
示例#24
0
 def adj(t, d):
     """Subtract *d* from *t* when *t* is valid; invalid values pass
     through unchanged."""
     return t - d if valid(t) else t
示例#25
0
def subbox_to_box(meta, cells, celltype='BOX'):
    """Aggregate the subboxes (aka cells, typically 8000 per globe)
    into boxes (typically 80 boxes per globe), and combine records to
    produce one time series per box.

    *celltype* is used for logging, using a distinct (3 character) code
    will allow the log output for the land, ocean, and land--ocean
    analyses to be separated.

    *meta* specifies the meta data and is used to determine the first
    year (meta.yrbeg) and length (meta.monm) for all the resulting
    series.

    Returns an iterator of box data: for each box a quadruple of
    (*anom*, *weight*, *ngood*, *box*) is yielded.  *anom* is the
    temperature anomaly series, *weight* is the weights for the series
    (number of cells contributing for each month), *ngood* is total
    number of valid data in the series, *box* is a 4-tuple that
    describes the regions bounds: (southern, northern, western, eastern).
    """

    # The (80) large boxes.
    boxes = list(eqarea.grid())
    # For each box, make a list of contributors (cells that contribute
    # to the box time series); initially empty.
    contributordict = dict((box, []) for box in boxes)
    # Partition the cells into the boxes.
    for cell in cells:
        box = whichbox(boxes, cell.box)
        contributordict[box].append(cell)

    def padded_series(s):
        """Produce a series, that is padded to start in meta.yrbeg and
        is of length meta.monm months.
        *s* should be a giss_data.Series instance.
        """

        result = [MISSING] * meta.monm
        offset = 12 * (s.first_year - meta.yrbeg)
        result[offset:offset+len(s)] = s.series
        return result

    # Hoisted out of the loop below: executing an import statement on
    # every iteration performs a needless module lookup each time.
    # :todo: should probably import from a purpose built module.
    from step3 import sort

    # For each box, sort and combine the contributing cells, and output
    # the result (by yielding it).
    for box in boxes:
        contributors = contributordict[box]
        # Descending sort by number of good (valid) data.
        sort(contributors, lambda x,y: y.good_count - x.good_count)

        # Seed the box series with the longest contributor.
        best = contributors[0]
        box_series = padded_series(best)
        box_weight = [float(valid(a)) for a in box_series]

        # Start the *contributed* list with this cell.  Each entry is
        # [uid, weight, used-months string]: '1' in position *i* means
        # calendar month *i* contributed at least one datum.
        l = [any(valid(v) for v in box_series[i::12]) for i in range(12)]
        s = ''.join('01'[x] for x in l)
        contributed = [[best.uid, 1.0, s]]

        # Loop over the remaining contributors.
        for cell in contributors[1:]:
            if cell.good_count >= parameters.subbox_min_valid:
                addend_series = padded_series(cell)
                weight = 1.0
                station_months = series.combine(box_series, box_weight,
                    addend_series, weight, parameters.box_min_overlap)
                s = ''.join('01'[bool(x)] for x in station_months)
            else:
                # Too little valid data to contribute; recorded with
                # zero weight for the log.
                weight = 0.0
                s = '0'*12
            contributed.append([cell.uid, weight, s])

        box_first_year = meta.yrbeg
        series.anomalize(box_series, parameters.subbox_reference_period,
                         box_first_year)
        uid = giss_data.boxuid(box, celltype=celltype)
        log.write("%s cells %s\n" % (uid, asjson(contributed)))
        ngood = sum(valid(a) for a in box_series)
        yield (box_series, box_weight, ngood, box)
示例#26
0
def urban_adjustments(anomaly_stream):
    """Takes an iterator of station records and applies an adjustment
    to urban stations to compensate for urban temperature effects.
    Returns an iterator of station records.  Rural stations are passed
    unchanged.  Urban stations which cannot be adjusted are discarded.

    The adjustment follows a linear or two-part linear fit to the
    difference in annual anomalies between the urban station and the
    combined set of nearby rural stations.  The linear fit is to allow
    for a linear effect at the urban station.  The two-part linear fit
    is to allow for a model of urban effect which starts or stops at
    some point during the time series.

    The algorithm is essentially as follows:

    For each urban station:
        1. Find all the rural stations within a fixed radius;
        2. Combine the annual anomaly series for those rural stations, in
           order of valid-data count;
        3. Calculate a two-part linear fit for the difference between
           the urban annual anomalies and this combined rural annual anomaly;
        4. If this fit is satisfactory, apply it; otherwise apply a linear fit.

        If there are not enough nearby rural stations, or the combined
        rural record does not have enough overlap with the urban
        record, try a second time for this urban station, with a
        larger radius.  If there is still not enough data, discard the
        urban station.
     """

    last_year = giss_data.get_ghcn_last_year()
    first_year = 1880

    # *iyoff* converts between calendar years and the 1-based year
    # indexes (starting in giss_data.BASE_YEAR) used below.
    iyoff = giss_data.BASE_YEAR - 1
    iyrm = last_year - iyoff

    rural_stations = []
    urban_stations = {}

    # Degrees-to-radians conversion factor.
    pi180 = math.pi / 180.0

    # NOTE(review): 'all' shadows the builtin of the same name.
    all = []
    for record in anomaly_stream:
        station = record.station
        all.append(record)
        record.urban_adjustment = None
        annual_anomaly(record)
        # A record for which no annual anomaly could be computed takes
        # no part in the rural/urban partition (but is still yielded
        # unchanged below if rural... it is skipped here entirely).
        if record.anomalies is None:
            continue
        length = len(record.anomalies)
        d = Struct()
        d.anomalies = record.anomalies
        # Trigonometric components of the station position, presumably
        # used by get_neighbours for its distance test — TODO confirm.
        d.cslat = math.cos(station.lat * pi180)
        d.snlat = math.sin(station.lat * pi180)
        d.cslon = math.cos(station.lon * pi180)
        d.snlon = math.sin(station.lon * pi180)
        d.id = record.uid
        d.first_year = record.first - iyoff
        d.last_year = d.first_year + length - 1
        d.station = station
        d.record = record
        if is_rural(station):
            rural_stations.append(d)
        else:
            urban_stations[record] = d

    # Sort the rural stations according to the length of the time record
    # (ignoring gaps).
    for st in rural_stations:
        st.recLen = len([v for v in st.anomalies if valid(v)])
    rural_stations.sort(key=lambda s:s.recLen)
    rural_stations.reverse()

    # Combine time series for rural stations around each urban station
    for record in all:
        us = urban_stations.get(record, None)
        if us is None:
            # Just remove leading/trailing invalid values for rural stations.
            record.strip_invalid()
            record.begin = record.first
            record.end = record.last
            yield record
            continue

        iyu1 = us.first_year + iyoff - 1 # subtract 1 for a possible partial yr
        iyu2 = us.last_year + iyoff + 1  # add 1 for partial year

        # Two-pass retry loop: start with half the full radius; if
        # there are too few neighbours or too few quorate years, retry
        # once with the full radius, then give up on this station.
        usingFullRadius = False
        dropStation = False
        needNewNeighbours = True
        while True:
            if needNewNeighbours:
                if usingFullRadius:
                    radius = parameters.urban_adjustment_full_radius
                else:
                    radius = parameters.urban_adjustment_full_radius / 2
                neighbors = get_neighbours(us, rural_stations, radius)
                if not neighbors:
                    if usingFullRadius:
                        dropStation = True
                        break
                    usingFullRadius = True
                    needNewNeighbours = True
                    continue

                counts, urban_series, combined = combine_neighbors(
                        us, iyrm, iyoff, neighbors)
                iy1 = 1
                needNewNeighbours = False

            points, quorate_count, first, last = prepare_series(
                iy1, iyrm, combined, urban_series, counts, iyoff)

            if quorate_count < parameters.urban_adjustment_min_years:
                if usingFullRadius:
                    dropStation = True
                    break
                usingFullRadius = True
                needNewNeighbours = True
                continue

            # Accept when a sufficient proportion of the fitted range
            # consists of quorate years.
            if quorate_count >= (parameters.urban_adjustment_proportion_good
                                 * (last - first + 0.9)):
                break

            # Not enough good years for the given range.  Try to save
            # cases in which the gaps are in the early part, by
            # dropping that part and going around to prepare_series
            # again.
            iy1 = int(last - (quorate_count - 1) /
                      parameters.urban_adjustment_proportion_good)
            if iy1 < first + 1:
                iy1 = first + 1                  # avoid infinite loop

        if dropStation:
            continue

        fit = getfit(points)
        # find extended range
        iyxtnd = int(round(quorate_count /
                           parameters.urban_adjustment_proportion_good)
                     - (last - first + 1))
        n1x = first + iyoff
        n2x = last + iyoff
        if iyxtnd < 0:
            sys.exit('impossible')
        if iyxtnd > 0:
            # Extend the adjustment range, preferring the recent end;
            # spill any remainder onto the early end, clamped to the
            # urban record's own span [iyu1, iyu2].
            lxend = iyu2 - (last + iyoff)
            if iyxtnd <= lxend:
                 n2x = n2x + lxend
            else:
                 n1x = n1x - (iyxtnd - lxend)
                 if n1x < iyu1:
                     n1x = iyu1
                 n2x = iyu2

        series = record.series
        # adjust
        m1 = record.rel_first_month + record.good_start_idx
        m2 = record.rel_first_month + record.good_end_idx - 1
        offset = record.good_start_idx # index of first valid month
        a, b = adjust(first_year, record, series, fit, n1x, n2x,
                      first + iyoff, last + iyoff, m1, m2, offset)
        # a and b are numbers of new first and last valid months
        aa = a - m1
        bb = b - a + 1
        # Re-window the series onto the adjusted valid range.
        # NOTE: the ((a-1) / 12) divisions rely on Python 2 integer
        # division semantics.
        record.set_series(a-1 + first_year * 12 + 1,
                          series[aa + offset:aa + offset + bb])
        record.begin = ((a-1) / 12) + first_year
        record.first = record.begin
        record.end = ((b-1) / 12) + first_year
        record.last = record.last_year
        yield record
示例#27
0
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.

    Yields one giss_data.SubboxRecord per subbox; a cell with no
    contributing stations yields an all-MISSING series with a zero
    station count.
    """

    station_records = list(station_records)

    log = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Extend box, by half a box east and west and by arc north
        # and south.
        extent = [box[0] - arcdeg,
                  box[1] + arcdeg,
                  box[2] - 0.5 * (box[3] - box[2]),
                  box[3] + 0.5 * (box[3] - box[2])]
        if box[0] <= -90 or box[1] >= 90:
            # polar
            extent[2] = -180.0
            extent[3] = +180.0

        region_records = list(inbox(station_records, *extent))
        # Descending sort by number of good records
        # TODO: Switch to using Python's sort method here, although it
        # will change the results.
        sort(region_records, lambda x,y: y.good_count - x.good_count)

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            log.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
              centre + (n_empty_cells,)))
            log.flush()
            # Of possible station records for this region, filter for those
            # from stations within radius of subbox centre.
            incircle_records = list(incircle(region_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if len(incircle_records) == 0:
                box_obj = giss_data.SubboxRecord(subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise data with first station
            record = incircle_records[0]
            total_good_months = record.good_count
            total_stations = 1

            max_weight = record.weight
            offset = record.rel_first_month - 1
            a = record.series # just a temporary
            subbox_series[offset:offset + len(a)] = a
            # Weight array: the station weight wherever the seed series
            # has valid data, zero elsewhere.
            weight = [0.0] * max_months
            for i in range(len(a)):
                if valid(a[i]):
                    weight[i + offset] = record.weight

            # Add in the remaining stations
            for record in incircle_records[1:]:
                # TODO: A StationMethod method to produce a padded data series
                #       would be good here. Hence we could just do:
                #           new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, record.weight,
                    record.rel_first_year, record.rel_last_year + 1,
                    parameters.gridding_min_overlap)
                total_good_months += station_months
                if station_months == 0:
                    continue
                total_stations += 1

                if max_weight < record.weight:
                    max_weight = record.weight

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period, first_year)
            box_obj = giss_data.SubboxRecord(subbox_series, n=max_months,
                    box=list(subbox), stations=total_stations,
                    station_months=total_good_months,
                    d=radius*(1-max_weight))
            yield box_obj
        plural_suffix = 's'
        if n_empty_cells == 1:
            plural_suffix = ''
        log.write(
          '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells,plural_suffix)))
    log.write("\n")
示例#28
0
def subbox_to_box(meta, cells, celltype='BOX'):
    """Aggregate the subboxes (aka cells, typically 8000 per globe)
    into boxes (typically 80 boxes per globe), and combine records to
    produce one time series per box.

    *celltype* is used for logging, using a distinct (3 character) code
    will allow the log output for the land, ocean, and land--ocean
    analyses to be separated.

    *meta* specifies the meta data and is used to determine the first
    year (meta.yrbeg) and length (meta.monm) for all the resulting
    series.

    Returns an iterator of box data: for each box a quadruple of
    (*anom*, *weight*, *ngood*, *box*) is yielded.  *anom* is the
    temperature anomaly series, *weight* is the weights for the series
    (number of cells contributing for each month), *ngood* is total
    number of valid data in the series, *box* is a 4-tuple that
    describes the regions bounds: (southern, northern, western, eastern).
    """

    # The (80) large boxes.
    boxes = list(eqarea.grid())
    # For each box, make a list of contributors (cells that contribute
    # to the box time series); initially empty.
    contributordict = dict((box, []) for box in boxes)
    # Partition the cells into the boxes.
    for cell in cells:
        box = whichbox(boxes, cell.box)
        contributordict[box].append(cell)

    def padded_series(s):
        """Produce a series, that is padded to start in meta.yrbeg and
        is of length meta.monm months.
        *s* should be a giss_data.Series instance.
        """

        result = [MISSING] * meta.monm
        offset = 12 * (s.first_year - meta.yrbeg)
        result[offset:offset + len(s)] = s.series
        return result

    # Hoisted out of the loop below: executing an import statement on
    # every iteration performs a needless module lookup each time.
    # :todo: should probably import from a purpose built module.
    from step3 import sort

    # For each box, sort and combine the contributing cells, and output
    # the result (by yielding it).
    for box in boxes:
        contributors = contributordict[box]
        # Descending sort by number of good (valid) data.
        sort(contributors, lambda x, y: y.good_count - x.good_count)

        # Seed the box series with the longest contributor.
        best = contributors[0]
        box_series = padded_series(best)
        box_weight = [float(valid(a)) for a in box_series]

        # Start the *contributed* list with this cell.  Each entry is
        # [uid, weight, used-months string]: '1' in position *i* means
        # calendar month *i* contributed at least one datum.
        l = [any(valid(v) for v in box_series[i::12]) for i in range(12)]
        s = ''.join('01'[x] for x in l)
        contributed = [[best.uid, 1.0, s]]

        # Loop over the remaining contributors.
        for cell in contributors[1:]:
            if cell.good_count >= parameters.subbox_min_valid:
                addend_series = padded_series(cell)
                weight = 1.0
                station_months = series.combine(box_series, box_weight,
                                                addend_series, weight,
                                                parameters.box_min_overlap)
                s = ''.join('01'[bool(x)] for x in station_months)
            else:
                # Too little valid data to contribute; recorded with
                # zero weight for the log.
                weight = 0.0
                s = '0' * 12
            contributed.append([cell.uid, weight, s])

        box_first_year = meta.yrbeg
        series.anomalize(box_series, parameters.subbox_reference_period,
                         box_first_year)
        uid = giss_data.boxuid(box, celltype=celltype)
        log.write("%s cells %s\n" % (uid, asjson(contributed)))
        ngood = sum(valid(a) for a in box_series)
        yield (box_series, box_weight, ngood, box)
示例#29
0
def annual_anomaly(record):
    """Computes annual anomalies for the station record *record*.
    Returns a list of annual anomalies, one datum for each year (12
    months) of the input record.  Years for which an annual anomaly
    cannot be computed are recorded as MISSING.  The returned series is
    padded so that it begins in BASE_YEAR (that is, 1880).

    If no anomalies can be computed, then None is returned.

    The algorithm is as follows: compute monthly averages, then
    monthly anomalies, then seasonal anomalies (means of monthly
    anomalies for at least two months) then annual anomalies (means
    of seasonal anomalies for at least three seasons).

    This function assumes that the series starts in January.
    """

    series = record.series

    # Climatological mean for each calendar month.
    monthly_means = []
    for month in range(12):
        data = series[month::12]
        if month == 11:
            # Neglect December of final year, as we do not use its season.
            data = data[:-1]
        data = [v for v in data if valid(v)]
        monthly_means.append(float(sum(data)) / len(data))

    annual_anoms = []
    any_good = False  # becomes True once any year yields an anomaly
    for year in range(len(series) // 12):
        # Seasons are Dec-Feb, Mar-May, Jun-Aug, Sep-Nov.
        # (Dec from previous year, hence month index -1.)
        totals = [0.0] * 4  # total monthly anomaly for each season
        counts = [0] * 4    # number of valid months in each season
        for month in range(-1, 11):
            index = year * 12 + month
            if index < 0:  # no Dec value in year -1
                continue
            datum = series[index]
            if not valid(datum):
                continue
            season = (month + 1) // 3  # season number 0-3
            totals[season] += datum - monthly_means[month % 12]
            counts[season] += 1
        # A valid seasonal anomaly requires at least 2 valid months.
        seasonal = [totals[s] / counts[s] for s in range(4)
                    if counts[s] >= 2]
        # A valid annual anomaly requires at least 3 valid seasons.
        if len(seasonal) > 2:
            any_good = True
            annual_anoms.append(sum(seasonal) / len(seasonal))
        else:
            annual_anoms.append(MISSING)

    if not any_good:
        return None
    assert record.first_year >= giss_data.BASE_YEAR
    # Pad beginning of series so that it starts in giss_data.BASE_YEAR.
    pad = [MISSING] * (record.first_year - giss_data.BASE_YEAR)
    return pad + annual_anoms
示例#30
0
def adjust_record(record, fit, adjust_first, adjust_last):
    """Adjust the series according to the previously computed
    parameters.

    *record* is a (monthly) station record.  Its data series is
    replaced, but its length is not changed.  Data outside the
    adjustment range (see below) will become MISSING.

    *adjust_first*, *adjust_last* are calendar years: the first and
    last years that are subject to adjustment.  Adjustment years run
    from December prior to the year in question through to November
    (because the anomaly years do too).

    *fit* contains the parameters for two slopes that are used to
    make the adjustment: of slope *fit.slope1* between year *fit.first*
    and *fit.knee*, and of slope *fit.slope2* between year *fit.knee*
    and *fit.last*.  Any adjustment can be biased up or down without
    affecting the trend; the adjustment is chosen so that it is
    zero in the year *fit.last*.  Outside the range *fit.first* to
    *fit.last* the adjustment is constant (zero for the recent part,
    and the same adjustment as for year *fit.first* for the earlier
    part).
    """

    # We assume the series starts in January.
    assert record.first_month % 12 == 1

    # A fresh array for the new (adjusted) series.
    adjusted = [MISSING] * len(record.series)

    slope_before = fit.slope1
    slope_after = fit.slope2
    if not good_two_part_fit(fit):
        # Use linear approximation.
        slope_before = slope_after = fit.slope

    # (because the adjustment range is extended by 1 on either
    # end) the adjustment range can be outside the range of data for
    # the series; index bounds are clamped below to skip those parts.
    # *year* is a calendar year.
    for year in range(adjust_first, adjust_last + 1):
        slope = slope_after if year > fit.knee else slope_before
        # For the purposes of calculating the adjustment for the year,
        # clamp to the range [fit.first, fit.last].
        clamped = max(fit.first, min(year, fit.last))
        delta = ((clamped - fit.knee) * slope
                 - (fit.last - fit.knee) * slope_after)
        # The anomaly years run from Dec to Nov, so the adjustment
        # years do too.  *december* indexes the December immediately
        # before the beginning of year *year*.
        december = 12 * (year - record.first_year) - 1
        # Clamp the 12-month window to the series bounds.
        start = max(december, 0)
        stop = min(december + 12, len(record.series))
        for m in range(start, stop):
            if valid(record.series[m]):
                adjusted[m] = record.series[m] + delta
    record.set_series(record.first_month, adjusted)
示例#31
0
def zonav(boxed_data):
    """
    Perform Zonal Averaging.

    The input *boxed_data* is an iterator of boxed time series.
    The data in the boxes are combined to produce averages over
    various latitudinal zones.  Returns an iterator of
    (averages, weights, title) tuples, one per zone.

    14 zones are produced.  The first 8 are the basic belts that are used
    for the equal area grid, the remaining 6 are combinations:

      0 64N - 90N               \
      1 44N - 64N (asin 0.9)     -  8 24N - 90 N  (0 + 1 + 2)
      2 24N - 44N (asin 0.7)    /
      3 Equ - 24N (asin 0.4)    \_  9 24S - 24 N  (3 + 4)
      4 24S - Equ               /
      5 44S - 24S               \
      6 64S - 44S                - 10 90S - 24 S  (5 + 6 + 7)
      7 90S - 64S               /

     11 northern hemisphere (0 + 1 + 2 + 3)
     12 southern hemisphere (4 + 5 + 6 + 7)
     13 global (all belts 0 to 7)

    Raises Error if a zone has no data at all.
    """

    (info, titlei) = boxed_data.next()
    iyrbeg = info[5]
    monm = info[3]
    # Number of whole years of data (Python 2 integer division).
    nyrsin = monm / 12

    yield (info, titlei)

    boxes_in_band, band_in_zone = zones()

    bands = len(boxes_in_band)

    lenz = [None] * bands
    wt = [None] * bands
    avg = [None] * bands
    # For each band, combine all the boxes in that band to create a band
    # record.
    for band in range(bands):
        # The temperature (anomaly) series for each of the boxes in this
        # band.
        box_series = [None] * boxes_in_band[band]
        # The weight series for each of the boxes in this band.
        box_weights = [None] * boxes_in_band[band]
        # "length" is the number of months (with valid data) in the box
        # series.  For each box in this band.
        box_length = [None] * boxes_in_band[band]
        for box in range(boxes_in_band[band]):
            # The last element in the tuple is the boundaries of the
            # box.  We ignore it.
            box_series[box], box_weights[box], box_length[box], _ = (
                boxed_data.next())
        # total number of valid data in band's boxes
        total_length = sum(box_length)
        if total_length == 0:
            wt[band] = [0.0] * monm
            avg[band] = [MISSING] * monm
        else:
            box_length, IORD = sort_perm(box_length)
            nr = IORD[0]
            # Copy the longest box record into *wt* and *avg*.
            # Using list both performs a copy and converts into a mutable
            # list.
            wt[band] = list(box_weights[nr])
            avg[band] = list(box_series[nr])
            # And combine the remaining series.
            for n in range(1, boxes_in_band[band]):
                nr = IORD[n]
                if box_length[n] == 0:
                    # Nothing in this box, and since we sorted by length,
                    # all the remaining boxes will also be empty.  We can
                    # stop combining boxes.
                    break
                series.combine(avg[band], wt[band], box_series[nr],
                               box_weights[nr], 0, nyrsin,
                               parameters.box_min_overlap)
        series.anomalize(avg[band], parameters.box_reference_period, iyrbeg)
        lenz[band] = sum(valid(a) for a in avg[band])
        yield (avg[band], wt[band])

    # We expect to have consumed all the boxes (the first 8 bands form a
    # partition of the boxes).  We check that the boxed_data stream is
    # exhausted and contains no more boxes.
    try:
        boxed_data.next()
        assert 0, "Too many boxes found"
    except StopIteration:
        # We fully expect to get here.
        pass

    # *lenz* contains the lengths of each zone 0 to 7 (the number of
    # valid months in each zone).
    lenz, iord = sort_perm(lenz)
    for zone in range(len(band_in_zone)):
        if lenz[0] == 0:
            # BUG FIX: the original "'... %d' % bands + zone" applied %
            # to *bands* alone and then tried to add an int to the
            # string (TypeError).  Parenthesize so the zone number is
            # formatted.
            raise Error('**** NO DATA FOR ZONE %d' % (bands + zone))
        # Find the longest band that is in the special zone.
        for j1 in range(bands):
            if iord[j1] in band_in_zone[zone]:
                break
        else:
            # Should be an assertion really.
            raise Error('No band in special zone %d.' % zone)
        band = iord[j1]
        wtg = list(wt[band])
        avgg = list(avg[band])
        # Add in the remaining bands, in length order.
        for j in range(j1 + 1, bands):
            band = iord[j]
            if band not in band_in_zone[zone]:
                continue
            series.combine(avgg, wtg, avg[band], wt[band], 0, nyrsin,
                           parameters.box_min_overlap)
        series.anomalize(avgg, parameters.box_reference_period, iyrbeg)
        yield (avgg, wtg)
示例#32
0
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.
    """

    # Clear Climate Code
    import earth # required for radius.

    # Convert to list because we re-use it for each box (region).
    station_records = list(station_records)
    # Descending sort by number of good records.
    # TODO: Switch to using Python's sort method here, although it
    # will change the results.
    sort(station_records, lambda x,y: y.good_count - x.good_count)

    # A dribble of progress messages.
    dribble = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            dribble.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
              centre + (n_empty_cells,)))
            dribble.flush()
            # Determine the contributing stations to this grid cell.
            # NOTE(review): *incircle* here evidently yields
            # (record, weight) pairs — see the unpacking below.
            contributors = list(incircle(station_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not contributors:
                # Empty cell: an all-MISSING series with zero stations.
                box_obj = giss_data.Series(series=subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise series and weight arrays with first station.
            record,wt = contributors[0]
            total_good_months = record.good_count
            total_stations = 1

            offset = record.rel_first_month - 1
            a = record.series # just a temporary
            subbox_series[offset:offset + len(a)] = a
            max_weight = wt
            # Station weight where the seed series is valid, 0.0
            # elsewhere (wt multiplied by the boolean from valid()).
            weight = [wt*valid(v) for v in subbox_series]

            # For logging, keep a list of stations that contributed.
            # Each item in this list is a triple (in list form, so that
            # it can be converted to JSON easily) of [id12, weight,
            # months].  *id12* is the 12 character station identifier;
            # *weight* (a float) is the weight (computed based on
            # distance) of the station's series; *months* is a 12 digit
            # string that records whether each of the 12 months is used.
            # '0' in position *i* indicates that the month was not used,
            # a '1' indicates that is was used.  January is position 0.
            l = [any(valid(v) for v in subbox_series[i::12])
              for i in range(12)]
            s = ''.join('01'[x] for x in l)
            contributed = [[record.uid,wt,s]]

            # Add in the remaining stations
            for record,wt in contributors[1:]:
                # TODO: A method to produce a padded data series
                #       would be good here. Hence we could just do:
                #           new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                # NOTE(review): here *station_months* is summed below,
                # so this series.combine presumably returns a per-month
                # indicator sequence — confirm against series module.
                station_months = series.combine(
                    subbox_series, weight, new, wt,
                    parameters.gridding_min_overlap)
                n_good_months = sum(station_months)
                total_good_months += n_good_months
                if n_good_months == 0:
                    # Station contributed nothing; log with zero weight.
                    contributed.append([record.uid, 0.0, '0'*12])
                    continue
                total_stations += 1
                s = ''.join('01'[bool(x)] for x in station_months)
                contributed.append([record.uid,wt,s])

                max_weight = max(max_weight, wt)

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period, first_year)
            box_obj = giss_data.Series(series=subbox_series, n=max_months,
                    box=list(subbox), stations=total_stations,
                    station_months=total_good_months,
                    d=radius*(1-max_weight))
            log.write("%s stations %s\n" % (box_obj.uid,
              asjson(contributed)))
            yield box_obj
        plural_suffix = 's'
        if n_empty_cells == 1:
            plural_suffix = ''
        dribble.write(
          '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells,plural_suffix)))
    dribble.write("\n")
示例#33
0
def find_quintuples(new_sums, new_wgts,
                    begin, years, record, rec_begin,
                    new_id, rec_id, log):
    """Decide whether the record accumulated in *new_sums*/*new_wgts*
    and the station *record* agree well enough, near the middle of
    their common period, to be combined.

    *begin* is the first year of the accumulated record and *years*
    its length in years.  *new_id* and *rec_id* identify the two
    records in messages written to *log* (a writable stream).

    Returns 1 when both records have enough valid years near the
    middle of their overlap and their average values over those years
    differ by less than the annual standard deviation; otherwise 0.

    NOTE(review): the *rec_begin* parameter is immediately overwritten
    below and therefore has no effect — confirm against callers.
    """
    rec_begin = record.first_year
    rec_end = rec_begin + record.last_year - record.first_year

    actual_begin, actual_end = get_actual_endpoints(new_wgts, begin, years)

    max_begin = max(actual_begin, rec_begin)
    min_end = min(actual_end, rec_end)
    # Midpoint (rounded to the nearest year) of the common period.
    middle_year = int(.5 * (max_begin + min_end) + 0.5)
    log.write("max begin: %s\tmin end: %s\n" % (max_begin, min_end))

    new_data = average(new_sums, new_wgts, years)
    new_ann_mean, new_ann_anoms = monthly_annual(new_data)
    ann_std_dev = sigma(new_ann_anoms)
    log.write("ann_std_dev = %s\n" % ann_std_dev)
    # Index of the middle year in each record's annual-anomaly array.
    new_offset = (middle_year - begin)
    new_len = len(new_ann_anoms)

    rec_ann_anoms = record.ann_anoms
    rec_ann_mean = record.ann_mean
    rec_offset = (middle_year - rec_begin)
    rec_len = len(rec_ann_anoms)

    # Grow a symmetric bucket of years around the middle year until
    # both records contribute enough valid years, then compare the
    # records' averages over that bucket.
    ov_success = 0
    okay_flag = 0
    for rad in range(1, parameters.station_combine_bucket_radius + 1):
        # Valid-year sums and counts within *rad* years of the middle
        # year, one pair per record.
        count1 = sum1 = 0
        count2 = sum2 = 0
        for i in range(0, rad + 1):
            for sign in [-1, 1]:
                if sign == 1 and i == 0:
                    # Avoid visiting the middle year (i == 0) twice.
                    continue
                index1 = i * sign + new_offset
                index2 = i * sign + rec_offset
                # Out-of-range indexes contribute MISSING (i.e. nothing).
                if index1 < 0 or index1 >= new_len:
                    anom1 = MISSING
                else:
                    anom1 = new_ann_anoms[index1]
                if index2 < 0 or index2 >= rec_len:
                    anom2 = MISSING
                else:
                    anom2 = rec_ann_anoms[index2]
                if valid(anom1):
                    # Add the annual mean so the comparison below is
                    # between absolute values, not anomalies.
                    sum1 += anom1 + new_ann_mean
                    count1 += 1
                if valid(anom2):
                    sum2 += anom2 + rec_ann_mean
                    count2 += 1
        if (count1 >= parameters.station_combine_min_mid_years
            and count2 >= parameters.station_combine_min_mid_years):
            log.write("overlap success: %s %s\n" % (new_id, rec_id))
            ov_success = 1
            avg1 = sum1 / float(count1)
            avg2 = sum2 / float(count2)
            diff = abs(avg1 - avg2)
            log.write("diff = %s\n" % diff)
            if diff < ann_std_dev:
                okay_flag = 1
                log.write("combination success: %s %s\n" % (new_id, rec_id))
            else:
                log.write("combination failure: %s %s\n" % (new_id, rec_id))
            break
    if not ov_success:
        log.write("overlap failure: %s %s\n" % (new_id, rec_id))
    # count1/count2 survive from the last bucket tried; this relies on
    # the bucket-radius parameter being at least 1.
    log.write("counts: %s\n" % ((count1, count2),))
    return okay_flag
示例#34
0
 def adj(t, d):
     """Return *t* lowered by the adjustment *d*; invalid data are
     passed through unchanged."""
     if not valid(t):
         return t
     return t - d
示例#35
0
def zonav(boxed_data):
    """
    Perform Zonal Averaging.

    The input *boxed_data* is an iterator of boxed time series.
    The data in the boxes are combined to produce averages over
    various latitudinal zones.  Returns an iterator of
    (averages, weights, title) tuples, one per zone.

    14 zones are produced.  The first 8 are the basic belts that are used
    for the equal area grid, the remaining 6 are combinations:

      0 64N - 90N               \
      1 44N - 64N (asin 0.9)     -  8 24N - 90 N  (0 + 1 + 2)
      2 24N - 44N (asin 0.7)    /
      3 Equ - 24N (asin 0.4)    \_  9 24S - 24 N  (3 + 4)
      4 24S - Equ               /
      5 44S - 24S               \
      6 64S - 44S                - 10 90S - 24 S  (5 + 6 + 7)
      7 90S - 64S               /

     11 northern hemisphere (0 + 1 + 2 + 3)
     12 southern hemisphere (4 + 5 + 6 + 7)
     13 global (all belts 0 to 7)
    """

    (info, titlei) = boxed_data.next()
    iyrbeg = info[5]
    monm = info[3]
    nyrsin = monm/12
    # One more than the last year with data
    yearlimit = nyrsin + iyrbeg

    yield (info, titlei)

    boxes_in_band,band_in_zone = zones()

    bands = len(boxes_in_band)

    lenz = [None] * bands
    wt = [None] * bands
    avg = [None] * bands
    # For each band, combine all the boxes in that band to create a band
    # record.
    for band in range(bands):
        # The temperature (anomaly) series for each of the boxes in this
        # band.
        box_series = [None] * boxes_in_band[band]
        # The weight series for each of the boxes in this band.
        box_weights = [None] * boxes_in_band[band]
        # "length" is the number of months (with valid data) in the box
        # series.  For each box in this band.
        box_length = [None] * boxes_in_band[band]
        for box in range(boxes_in_band[band]):
            # The last element in the tuple is the boundaries of the
            # box.  We ignore it.
            box_series[box], box_weights[box], box_length[box], _ = (
              boxed_data.next())
        # total number of valid data in band's boxes
        total_length = sum(box_length)
        if total_length == 0:
            # Nothing valid in this band; emit an all-missing record.
            wt[band] = [0.0]*monm
            avg[band] = [MISSING]*monm
        else:
            box_length,IORD = sort_perm(box_length)
            nr = IORD[0]
            # Copy the longest box record into *wt* and *avg*.
            # Using list both performs a copy and converts into a mutable
            # list.
            wt[band] = list(box_weights[nr])
            avg[band] = list(box_series[nr])
            # And combine the remaining series.
            for n in range(1,boxes_in_band[band]):
                nr = IORD[n]
                if box_length[n] == 0:
                    # Nothing in this box, and since we sorted by length,
                    # all the remaining boxes will also be empty.  We can
                    # stop combining boxes.
                    break
                series.combine(avg[band], wt[band],
                  box_series[nr], box_weights[nr], 0, nyrsin,
                  parameters.box_min_overlap)
        series.anomalize(avg[band], parameters.box_reference_period, iyrbeg)
        lenz[band] = sum(valid(a) for a in avg[band])
        yield (avg[band], wt[band])

    # We expect to have consumed all the boxes (the first 8 bands form a
    # partition of the boxes).  We check that the boxed_data stream is
    # exhausted and contains no more boxes.
    try:
        boxed_data.next()
        assert 0, "Too many boxes found"
    except StopIteration:
        # We fully expect to get here.
        pass

    # *lenz* contains the lengths of each zone 0 to 7 (the number of
    # valid months in each zone).
    lenz, iord = sort_perm(lenz)
    for zone in range(len(band_in_zone)):
        if lenz[0] == 0:
            # *bands + zone* is this combined zone's index in the full
            # 14-zone enumeration (zones 8 to 13).  The parentheses are
            # required: '%' binds tighter than '+', so without them
            # this expression would be ('... %d' % bands) + zone and
            # raise TypeError instead of reporting the zone.
            raise Error('**** NO DATA FOR ZONE %d' % (bands + zone))
        # Find the longest band that is in the special zone.
        for j1 in range(bands):
            if iord[j1] in band_in_zone[zone]:
                break
        else:
            # Should be an assertion really.
            raise Error('No band in special zone %d.' % zone)
        band = iord[j1]
        wtg = list(wt[band])
        avgg = list(avg[band])
        # Add in the remaining bands, in length order.
        for j in range(j1+1,bands):
            band = iord[j]
            if band not in band_in_zone[zone]:
                continue
            series.combine(avgg, wtg, avg[band], wt[band], 0,nyrsin,
                           parameters.box_min_overlap)
        series.anomalize(avgg, parameters.box_reference_period, iyrbeg)
        yield(avgg, wtg)
示例#36
0
def SBBXtoBX(data):
    """Simultaneously combine the land series and the ocean series and
    combine subboxes into boxes.  *data* should be an iterator of
    (land, ocean) subbox series pairs. Returns an iterator of box data.
    """

    # First item from iterator is normally a pair of metadataobjects,
    # one for land, one for ocean.  If we are piping step3 straight into
    # step5 then it is not a pair.  In that case we synthesize missing
    # ocean data.
    meta = data.next()
    try:
        land_meta, ocean_meta = meta
    except (TypeError, ValueError):
        # Use the land meta object for both land and ocean data
        land_meta,ocean_meta = meta, meta
        print "No ocean data; using land data only"
        data = blank_ocean_data(data)

    # number of subboxes within each box
    nsubbox = 100

    # TODO: Formalise use of only monthlies, see step 3.
    assert land_meta.mavg == 6
    NYRSIN = land_meta.monm/12
    # Earliest first-year of the two datasets; the combined monthly
    # arrays are indexed from this year.
    combined_year_beg = min(land_meta.yrbeg, ocean_meta.yrbeg)
    # Index into the combined array of the first year of the land data.
    land_offset = 12*(land_meta.yrbeg-combined_year_beg)
    # As land_offset but for ocean data.
    ocean_offset = 12*(ocean_meta.yrbeg-combined_year_beg)
    # NOTE(review): both arms of this max() use land_meta.monm; by
    # symmetry with the offsets above the second arm looks like it
    # should be ocean_meta.monm + ocean_offset — confirm.  (The two
    # are equal whenever the metadata objects agree, e.g. in the
    # land-only case handled above.)
    combined_n_months = max(land_meta.monm + land_offset,
                            land_meta.monm + ocean_offset)

    info = [land_meta.mo1, land_meta.kq, land_meta.mavg, land_meta.monm,
            land_meta.monm4, combined_year_beg, land_meta.missing_flag,
            land_meta.precipitation_flag]

    # Overwrite the monm4 slot of the metadata list.
    info[4] = 2 * land_meta.monm + 5
    yield(info, land_meta.title)

    for box_number,box in enumerate(eqarea.grid()):
        # Averages for the land and ocean (one series per subbox)...
        avg = []
        # ... and the corresponding valid-data counts, used as weights.
        wgtc = []
        # Eat the records from land and ocean 100 (nsubbox) at a time.
        # In other words, all 100 subboxes for the box.
        landsub,oceansub = zip(*itertools.islice(data, nsubbox))
        # :todo: combine below zip with above zip?
        for i, l, o in zip(range(nsubbox), landsub, oceansub):
            a = [MISSING]*combined_n_months
            # Prefer the ocean series unless it is too sparse or the
            # subbox is too close to land (small l.d).
            if (o.good_count < parameters.subbox_min_valid
                or l.d < parameters.subbox_land_range):
                # use land series for this subbox
                a[land_offset:land_offset+len(l.series)] = l.series
                wgtc.append(l.good_count)
            else:
                # use ocean series for this subbox
                a[ocean_offset:ocean_offset+len(o.series)] = o.series
                wgtc.append(o.good_count)
            avg.append(a)

        # GISTEMP sort.
        # We want to end up with IORDR, the permutation array that
        # represents the sorter order.  IORDR[0] is the index (into the
        # *wgtc* array) of the longest record, IORDR[1] the index of the
        # next longest record, and so on.  We do that by decorating the
        # *wgtc* array with indexes 0 to 99, and then extracting the
        # (permuted) indexes into IORDR.
        # :todo: should probably import from a purpose built module.
        from step3 import sort
        IORDR = range(nsubbox)
        sort(IORDR, lambda x,y: wgtc[y] - wgtc[x])

        # From here to the "for" loop over the cells (below) we are
        # initialising data for the loop.  Primarily the AVGR and WTR
        # arrays.
        nc = IORDR[0]

        # Weights for the box's record.  (1 where the longest record
        # has data, 0 elsewhere — booleans act as 0/1 weights.)
        wtr = [a != MISSING for a in avg[nc]]
        # Box record
        avgr = avg[nc][:]

        # Loop over the remaining cells.
        for nc in IORDR[1:]:
            if wgtc[nc] >= parameters.subbox_min_valid:
                series.combine(avgr, wtr, avg[nc], 1, 0,
                           combined_n_months/12, parameters.box_min_overlap)

        series.anomalize(avgr, parameters.subbox_reference_period,
                         combined_year_beg)
        ngood = sum(valid(a) for a in avgr)
        yield (avgr, wtr, ngood, box)
    # We've now consumed all 8000 input boxes and yielded 80 boxes.  We
    # need to tickle the input to check that it is exhausted and to
    # cause it to run the final tail of its generator.
    # We expect the call to .next() to raise StopIteration, which is
    # just what we want.
    data.next()
    # Ordinarily we never reach here.
    assert 0, "Too many input records"
示例#37
0
 def reclen(s):
     """Return the number of valid values in the anomaly series of *s*."""
     return sum(1 for v in s.anomalies if valid(v))
示例#38
0
def adjust_record(record, fit, adjust_first, adjust_last):
    """Adjust the series according to the previously computed
    parameters.

    *record* is a (monthly) station record.  Its data series is replaced,
    but its length is not changed.  Data outside the adjustment range
    (see below) will become MISSING.

    *adjust_first*, *adjust_last* are calendar years: the first and
    last years that are subject to adjustment.  Adjustment years run
    from December prior to the year in question through to November
    (because the anomaly years do too).

    *fit* contains the parameters for two slopes that are used to
    make the adjustment: of slope *fit.slope1* between year *fit.first*
    and *fit.knee*, and of slope *fit.slope2* between year *fit.knee*
    and *fit.last*.  Any adjustment can be biased up or down without
    affecting the trend; the adjustment is chosen so that it is
    zero in the year *fit.last*.  Outside the range *fit.first* to
    *fit.last* the adjustment is constant (zero for the recent part,
    and the same adjustment as for year *fit.first* for the earlier
    part).
    """

    # We assume the series starts in January.
    assert record.first_month % 12 == 1

    # A fresh array for the new (adjusted) series.
    nseries = [MISSING] * len(record.series)

    sl1 = fit.slope1
    sl2 = fit.slope2
    if not good_two_part_fit(fit):
        # Use linear approximation.
        sl1 = sl2 = fit.slope

    # (because the adjustment range is extended by 1 on either
    # end) the adjustment range can be outside the range of data for the
    # series.  The month range below is clamped to the series bounds to
    # ignore those indexes (previously this relied on catching
    # IndexError inside the loop; the explicit clamp is equivalent).
    # *iy* is a calendar year.
    for iy in range(adjust_first, adjust_last + 1):
        sl = sl1
        if iy > fit.knee:
            sl = sl2
        # For the purposes of calculating the adjustment for the year,
        # clamp to the range [fit.first, fit.last].
        iya = max(fit.first, min(iy, fit.last))
        # Biased so that the adjustment is zero in year *fit.last*.
        adj = (iya - fit.knee) * sl - (fit.last - fit.knee) * sl2
        # The anomaly years run from Dec to Nov.  So the adjustment
        # years do too.
        # The index into *series* that corresponds to December
        # immediately before the beginning of year *iy*.
        dec = 12 * (iy - record.first_year) - 1
        # *m* is an index into the *series* array, clamped to the
        # valid index range of the series.
        for m in range(max(dec, 0), min(dec + 12, len(record.series))):
            if valid(record.series[m]):
                nseries[m] = record.series[m] + adj
    record.set_series(record.first_month, nseries)
示例#39
0
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.
    """

    # Clear Climate Code
    import earth  # required for radius.

    # Convert to list because we re-use it for each box (region).
    station_records = list(station_records)
    # Descending sort by number of good records.
    # TODO: Switch to using Python's sort method here, although it
    # will change the results.
    sort(station_records, lambda x, y: y.good_count - x.good_count)

    # A dribble of progress messages.
    dribble = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            dribble.write("\rsubbox at %+05.1f%+06.1f (%d empty)" %
                          (centre + (n_empty_cells, )))
            dribble.flush()
            # Determine the contributing stations to this grid cell.
            contributors = list(incircle(station_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not contributors:
                # No stations within range: yield an all-missing cell.
                box_obj = giss_data.Series(series=subbox_series,
                                           box=list(subbox),
                                           stations=0,
                                           station_months=0,
                                           d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise series and weight arrays with first station.
            record, wt = contributors[0]
            total_good_months = record.good_count
            total_stations = 1

            offset = record.rel_first_month - 1
            a = record.series  # just a temporary
            subbox_series[offset:offset + len(a)] = a
            max_weight = wt
            # valid() acts as a 0/1 multiplier here: weight is *wt*
            # where the series has data and 0.0 elsewhere.
            weight = [wt * valid(v) for v in subbox_series]

            # For logging, keep a list of stations that contributed.
            # Each item in this list is a triple (in list form, so that
            # it can be converted to JSON easily) of [id12, weight,
            # months].  *id12* is the 12 character station identifier;
            # *weight* (a float) is the weight (computed based on
            # distance) of the station's series; *months* is a 12 digit
            # string that records whether each of the 12 months is used.
            # '0' in position *i* indicates that the month was not used,
            # a '1' indicates that is was used.  January is position 0.
            l = [
                any(valid(v) for v in subbox_series[i::12]) for i in range(12)
            ]
            # '01'[x] indexes a 2-char string with a bool (0 or 1).
            s = ''.join('01'[x] for x in l)
            contributed = [[record.uid, wt, s]]

            # Add in the remaining stations
            for record, wt in contributors[1:]:
                # TODO: A method to produce a padded data series
                #       would be good here. Hence we could just do:
                #           new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, wt,
                    parameters.gridding_min_overlap)
                n_good_months = sum(station_months)
                total_good_months += n_good_months
                if n_good_months == 0:
                    # Station contributed nothing; log it with zero
                    # weight and an all-zero month mask.
                    contributed.append([record.uid, 0.0, '0' * 12])
                    continue
                total_stations += 1
                s = ''.join('01'[bool(x)] for x in station_months)
                contributed.append([record.uid, wt, s])

                max_weight = max(max_weight, wt)

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period, first_year)
            # *d* encodes the distance of the nearest (highest-weight)
            # contributing station.
            box_obj = giss_data.Series(series=subbox_series,
                                       n=max_months,
                                       box=list(subbox),
                                       stations=total_stations,
                                       station_months=total_good_months,
                                       d=radius * (1 - max_weight))
            log.write("%s stations %s\n" % (box_obj.uid, asjson(contributed)))
            yield box_obj
        plural_suffix = 's'
        if n_empty_cells == 1:
            plural_suffix = ''
        dribble.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n'
            % (tuple(box) + (n_empty_cells, plural_suffix)))
    dribble.write("\n")
示例#40
0
 def asanom(datum):
     """Convert a single datum to anomaly (datum minus *mean*);
     invalid data map to MISSING."""
     if not valid(datum):
         return MISSING
     return datum - mean
示例#41
0
 def reclen(s):
     """Return how many values in the anomaly series of *s* are valid."""
     count = 0
     for v in s.anomalies:
         if valid(v):
             count += 1
     return count
示例#42
0
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.
    """

    station_records = list(station_records)

    log = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Extend box, by half a box east and west and by arc north
        # and south.
        extent = [
            box[0] - arcdeg, box[1] + arcdeg, box[2] - 0.5 * (box[3] - box[2]),
            box[3] + 0.5 * (box[3] - box[2])
        ]
        if box[0] <= -90 or box[1] >= 90:
            # polar: longitude is meaningless, take the full circle.
            extent[2] = -180.0
            extent[3] = +180.0

        # Pre-filter to stations in the extended region, so the
        # per-subbox incircle test below scans fewer candidates.
        region_records = list(inbox(station_records, *extent))
        # Descending sort by number of good records
        # TODO: Switch to using Python's sort method here, although it
        # will change the results.
        sort(region_records, lambda x, y: y.good_count - x.good_count)

        # Count how many cells are empty
        n_empty_cells = 0
        # Used to generate the "subbox at" rows in the log.
        # NOTE(review): *lastcentre* is assigned but never read in this
        # function — looks like dead state; confirm before removing.
        lastcentre = (None, None)
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            log.write("\rsubbox at %+05.1f%+06.1f (%d empty)" %
                      (centre + (n_empty_cells, )))
            log.flush()
            lastcentre = centre
            # Of possible station records for this region, filter for those
            # from stations within radius of subbox centre.
            incircle_records = list(incircle(region_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if len(incircle_records) == 0:
                # No stations within range: yield an all-missing cell.
                box_obj = giss_data.SubboxRecord(subbox_series,
                                                 box=list(subbox),
                                                 stations=0,
                                                 station_months=0,
                                                 d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise data with first station
            record = incircle_records[0]
            total_good_months = record.good_count
            total_stations = 1

            max_weight = record.weight
            offset = record.rel_first_month - 1
            a = record.series  # just a temporary
            subbox_series[offset:offset + len(a)] = a
            # Weight is the station's weight where it has data, 0
            # elsewhere.
            weight = [0.0] * max_months
            for i in range(len(a)):
                if valid(a[i]):
                    weight[i + offset] = record.weight

            # Add in the remaining stations
            for record in incircle_records[1:]:
                # TODO: A StationMethod method to produce a padded data series
                #       would be good here. Hence we could just do:
                #           new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, record.weight,
                    record.rel_first_year, record.rel_last_year + 1,
                    parameters.gridding_min_overlap)
                total_good_months += station_months
                if station_months == 0:
                    # Station contributed nothing (insufficient overlap).
                    continue
                total_stations += 1

                if max_weight < record.weight:
                    max_weight = record.weight

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period, first_year)
            # *d* encodes the distance of the nearest (highest-weight)
            # contributing station.
            box_obj = giss_data.SubboxRecord(subbox_series,
                                             n=max_months,
                                             box=list(subbox),
                                             stations=total_stations,
                                             station_months=total_good_months,
                                             d=radius * (1 - max_weight))
            yield box_obj
        plural_suffix = 's'
        if n_empty_cells == 1:
            plural_suffix = ''
        log.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n'
            % (tuple(box) + (n_empty_cells, plural_suffix)))
    log.write("\n")
示例#43
0
def urban_adjustments(anomaly_stream):
    """Takes an iterator of station records and applies an adjustment
    to urban stations to compensate for urban temperature effects.
    Returns an iterator of station records.  Rural stations are passed
    unchanged.  Urban stations which cannot be adjusted are discarded.

    The adjustment follows a linear or two-part linear fit to the
    difference in annual anomalies between the urban station and the
    combined set of nearby rural stations.  The linear fit is to allow
    for a linear effect at the urban station.  The two-part linear fit
    is to allow for a model of urban effect which starts or stops at
    some point during the time series.

    The algorithm is essentially as follows:

    For each urban station:
        1. Find all the rural stations within a fixed radius;
        2. Combine the annual anomaly series for those rural stations, in
           order of valid-data count;
        3. Calculate a two-part linear fit for the difference between
           the urban annual anomalies and this combined rural annual anomaly;
        4. If this fit is satisfactory, apply it; otherwise apply a linear fit.

        If there are not enough nearby rural stations, or the combined
        rural record does not have enough overlap with the urban
        record, try a second time for this urban station, with a
        larger radius.  If there is still not enough data, discard the
        urban station.
     """

    last_year = giss_data.get_ghcn_last_year()
    first_year = 1880

    iyoff = giss_data.BASE_YEAR - 1
    iyrm = last_year - iyoff

    rural_stations = []
    urban_stations = {}

    # Degrees-to-radians conversion factor.
    pi180 = math.pi / 180.0

    # First pass: compute annual anomalies for every record and
    # classify each station as rural or urban.
    all = []
    for record in anomaly_stream:
        station = record.station
        all.append(record)
        record.urban_adjustment = None
        annual_anomaly(record)
        if record.anomalies is None:
            continue
        length = len(record.anomalies)
        d = Struct()
        d.anomalies = record.anomalies
        # Unit-sphere components of the station position; presumably
        # used for distance tests in get_neighbours — confirm.
        d.cslat = math.cos(station.lat * pi180)
        d.snlat = math.sin(station.lat * pi180)
        d.cslon = math.cos(station.lon * pi180)
        d.snlon = math.sin(station.lon * pi180)
        d.id = record.uid
        # Years relative to the 1-based offset year *iyoff*.
        d.first_year = record.first - iyoff
        d.last_year = d.first_year + length - 1
        d.station = station
        d.record = record
        if is_rural(station):
            rural_stations.append(d)
        else:
            urban_stations[record] = d

    # Sort the rural stations according to the length of the time record
    # (ignoring gaps).
    for st in rural_stations:
        st.recLen = len([v for v in st.anomalies if valid(v)])
    rural_stations.sort(key=lambda s: s.recLen)
    rural_stations.reverse()

    # Combine time series for rural stations around each urban station
    for record in all:
        us = urban_stations.get(record, None)
        if us is None:
            # Just remove leading/trailing invalid values for rural stations.
            record.strip_invalid()
            record.begin = record.first
            record.end = record.last
            yield record
            continue

        iyu1 = us.first_year + iyoff - 1  # subtract 1 for a possible partial yr
        iyu2 = us.last_year + iyoff + 1  # add 1 for partial year

        # Retry loop: first with half radius, then (if the neighbour
        # set or overlap is insufficient) with the full radius; drop
        # the station if the full radius also fails.
        usingFullRadius = False
        dropStation = False
        needNewNeighbours = True
        while True:
            if needNewNeighbours:
                if usingFullRadius:
                    radius = parameters.urban_adjustment_full_radius
                else:
                    radius = parameters.urban_adjustment_full_radius / 2
                neighbors = get_neighbours(us, rural_stations, radius)
                if not neighbors:
                    if usingFullRadius:
                        dropStation = True
                        break
                    usingFullRadius = True
                    needNewNeighbours = True
                    continue

                counts, urban_series, combined = combine_neighbors(
                    us, iyrm, iyoff, neighbors)
                iy1 = 1
                needNewNeighbours = False

            points, quorate_count, first, last = prepare_series(
                iy1, iyrm, combined, urban_series, counts, iyoff)

            if quorate_count < parameters.urban_adjustment_min_years:
                if usingFullRadius:
                    dropStation = True
                    break
                usingFullRadius = True
                needNewNeighbours = True
                continue

            # Enough good years across the whole range: accept.
            if quorate_count >= (parameters.urban_adjustment_proportion_good *
                                 (last - first + 0.9)):
                break

            # Not enough good years for the given range.  Try to save
            # cases in which the gaps are in the early part, by
            # dropping that part and going around to prepare_series
            # again.
            iy1 = int(last - (quorate_count - 1) /
                      parameters.urban_adjustment_proportion_good)
            if iy1 < first + 1:
                iy1 = first + 1  # avoid infinite loop

        if dropStation:
            continue

        fit = getfit(points)
        # find extended range
        iyxtnd = int(
            round(quorate_count /
                  parameters.urban_adjustment_proportion_good) -
            (last - first + 1))
        n1x = first + iyoff
        n2x = last + iyoff
        if iyxtnd < 0:
            sys.exit('impossible')
        if iyxtnd > 0:
            # Extend the adjustment range, preferring the recent end
            # and falling back to the early end (clamped to the urban
            # record's own span).
            lxend = iyu2 - (last + iyoff)
            if iyxtnd <= lxend:
                n2x = n2x + lxend
            else:
                n1x = n1x - (iyxtnd - lxend)
                if n1x < iyu1:
                    n1x = iyu1
                n2x = iyu2

        series = record.series
        # adjust
        m1 = record.rel_first_month + record.good_start_idx
        m2 = record.rel_first_month + record.good_end_idx - 1
        offset = record.good_start_idx  # index of first valid month
        a, b = adjust(first_year, record, series, fit, n1x, n2x, first + iyoff,
                      last + iyoff, m1, m2, offset)
        # a and b are numbers of new first and last valid months
        aa = a - m1
        bb = b - a + 1
        record.set_series(a - 1 + first_year * 12 + 1,
                          series[aa + offset:aa + offset + bb])
        # Integer division: convert month numbers back to years.
        record.begin = ((a - 1) / 12) + first_year
        record.first = record.begin
        record.end = ((b - 1) / 12) + first_year
        # NOTE(review): *end* is derived from *b* but *last* is taken
        # from record.last_year — confirm the asymmetry is intended.
        record.last = record.last_year
        yield record
示例#44
0
def SBBXtoBX(data):
    """Simultaneously combine the land series and the ocean series and
    combine subboxes into boxes.  *data* should be an iterator of
    (land, ocean) subbox series pairs. Returns an iterator of box data.
    """

    # First item from iterator is normally a pair of metadataobjects,
    # one for land, one for ocean.  If we are piping step3 straight into
    # step5 then it is not a pair.  In that case we synthesize missing
    # ocean data.
    meta = data.next()
    try:
        land_meta, ocean_meta = meta
    except (TypeError, ValueError):
        # Use the land meta object for both land and ocean data
        land_meta, ocean_meta = meta, meta
        print "No ocean data; using land data only"
        data = blank_ocean_data(data)

    # number of subboxes within each box
    nsubbox = 100

    # TODO: Formalise use of only monthlies, see step 3.
    assert land_meta.mavg == 6
    NYRSIN = land_meta.monm / 12
    combined_year_beg = min(land_meta.yrbeg, ocean_meta.yrbeg)
    # Index into the combined array of the first year of the land data.
    land_offset = 12 * (land_meta.yrbeg - combined_year_beg)
    # As land_offset but for ocean data.
    ocean_offset = 12 * (ocean_meta.yrbeg - combined_year_beg)
    combined_n_months = max(land_meta.monm + land_offset,
                            land_meta.monm + ocean_offset)

    info = [
        land_meta.mo1, land_meta.kq, land_meta.mavg, land_meta.monm,
        land_meta.monm4, combined_year_beg, land_meta.missing_flag,
        land_meta.precipitation_flag
    ]

    info[4] = 2 * land_meta.monm + 5
    yield (info, land_meta.title)

    for box_number, box in enumerate(eqarea.grid()):
        # Averages for the land and ocean (one series per subbox)...
        avg = []
        wgtc = []
        # Eat the records from land and ocean 100 (nsubbox) at a time.
        # In other words, all 100 subboxes for the box.
        landsub, oceansub = zip(*itertools.islice(data, nsubbox))
        # :todo: combine below zip with above zip?
        for i, l, o in zip(range(nsubbox), landsub, oceansub):
            a = [MISSING] * combined_n_months
            if (o.good_count < parameters.subbox_min_valid
                    or l.d < parameters.subbox_land_range):
                # use land series for this subbox
                a[land_offset:land_offset + len(l.series)] = l.series
                wgtc.append(l.good_count)
            else:
                # use ocean series for this subbox
                a[ocean_offset:ocean_offset + len(o.series)] = o.series
                wgtc.append(o.good_count)
            avg.append(a)

        # GISTEMP sort.
        # We want to end up with IORDR, the permutation array that
        # represents the sorter order.  IORDR[0] is the index (into the
        # *wgtc* array) of the longest record, IORDR[1] the index of the
        # next longest record, and so on.  We do that by decorating the
        # *wgtc* array with indexes 0 to 99, and then extracting the
        # (permuted) indexes into IORDR.
        # :todo: should probably import from a purpose built module.
        from step3 import sort
        IORDR = range(nsubbox)
        sort(IORDR, lambda x, y: wgtc[y] - wgtc[x])

        # From here to the "for" loop over the cells (below) we are
        # initialising data for the loop.  Primarily the AVGR and WTR
        # arrays.
        nc = IORDR[0]

        # Weights for the box's record.
        wtr = [a != MISSING for a in avg[nc]]
        # Box record
        avgr = avg[nc][:]

        # Loop over the remaining cells.
        for nc in IORDR[1:]:
            if wgtc[nc] >= parameters.subbox_min_valid:
                series.combine(avgr, wtr, avg[nc], 1, 0,
                               combined_n_months / 12,
                               parameters.box_min_overlap)

        series.anomalize(avgr, parameters.subbox_reference_period,
                         combined_year_beg)
        ngood = sum(valid(a) for a in avgr)
        yield (avgr, wtr, ngood, box)
    # We've now consumed all 8000 input boxes and yielded 80 boxes.  We
    # need to tickle the input to check that it is exhausted and to
    # cause it to run the final tail of its generator.
    # We expect the call to .next() to raise StopIteration, which is
    # just what we want.
    data.next()
    # Ordinarily we never reach here.
    assert 0, "Too many input records"
示例#45
0
def annual_anomaly(record):
    """Computes annual anomalies for the station record *record*.
    Returns a list of annual anomalies, one datum for each year (12
    months) of the input record.  Years for which an annual anomaly
    cannot be computed are recorded as MISSING.  The returned series is
    padded so that it begins in BASE_YEAR (that is, 1880).

    If no anomalies can be computed, then None is returned.

    The algorithm is as follows: compute monthly averages, then
    monthly anomalies, then seasonal anomalies (means of monthly
    anomalies for at least two months) then annual anomalies (means
    of seasonal anomalies for at least three seasons).

    This function assumes that the series starts in January.
    """

    # Set to True if we have an annual anomaly for at least one year.
    good = False
    series = record.series
    monthly_means = []
    for m in range(12):
        month_data = series[m::12]
        # Neglect December of final year, as we do not use its season.
        if m == 11:
            month_data = month_data[:-1]
        month_data = filter(valid, month_data)
        if not month_data:
            # A month with no valid data at all: no monthly mean (and
            # hence no anomalies) can be computed.  Without this guard
            # the division below raises ZeroDivisionError instead of
            # returning None as documented.
            return None
        monthly_means.append(float(sum(month_data)) / len(month_data))
    annual_anoms = []
    first = None
    for y in range(len(series)/12):
        # Seasons are Dec-Feb, Mar-May, Jun-Aug, Sep-Nov.
        # (Dec from previous year).
        total = [0.0] * 4 # total monthly anomaly for each season
        count = [0] * 4   # number of valid months in each season
        for m in range(-1, 11):
            index = y * 12 + m
            if index >= 0: # no Dec value in year -1
                datum = series[index]
                if valid(datum):
                    # season number 0-3
                    season = (m+1) // 3
                    total[season] += datum - monthly_means[m % 12]
                    count[season] += 1
        season_anomalies = [] # list of valid seasonal anomalies
        for s in range(4):
            # valid seasonal anomaly requires at least 2 valid months
            if count[s] >= 2:
                season_anomalies.append(total[s]/count[s])
        # valid annual anomaly requires at least 3 valid seasons
        if len(season_anomalies) > 2:
            good = True
            annual_anoms.append(sum(season_anomalies) / len(season_anomalies))
        else:
            annual_anoms.append(MISSING)

    if good:
        assert record.first_year >= giss_data.BASE_YEAR
        # Pad beginning of series so that it starts in
        # giss_data.BASE_YEAR
        pad = [MISSING] * (record.first_year - giss_data.BASE_YEAR)
        return pad + annual_anoms
    else:
        return None