Exemplo n.º 1
0
def calc_monthly_USHCN_offsets(u_record, g_record):
    """Compute 12 monthly offsets between a USHCN and a GHCN record.

    Given a USHCN record `u_record` and a GHCN record `g_record`,
    using any overlapping years, computes a set of 12 monthly offsets
    which can be added to the GHCN record to most closely approximate
    the USHCN record.

    Both records are asked (via ``get_set_of_years``) for the same span,
    from ``parameters.USHCN_offset_start_year`` to
    ``u_record.last_year``, and the returned years are paired up
    positionally.  For each month the pairs are scanned in the reverse
    of that order (presumably most recent year first -- confirm against
    ``get_set_of_years``), averaging ``u - g`` over the pairs in which
    both values are valid, and stopping once
    ``parameters.USHCN_offset_max_months`` pairs have been used.

    Returns a list of 12 floats.  A month with no valid pair keeps the
    offset 0.0.
    """
    u_years = u_record.get_set_of_years(parameters.USHCN_offset_start_year,
                                        u_record.last_year)
    g_years = g_record.get_set_of_years(parameters.USHCN_offset_start_year,
                                        u_record.last_year)
    # On Python 3 zip() returns an iterator, which reversed() rejects
    # with a TypeError; build the list first, then reverse it.
    reversed_year_pairs = list(zip(u_years, g_years))
    reversed_year_pairs.reverse()

    diffs = [0.0] * 12
    for month in range(12):
        # Named *total* (not *sum*) so the builtin is not shadowed.
        total = 0.0
        count = 0
        for u_year, g_year in reversed_year_pairs:
            u_temp, g_temp = u_year[month], g_year[month]
            if valid(u_temp) and valid(g_temp):
                total += u_temp - g_temp
                count += 1
                # Cap the number of years contributing to each month.
                if count == parameters.USHCN_offset_max_months:
                    break
        if count > 0:
            diffs[month] = total / count
    return diffs
Exemplo n.º 2
0
def calc_monthly_USHCN_offsets(u_record, g_record):
    """Compute 12 monthly offsets between a USHCN and a GHCN record.

    Given a USHCN record `u_record` and a GHCN record `g_record`,
    using any overlapping years, computes a set of 12 monthly offsets
    which can be added to the GHCN record to most closely approximate
    the USHCN record.

    Both records are asked (via ``get_set_of_years``) for the same span,
    from ``parameters.USHCN_offset_start_year`` to
    ``u_record.last_year``, and the returned years are paired up
    positionally.  For each month the pairs are scanned in the reverse
    of that order (presumably most recent year first -- confirm against
    ``get_set_of_years``), averaging ``u - g`` over the pairs in which
    both values are valid, and stopping once
    ``parameters.USHCN_offset_max_months`` pairs have been used.

    Returns a list of 12 floats.  A month with no valid pair keeps the
    offset 0.0.
    """
    u_years = u_record.get_set_of_years(parameters.USHCN_offset_start_year,
                                        u_record.last_year)
    g_years = g_record.get_set_of_years(parameters.USHCN_offset_start_year,
                                        u_record.last_year)
    # On Python 3 zip() returns an iterator, which reversed() rejects
    # with a TypeError; build the list first, then reverse it.
    reversed_year_pairs = list(zip(u_years, g_years))
    reversed_year_pairs.reverse()

    diffs = [0.0] * 12
    for month in range(12):
        # Named *total* (not *sum*) so the builtin is not shadowed.
        total = 0.0
        count = 0
        for u_year, g_year in reversed_year_pairs:
            u_temp, g_temp = u_year[month], g_year[month]
            if valid(u_temp) and valid(g_temp):
                total += u_temp - g_temp
                count += 1
                # Cap the number of years contributing to each month.
                if count == parameters.USHCN_offset_max_months:
                    break
        if count > 0:
            diffs[month] = total / count
    return diffs
Exemplo n.º 3
0
def record_correlation(s, t, overlap=300):
    """Correlate two records over the months they have in common.

    Copies of ``s.series`` and ``t.series`` are passed through
    ``series.anomalize`` (whose return value is not used, so it
    presumably works in place on the copies), then paired up
    positionally.  Only pairs in which both values are valid are kept.

    Returns ``correlation.pearson`` applied to the two columns of kept
    values, or None when fewer than *overlap* pairs were kept.

    Both records must have the same ``first_year`` (asserted).
    """

    assert s.first_year == t.first_year

    # Work on copies; the records' own series are left alone.
    s_values = list(s.series)
    t_values = list(t.series)
    series.anomalize(s_values)
    series.anomalize(t_values)

    shared = []
    for s_val, t_val in zip(s_values, t_values):
        if valid(s_val) and valid(t_val):
            shared.append((s_val, t_val))

    if len(shared) < overlap:
        return None

    # Transpose the list of pairs into two parallel sequences.
    return correlation.pearson(*zip(*shared))
Exemplo n.º 4
0
def get_longest_overlap(target, begin, records):
    """Find the record in the *records* set that has the longest
    overlap with the *target* by considering annual anomalies.  *target*
    is a sequence of monthly values starting in the year *begin*.

    A triple (record, diff, overlap) is returned; *diff* is the average
    difference in annual anomalies between *record* and *target*
    (positive when *record* is higher); *overlap* is the number of years
    in the overlap.  Even when there is no overlap _some_ record is
    returned and in that case *diff* is None and *overlap* is 0.
    When *records* is empty the triple is (None, None, 0).

    Records are keyed by their ``uid``, so if two records share a uid
    only the later one is considered.

    The annual anomalies of each record are compared positionally with
    those of *target*, so all records are assumed to start in the same
    year as *target*.  *begin* itself is not used by this function and
    no assertion is made about it.
    """

    # Annual mean, and annual anomaly sequence.
    mean, anoms = series.monthly_annual(target)
    overlap = 0
    diff = None
    # Bound up front so that an empty *records* yields (None, None, 0)
    # rather than an UnboundLocalError at the return statement.
    best_record = None
    # :todo: the records are consulted in an essentially arbitrary
    # order (which depends on the implementation), but the order
    # may affect the result.  Tie breaks go to the last record consulted.
    # For exact compatibility with previous versions, we create a
    # temporary dict.
    t = dict((record.uid, record) for record in records)
    for record in t.values():
        common = [(rec_anom, anom)
                  for rec_anom, anom in zip(record.ann_anoms, anoms)
                  if valid(rec_anom) and valid(anom)]
        # Strictly shorter overlaps lose; equal overlaps replace the
        # current best (hence "last record consulted" wins ties).
        if len(common) < overlap:
            continue
        overlap = len(common)
        best_record = record
        if common:
            # Compare absolute annual values (mean + anomaly) so that
            # the two records' different means are accounted for.
            S = sum((record.ann_mean + rec_anom) - (mean + anom)
                    for rec_anom, anom in common)
            diff = S / len(common)
    return best_record, diff, overlap
Exemplo n.º 5
0
def fresh_arrays(record, years):
    """Build and return a new pair of lists: (*sums*, *wgts*).

    Both lists cover ``12 * years`` months; the input *record* must not
    be longer than that (asserted).  The leading entries come from
    ``record.series``: *sums* holds each value multiplied by its
    validity (so invalid entries contribute zero) and *wgts* holds 1
    for a valid value and 0 otherwise.  The remainder of each list is
    zero padding.

    The result lists start at the same point as the input *record*,
    which should generally be the same for all inputs.
    """

    nmonths = years * 12

    # Number of months in record.
    rec_months = len(record)
    assert rec_months <= nmonths

    # Months beyond the end of the record.
    padding = nmonths - rec_months

    # Data first (invalid values multiplied down to zero), then padding.
    sums = [valid(value) * value for value in record.series]
    sums.extend([0.0] * padding)

    # Weight 1 wherever the record has a valid value, 0 elsewhere.
    wgts = [int(valid(value)) for value in record.series]
    wgts.extend([0] * padding)

    return sums, wgts
Exemplo n.º 6
0
 def adj(t, d):
     """Return *t* reduced by *d*; a *t* that is not valid is
     returned unchanged."""
     if not valid(t):
         return t
     return t - d
Exemplo n.º 7
0
def stationvalidmonths(record):
    """Return the set of month numbers at which *record* has valid data.

    A month number is ``record.first_month - 1`` plus the value's index
    in ``record.series``.  Per the original encoding note, January of
    year 1 is month 12.
    """

    base = record.first_month - 1
    months = set()
    for index, value in enumerate(record.series):
        if valid(value):
            months.add(base + index)
    return months
Exemplo n.º 8
0
def find_quintuples(sums, wgts, record, new_id, log):
    """Decide whether *record* agrees with the combined series around
    the middle of their common period.

    The *sums* and *wgts* arrays are assumed to begin in the same
    year as *record*.

    *new_id* is used only to label the log output, together with
    ``record.uid``.  *log* is any object with a ``write`` method;
    progress and diagnostic lines are written to it.

    A "middle year" is taken halfway between the later of the two
    starting years and the earlier of the two ending years.  Windows of
    increasing radius (1 up to
    ``parameters.station_combine_bucket_radius``) around that year are
    tried until both series have at least
    ``parameters.station_combine_min_mid_years`` valid annual anomalies
    in the window.  The first such window decides the result.

    Returns a boolean: True when such a window was found and the
    absolute difference between the two window means is less than
    ``sigma`` of the combined annual anomalies; False otherwise.
    """

    # An identifier common to all the log output.
    logid = "%s %s" % (new_id, record.uid)

    rec_begin = record.first_valid_year()
    rec_end = record.last_valid_year()

    # presumably the first and last years with data in the combined
    # series -- confirm against get_actual_endpoints.
    actual_begin, actual_end = get_actual_endpoints(wgts, record.first_year)

    max_begin = max(actual_begin, rec_begin)
    min_end = min(actual_end, rec_end)
    # Since max_begin and min_end are integers, this rounds fractional
    # middle years up.
    middle_year = int(.5 * (max_begin + min_end) + 0.5)
    # Index of the middle year within the annual anomaly sequences.
    offset = (middle_year - record.first_year)
    log.write("max begin: %s\tmin end: %s\n" % (max_begin, min_end))

    new_data = average(sums, wgts)
    new_ann_mean, new_ann_anoms = series.monthly_annual(new_data)
    ann_std_dev = sigma(new_ann_anoms)
    log.write("ann_std_dev = %s\n" % ann_std_dev)

    rec_ann_anoms = record.ann_anoms
    rec_ann_mean = record.ann_mean

    # Whether we have an "overlap" or not.  We have an "overlap" if
    # within *rad* years either side of *middle_year* both records have
    # *parameters.station_combine_min_mid_years* valid annual anomalies.
    ov_success = False
    # The overlap is "okay" when the difference in annual temperature is
    # below a certain threshold.
    okay_flag = False
    # Bound before the loop so that the final "counts" line can still be
    # written when the configured radius is below 1 and the loop body
    # never runs.
    new_middle = []
    rec_middle = []
    for rad in range(1, parameters.station_combine_bucket_radius + 1):
        # For the two series, get data from -rad to rad (inclusive)
        # around the middle year.
        base = offset-rad
        # Clamp so that a negative start does not wrap round to the end
        # of the sequence.
        base = max(0, base)
        limit = offset+rad+1
        new_middle = [x for x in new_ann_anoms[base:limit] if valid(x)]
        rec_middle = [x for x in rec_ann_anoms[base:limit] if valid(x)]
        if (len(new_middle) >= parameters.station_combine_min_mid_years
            and len(rec_middle) >= parameters.station_combine_min_mid_years):
            log.write("overlap success: %s\n" % logid)
            ov_success = True
            # Compare absolute values (annual mean + anomaly), averaged
            # over the window.  float() keeps the division true division.
            avg1 = sum(anom+new_ann_mean for anom in new_middle) / float(
              len(new_middle))
            avg2 = sum(anom+rec_ann_mean for anom in rec_middle) / float(
              len(rec_middle))
            diff = abs(avg1 - avg2)
            log.write("diff = %s\n" % diff)
            if diff < ann_std_dev:
                okay_flag = True
                log.write("combination success: %s\n" % logid)
            else:
                log.write("combination failure: %s\n" % logid)
            # The smallest adequate window decides; do not widen further.
            break
    if not ov_success:
        log.write("overlap failure: %s\n" % logid)
    # Counts from the last window examined (0 0 if none was).
    log.write("counts: %d %d\n" % (len(new_middle), len(rec_middle)))
    return okay_flag
Exemplo n.º 9
0
 def adj(t, d):
     """Return *t* reduced by *d*; a *t* that is not valid is
     returned unchanged."""
     if not valid(t):
         return t
     return t - d