Пример #1
0
    def _forecast(self, ageGroup, epiweek):
        """Combine observed FluSurv values with user-submitted continuations.

        `ageGroup`: key of the column read from each flusurv row; also passed
          to `fetch_submissions`
        `epiweek`: the current epiweek; must lie between week 40 of
          `test_season` and week 17 of the following year (inclusive)

        Returns a list with one curve per user submission: the observed values
        from the season start through `epiweek`, followed by that submission.
        Raises `Exception` when `epiweek` is outside the season or when the
        number of observed rows does not match the number of weeks requested.
        """
        # season setup and sanity check
        season_start = flu.join_epiweek(self.test_season, 40)
        season_end = flu.join_epiweek(self.test_season + 1, 17)
        print("test season:", self.test_season, "ew1:", season_start,
              "epiweek:", epiweek)
        if epiweek < season_start or epiweek > season_end:
            raise Exception('`epiweek` outside of `test_season`')

        # observed values (left half), requested as of issue `epiweek`
        weeks = Epidata.range(season_start, epiweek)
        rows = Forecaster.Utils.decode(
            Epidata.flusurv('network_all', weeks, issues=epiweek))
        observed = []
        for row in rows:
            observed.append(row[ageGroup])

        # one value is expected for every week from season start to `epiweek`
        # NOTE(review): the message says "ILINet" although the rows come from
        # the flusurv endpoint -- confirm whether the wording is intentional
        if len(observed) != flu.delta_epiweeks(season_start, epiweek) + 1:
            raise Exception('missing ILINet data')

        # user submissions (right half) from the database
        print("ageGroup", ageGroup, "epiweek", epiweek)
        user_curves = self.fetch_submissions(ageGroup, epiweek)
        num_users = len(user_curves)
        self._num_users = num_users
        if self.verbose:
            print(' [EC] %d users found for %s on %d' %
                  (num_users, ageGroup, epiweek))

        # prepend the shared observed prefix to every user's submission
        return [observed + tail for tail in user_curves]
Пример #2
0
 def _get_unstable(self, region, lag):
     """Fetch lagged values for every season from 2010 up to `test_season`.

     One epiweek range (week 40 through week 20 of the next year) is built
     per season in `range(2010, self.test_season)`. `lag` is passed through
     unchanged to the Epidata query.

     Returns a dict mapping each row's `epiweek` to its value: the `wili`
     field for WILI forecasts, otherwise the `region` column of the
     `network_all` flusurv rows.
     """
     ranges = [
         Epidata.range(flu.join_epiweek(season + 0, 40),
                       flu.join_epiweek(season + 1, 20))
         for season in range(2010, self.test_season)
     ]
     # pick the data source and the column to read based on the forecast type
     if self.forecast_type == ForecastType.WILI:
         response = Epidata.fluview(region, ranges, lag=lag)
         column = 'wili'
     else:
         response = Epidata.flusurv('network_all', ranges, lag=lag)
         column = region
     rows = Forecaster.Utils.decode(response)
     return {row['epiweek']: row[column] for row in rows}
Пример #3
0
 def _get_partial_trajectory(self, epiweek, valid=True):
   """Return the wILI values from week 30 of the season through `epiweek`.

   `epiweek`: last week of the trajectory; the season year is the year of
     `epiweek`, or the previous year when its week number is below 30
   `valid`: when true, values from the default (no `issues`) query are only
     used for weeks more than five weeks before `epiweek`; the remaining
     weeks must come from the query with `issues=epiweek`. When false, the
     default query may fill any week.

   Values from the `issues=epiweek` query always override the default ones.
   Returns a list with one value per week, in week order.
   Raises `Exception` when any week in the range has no value.
   """
   year, week = EW.split_epiweek(epiweek)
   if week < 30:
     year -= 1
   ew1 = EW.join_epiweek(year, 30)
   ew2 = epiweek
   limit = EW.add_epiweeks(ew2, -5)
   weeks = Epidata.range(ew1, ew2)
   stable = Epidata.check(Epidata.fluview(self.region, weeks))
   try:
     unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
   except Exception:
     # best effort: continue without issue-specific data. Only `Exception` is
     # caught so that KeyboardInterrupt/SystemExit still propagate.
     unstable = []
   wili = {}
   for row in stable:
     ew, value = row['epiweek'], row['wili']
     if not valid or ew < limit:
       wili[ew] = value
   # issue-specific values take precedence over the default ones
   for row in unstable:
     ew, value = row['epiweek'], row['wili']
     wili[ew] = value
   curve = []
   for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
     if ew not in wili:
       kind = 'unstable' if valid else 'any'
       raise Exception('wILI (%s) not available for week %d' % (kind, ew))
     curve.append(wili[ew])
   # double-check that the curve spans the whole requested range
   n1 = EW.delta_epiweeks(ew1, ew2) + 1
   n2 = len(curve)
   if n1 != n2:
     raise Exception('missing data (expected %d, found %d)' % (n1, n2))
   return curve
Пример #4
0
def get_ilinet_data(row):
    """Convert one CSV row of ILINet data into a dict of named fields.

    `row`: list of column strings from the CSV file

    Returns `None` for header rows, for single-column rows, and for rows
    whose column 5 is 'X' (ILI not reported). Otherwise returns a dict with
    the location, epiweek, ILI values and counts parsed from the row.
    Raises `Exception` when a row starting with 'REGION TYPE' does not match
    the expected header exactly.
    """
    expected_header = [
        'REGION TYPE', 'REGION', 'YEAR', 'WEEK', '% WEIGHTED ILI',
        '%UNWEIGHTED ILI', 'AGE 0-4', 'AGE 25-49', 'AGE 25-64', 'AGE 5-24',
        'AGE 50-64', 'AGE 65', 'ILITOTAL', 'NUM. OF PROVIDERS',
        'TOTAL PATIENTS'
    ]
    looks_like_header = row[0] == 'REGION TYPE'
    if looks_like_header and row != expected_header:
        raise Exception('header row has changed')
    if looks_like_header or len(row) == 1:
        # header (or title) row carries no data
        return None
    if row[5] == 'X':
        # ILI isn't reported, ignore this row
        return None

    record = {
        'location': fluview_locations.get_location_name(row[0], row[1]),
        'epiweek': join_epiweek(int(row[2]), int(row[3])),
        'wili': optional_float(row[4], row[5]),
        'ili': float(row[5]),
    }
    # output field -> source column; the age columns are not in numeric order
    # in the file (see the header above), hence the 9/8/7 indices
    count_columns = (
        ('age0', 6),
        ('age1', 9),
        ('age2', 8),
        ('age3', 7),
        ('age4', 10),
        ('age5', 11),
        ('n_ili', 12),
        ('n_providers', 13),
        ('n_patients', 14),
    )
    for field, index in count_columns:
        record[field] = optional_int(row[index])
    return record
Пример #5
0
 def __init__(self, region):
   """Load historical wILI for `region` and organise it by season.

   Fetches fluview rows for epiweeks 200330-202330 and groups them into
   seasons that start at week 30. Each season maps a week offset (0-51,
   relative to week 30) to the wILI value; seasons without exactly 52
   offsets are discarded. When both 2008 and 2009 are complete, offsets
   40-51 of 2008 are overwritten with those of 2009 and 2009 is removed.

   Sets `self.region`, `self.seasons` (season -> {offset: wili}),
   `self.years` (sorted season keys) and `self.curves` (season -> list of 52
   values).
   """
   self.region = region
   rows = Epidata.check(
     Epidata.fluview(self.region, Epidata.range(200330, 202330)))
   self.seasons = {}
   for row in rows:
     epiweek, value = row['epiweek'], row['wili']
     season, week = EW.split_epiweek(epiweek)
     if week < 30:
       # weeks before week 30 belong to the season that began the year before
       season -= 1
     offset = EW.delta_epiweeks(EW.join_epiweek(season, 30), epiweek)
     weekly = self.seasons.setdefault(season, {})
     if 0 <= offset < 52:
       weekly[offset] = value
   # drop incomplete seasons (iterate over a sorted copy of the keys)
   for season in sorted(self.seasons):
     if len(self.seasons[season]) != 52:
       del self.seasons[season]
   # merge the tail of 2009 into 2008, then drop 2009
   if 2008 in self.seasons and 2009 in self.seasons:
     self.seasons[2008].update(
       (i, self.seasons[2009][i]) for i in range(40, 52))
     del self.seasons[2009]
   self.years = sorted(self.seasons)
   self.curves = {
     season: [self.seasons[season][i] for i in range(52)]
     for season in self.years
   }
Пример #6
0
 def _forecast(self, region, epiweek):
   """Combine observed wILI with user-submitted continuations.

   `region`: region passed to the fluview query and to `fetch_submissions`
   `epiweek`: the current epiweek; must lie between week 40 of `test_season`
     and week 20 of the following year (inclusive)

   Returns a list with one curve per user submission: the observed wILI from
   the season start through `epiweek`, followed by that submission.
   Raises `Exception` when `epiweek` is outside the season or when the number
   of observed rows does not match the number of weeks requested.
   """
   # season setup and sanity check
   season_start = flu.join_epiweek(self.test_season, 40)
   season_end = flu.join_epiweek(self.test_season + 1, 20)
   if epiweek < season_start or epiweek > season_end:
     raise Exception('`epiweek` outside of `test_season`')
   # observed values (left half), requested as of issue `epiweek`
   weeks = Epidata.range(season_start, epiweek)
   response = Epidata.fluview(region, weeks, issues=epiweek)
   observed = []
   for row in Forecaster.Utils.decode(response):
     observed.append(row['wili'])
   if len(observed) != flu.delta_epiweeks(season_start, epiweek) + 1:
     raise Exception('missing ILINet data')
   # user submissions (right half) from the database
   user_curves = self.fetch_submissions(region, epiweek)
   num_users = len(user_curves)
   self._num_users = num_users
   print(' [EC] %d users found for %s on %d' % (num_users, region, epiweek))
   # prepend the shared observed prefix to every user's submission
   return [observed + tail for tail in user_curves]
Пример #7
0
 def fetch_submissions(self, region, epiweek_now):
     """Load the forecast curves that users submitted on `epiweek_now`.

     `region`: matched against `ec_fluv_regions.fluview_name`
     `epiweek_now`: the epiweek on which the submissions were made

     The query selects forecast points up to week 20 of the year after
     `test_season`, with `wili > 0`, for users whose `_debug` preference
     (or its default) is '0'. Rows are ordered by user and epiweek. When
     `self.users` is not None, only users contained in it are kept.

     Returns a list of curves (lists of wILI values), one per user whose
     submission has exactly one value for every week after `epiweek_now`
     through the final week; incomplete submissions are reported with a
     warning and skipped.

     The cursor is stored in `self.cur` and is always closed, even if the
     query or the row iteration raises.
     """
     final_week = flu.join_epiweek(self.test_season + 1, 20)
     submissions = {}
     self.cur = self.cnx.cursor()
     try:
         self.cur.execute(
             """
 SELECT
   u.`id` `user_id`, f.`epiweek`, f.`wili`
 FROM (
   SELECT
     u.*
   FROM
     `ec_fluv_users_mturk_2019` u
   JOIN
     `ec_fluv_defaults` d
   ON
     TRUE
   LEFT JOIN
     `ec_fluv_user_preferences_mturk` p
   ON
     p.`user_id` = u.`id` AND p.`name` = d.`name`
   WHERE
     d.`name` = '_debug' AND coalesce(p.`value`, d.`value`) = '0'
   ) u
 JOIN
   `ec_fluv_submissions_mturk` s
 ON
   s.`user_id` = u.`id`
 JOIN
   `ec_fluv_forecast_mturk` f
 ON
   f.`user_id` = u.`id` AND f.`region_id` = s.`region_id` AND f.`epiweek_now` = s.`epiweek_now`
 JOIN
   `ec_fluv_regions` r
 ON
   r.`id` = s.`region_id`
 WHERE
   r.`fluview_name` = %s AND s.`epiweek_now` = %s AND f.`epiweek` <= %s AND f.`wili` > 0
 ORDER BY
   u.`id` ASC, f.`epiweek` ASC
 """, (region, epiweek_now, final_week))
         # rows arrive ordered by user and epiweek, so appending keeps each
         # user's values in week order
         for (user, _epiweek, wili) in self.cur:
             if self.users is not None and user not in self.users:
                 continue
             submissions.setdefault(user, []).append(wili)
     finally:
         # release the cursor on both the success and the failure path
         self.cur.close()

     # keep only submissions that cover every remaining week of the season
     expected_weeks = flu.delta_epiweeks(epiweek_now, final_week)
     curves = []
     for user, values in submissions.items():
         if len(values) != expected_weeks:
             print(
                 ' [EC] warning: missing data in user sumission [%d|%s|%d]'
                 % (user, region, epiweek_now))
         else:
             curves.append(values)
     return curves
Пример #8
0
 def train(self, epiweek):
   """Fit the model on every season that had ended by `epiweek`.

   A season `year` is included when `epiweek` is at or after week 29 of
   `year + 1`. Stores the fitted `Archetype` in `self.model` and `epiweek`
   in `self.training_week`.

   Returns the tuple `(curves, model)`.
   """
   curves = [
     self.curves[year]
     for year in self.years
     if epiweek >= EW.join_epiweek(year + 1, 29)
   ]
   self.model = Archetype(curves)
   self.training_week = epiweek
   return curves, self.model
Пример #9
0
    def _get_current(self, region, epiweek, forecast_type):
        """Fetch the current season's values as of issue `epiweek`.

        The query covers week 40 of `test_season` through week 20 of the next
        year, restricted to `issues=epiweek`. For WILI forecasts the `wili`
        field of the fluview rows is returned; otherwise the `region` column
        of the `network_all` flusurv rows.

        Returns the list of values in row order.
        Raises `Exception` when the number of values differs from the number
        of weeks between the season start and `epiweek` (inclusive).
        """
        season_start = flu.join_epiweek(self.test_season + 0, 40)
        season_end = flu.join_epiweek(self.test_season + 1, 20)
        weeks = Epidata.range(season_start, season_end)
        # NOTE(review): the `forecast_type` argument is never read; the
        # branch below uses `self.forecast_type` -- confirm this is intended
        if self.forecast_type == ForecastType.WILI:
            print('fetching history data for:')
            print(region, epiweek, weeks)
            response = Epidata.fluview(region, weeks, issues=epiweek)
            column = 'wili'
        else:
            response = Epidata.flusurv('network_all', weeks, issues=epiweek)
            column = region
        data = [row[column] for row in Forecaster.Utils.decode(response)]

        expected = flu.delta_epiweeks(season_start, epiweek) + 1
        if len(data) != expected:
            raise Exception('missing data')
        return data
Пример #10
0
 def _forecast(self, region, epiweek):
   """Splice curves from the `past` forecaster onto curves from `future`.

   Both sub-forecasters are asked for curves for `region` and `epiweek`.
   Each output curve takes the first `i` values from a past curve and the
   remaining values from a future curve, where `i` is the number of weeks
   between week 40 of `test_season` and `epiweek`. The shorter list of
   curves is cycled so that the output has `max(len(P), len(F))` curves.

   Calls `self._callback()` afterwards when a callback is set.
   Returns the list of spliced curves.
   """
   print('inside hybrid._forecast, region, epiweek:', region, epiweek)
   past_curves = self.past._forecast(region, epiweek)
   future_curves = self.future._forecast(region, epiweek)
   num_past, num_future = len(past_curves), len(future_curves)
   print('inside hybrid._forecast, len P, len F', num_past, num_future)
   split = flu.delta_epiweeks(flu.join_epiweek(self.test_season, 40), epiweek)
   # NOTE(review): if exactly one of the two lists is empty, the modulo below
   # raises ZeroDivisionError
   curves = []
   for j in range(max(num_past, num_future)):
     head = past_curves[j % num_past]
     tail = future_curves[j % num_future]
     curves.append(list(head[:split]) + list(tail[split:]))
   if self._callback is not None:
     self._callback()
   return curves
Пример #11
0
    def _forecast(self, region, epiweek):
        """Sample season curves that are pinned to the data observed so far.

        `region`: key into the per-region statistics (`emp_mean`, `emp_var`,
          `emp_curves`, `bf_var`)
        `epiweek`: the most recent epiweek with observed data

        For every week from week 40 of `test_season` through `epiweek`, the
        empirical mean is replaced by the observed value and the variance by
        the `bf_var` entry for that week's lag (weeks between it and
        `epiweek`, capped at the last available entry). Curves are then drawn
        with `Forecaster.Utils.sample_normal_var`. When `do_sampling` is
        false, the part of each curve after `epiweek` is overwritten with the
        matching part of a historical curve, cycling through `emp_curves`.

        Returns the sampled curves.
        """
        ew1 = flu.join_epiweek(self.test_season + 0, 40)
        print('fetching past data until week %d' % (epiweek))
        observed = self._get_current(region, epiweek, self.forecast_type)

        mean, var = self.emp_mean[region].copy(), self.emp_var[region].copy()
        bf_var = self.bf_var[region]
        max_lag = len(bf_var) - 1
        # index of `epiweek` within the season; loop-invariant
        last_index = flu.delta_epiweeks(ew1, epiweek)
        for ew in flu.range_epiweeks(ew1, flu.add_epiweeks(epiweek, 1)):
            i = flu.delta_epiweeks(ew1, ew)
            lag = min(last_index - i, max_lag)
            mean[i] = observed[i]
            var[i] = bf_var[lag]
        curves = Forecaster.Utils.sample_normal_var(mean, var,
                                                    self.num_samples)
        if not self.do_sampling:
            # replace the unobserved tail with historical curves
            offset = last_index + 1
            emp_curves = self.emp_curves[region]
            for (i, curve) in enumerate(curves):
                curve[offset:] = emp_curves[i % len(emp_curves)][offset:]
        return curves
Пример #12
0
    def test_from_epiweek(self):
        """`EpiDate.from_epiweek` round-trips and lands on day-of-week 3."""
        # known (date, epiweek) pairs
        for y, m, d, ey, ew in FunctionTests.sample_epiweeks:
            expected = utils_epiweek.join_epiweek(ey, ew)
            with self.subTest(y=y, m=m, d=d, epwk=expected):
                from_date = EpiDate(y, m, d)
                from_week = EpiDate.from_epiweek(ey, ew)
                self.assertEqual(from_date.get_ew(), expected)
                self.assertEqual(from_week.get_ew(), expected)
                self.assertEqual(from_week.get_day_of_week(), 3)

        # every valid week of the years 2000-2019
        for year in range(2000, 2020):
            last_week = utils_epiweek.get_num_weeks(year)
            for week in range(1, last_week + 1):
                expected = utils_epiweek.join_epiweek(year, week)
                converted = EpiDate.from_epiweek(year, week)
                self.assertEqual(converted.get_ew(), expected)
                self.assertEqual(converted.get_day_of_week(), 3)

        # invalid year/week combinations must be rejected
        for bad_year, bad_week in ((2017, 0), (2017, 53), (0, 30)):
            with self.assertRaises(Exception):
                EpiDate.from_epiweek(bad_year, bad_week)
Пример #13
0
 def _train(self, region):
     """Compute and cache the empirical statistics for `region`.

     Fills `self.emp_mean`, `self.emp_var`, `self.emp_curves` and
     `self.bf_var` for `region`. Does nothing when `region` already has an
     entry in `self.bf_var`. Regions whose name has length 2 reuse the
     entries stored under 'hhs4', which must already be present.

     Raises `Exception('not enough data')` when, for some lag, fewer than
     two epiweeks are present in both the stable and the unstable data.
     """
     if region in self.bf_var:
         # already trained
         return
     if len(region) == 2:
         # TODO: this is a hack for state ILI
         # assume backfill of region 4
         # NOTE: the 'hhs4' objects are shared by reference, not copied
         print('FIXME: setting backfill for %s as hhs4' % region)
         self.bf_var[region] = self.bf_var['hhs4']
         self.emp_mean[region] = self.emp_mean['hhs4']
         self.emp_var[region] = self.emp_var['hhs4']
         self.emp_curves[region] = self.emp_curves['hhs4']
         return
     stable = self._get_stable(region)
     # first element of `flu.get_season(ew)` for every stable epiweek;
     # presumably the season's first epiweek -- verify against `flu`
     start_weeks = [flu.get_season(ew)[0] for ew in stable.keys()]
     curves = []
     # distinct season years, skipping epiweeks for which no season was found
     seasons = set(
         [flu.split_epiweek(ew)[0] for ew in start_weeks if ew is not None])
     # NOTE: `seasons` is a set, so the order of `curves` follows set
     # iteration order
     for s in seasons:
         ew1 = flu.join_epiweek(s + 0, 40)
         # the curve length depends on the forecast type: the end week is
         # 37 weeks after week 40 for WILI, 29 weeks otherwise
         if self.forecast_type == ForecastType.WILI:
             ew2 = flu.add_epiweeks(ew1, 37)
         else:
             ew2 = flu.add_epiweeks(ew1, 29)
         # print("stable: ", stable)
         # print("range_epiweeks: ", [i for i in flu.range_epiweeks(ew1, ew2)])
         # a KeyError here means a week of the season is absent from `stable`
         curve = [stable[ew] for ew in flu.range_epiweeks(ew1, ew2)]
         curves.append(curve)
     # per-week mean and sample variance (ddof=1) across the seasons
     self.emp_mean[region] = np.mean(curves, axis=0)
     self.emp_var[region] = np.var(curves, axis=0, ddof=1)
     self.emp_curves[region] = curves
     if self.backfill_weeks is None:
         # backfill modelling disabled: a single zero-variance entry
         self.bf_var[region] = [0]
     else:
         # one variance per lag: sample variance of (stable - unstable) over
         # the epiweeks present in both data sets
         self.bf_var[region] = []
         for lag in range(self.backfill_weeks):
             unstable = self._get_unstable(region, lag)
             changes = [
                 stable[ew] - unstable[ew]
                 for ew in stable.keys() & unstable.keys()
             ]
             if len(changes) < 2:
                 raise Exception('not enough data')
             self.bf_var[region].append(np.var(changes, ddof=1))
     # report the per-lag standard deviations (square roots of the variances)
     print(
         ' %5s: %s' %
         (region, ' '.join(['%.3f' % (b**0.5)
                            for b in self.bf_var[region]])))
Пример #14
0
    def _get_stable(self, region):
        """Fetch values for the seasons from 2003 up to `test_season`.

        One epiweek range (week 40 plus the following 37 weeks) is built per
        season in `range(2003, self.test_season)`, skipping 2009. No `issues`
        or `lag` restriction is applied to the query.

        Returns a dict mapping each row's `epiweek` to its value: the `wili`
        field for WILI forecasts, otherwise the `region` column of the
        `network_all` flusurv rows.
        """
        ranges = []
        for season in range(2003, self.test_season):
            if season != 2009:
                first = flu.join_epiweek(season, 40)
                last = flu.add_epiweeks(first, 37)
                ranges.append(Epidata.range(first, last))

        # pick the data source and the column to read based on forecast type
        if self.forecast_type == ForecastType.WILI:
            response = Epidata.fluview(region, ranges)
            column = 'wili'
        else:
            response = Epidata.flusurv('network_all', ranges)
            column = region
        rows = Forecaster.Utils.decode(response)
        return {row['epiweek']: row[column] for row in rows}
Пример #15
0
def get_public_data(row):
    """Convert one CSV row of public health lab data into a dict.

    `row`: list of column strings from the CSV file

    Two layouts are accepted: per-season rows (header `hrow1`, with a single
    SEASON_DESCRIPTION column) and weekly rows (header `hrow2`, with YEAR and
    WEEK columns). Weekly rows have one extra column, so every count column
    is shifted by one.

    Returns `None` for header rows, for single-column rows, and for rows
    whose total-specimens value is 'X' (not reported). Otherwise returns a
    dict with the location, epiweek and specimen counts.
    Raises `Exception` when a row starting with 'REGION TYPE' matches neither
    expected header.
    """
    hrow1 = [
        'REGION TYPE', 'REGION', 'SEASON_DESCRIPTION', 'TOTAL SPECIMENS',
        'A (2009 H1N1)', 'A (H3)', 'A (Subtyping not Performed)', 'B', 'BVic',
        'BYam', 'H3N2v'
    ]
    hrow2 = [
        'REGION TYPE', 'REGION', 'YEAR', 'WEEK', 'TOTAL SPECIMENS',
        'A (2009 H1N1)', 'A (H3)', 'A (Subtyping not Performed)', 'B', 'BVic',
        'BYam', 'H3N2v'
    ]
    if row[0] == 'REGION TYPE' and row != hrow1 and row != hrow2:
        raise Exception('header row has changed for public health lab data.')
    if len(row) == 1 or row[0] == 'REGION TYPE':
        # header row
        return None
    # handle case where data is reported by season, not by epiweek
    is_weekly = len(row) == len(hrow2)
    # row offset: weekly rows carry one extra leading column (YEAR + WEEK
    # instead of SEASON_DESCRIPTION)
    offset = 1 if is_weekly else 0
    # data is not reported, ignore this row. TOTAL SPECIMENS sits at index 3
    # in seasonal rows and at index 4 in weekly rows; the original index-3
    # check is kept so that previously skipped rows are still skipped.
    if row[3] == 'X' or row[3 + offset] == 'X':
        return None
    # set epiweek
    if is_weekly:
        epiweek = join_epiweek(int(row[2]), int(row[3]))
    else:
        # characters 7-10 of the season description are parsed as the year;
        # the season is assigned to week 40 of that year
        epiweek = int(row[2][7:11]) * 100 + 40
    return {
        'location': fluview_locations.get_location_name(*row[:2]),
        'epiweek': epiweek,
        'total_specimens': int(row[3 + offset]),
        'total_a_h1n1': optional_int(row[4 + offset]),
        'total_a_h3': optional_int(row[5 + offset]),
        'total_a_h3n2v': optional_int(row[10 + offset]),
        'total_a_no_sub': optional_int(row[6 + offset]),
        'total_b': optional_int(row[7 + offset]),
        'total_b_vic': optional_int(row[8 + offset]),
        'total_b_yam': optional_int(row[9 + offset])
    }
Пример #16
0
def get_clinical_data(row):
    """Convert one CSV row of clinical lab data into a dict of named fields.

    `row`: list of column strings from the CSV file

    Returns `None` for header rows, for single-column rows, and for rows
    whose column 4 (total specimens) is 'X'. Otherwise returns a dict with
    the location, epiweek, specimen counts and percentages.
    Raises `Exception` when a row starting with 'REGION TYPE' does not match
    the expected header exactly.
    """
    expected_header = [
        'REGION TYPE', 'REGION', 'YEAR', 'WEEK', 'TOTAL SPECIMENS',
        'TOTAL A', 'TOTAL B', 'PERCENT POSITIVE', 'PERCENT A', 'PERCENT B'
    ]
    looks_like_header = row[0] == 'REGION TYPE'
    if looks_like_header and row != expected_header:
        raise Exception('header row has changed for clinical lab data.')
    if looks_like_header or len(row) == 1:
        # header (or title) row carries no data
        return None
    if row[4] == 'X':
        # data is not reported, ignore this row
        return None

    record = {
        'location': fluview_locations.get_location_name(row[0], row[1]),
        'epiweek': join_epiweek(int(row[2]), int(row[3])),
        'total_specimens': int(row[4]),
    }
    # counts may be missing
    for field, index in (('total_a', 5), ('total_b', 6)):
        record[field] = optional_int(row[index])
    # percentages are copied from the file via `nullable_float`
    for field, index in (('percent_positive', 7), ('percent_a', 8),
                         ('percent_b', 9)):
        record[field] = nullable_float(row[index])
    return record
Пример #17
0
 def test_get_ew(self):
     """`EpiDate.get_ew` returns the expected epiweek for each sample date."""
     for year, month, day, epi_year, epi_week in FunctionTests.sample_epiweeks:
         expected = utils_epiweek.join_epiweek(epi_year, epi_week)
         with self.subTest(y=year, m=month, d=day, epwk=expected):
             actual = EpiDate(year, month, day).get_ew()
             self.assertEqual(actual, expected)
Пример #18
0
    def forecast(self, epiweek):
        """Produce a complete `Forecast` for every location in `self.locations`.

        `epiweek`: the most recent epiweek for which ILINet data is available

        For each location, sample curves are drawn with `self._forecast`,
        converted to targets with `Targets.get_all_targets`, and turned into
        distributions for peak week, peak height and the four lookaheads.
        Onset is forecast only for locations where `Locations.is_region` is
        true; those are also the only ones that get a baseline.

        Returns the `Forecast` after calling its `sanity_check()`.
        Raises `Exception` when `epiweek` belongs to a season other than
        `test_season`, when its week number is strictly between 20 and 40, or
        when `min_week_prob` / `min_wili_prob` would put a total weight above
        1 on the uniform component.
        """
        # sanity checks
        flu.check_epiweek(epiweek)
        season_info = flu.get_season(epiweek)
        season = flu.split_epiweek(season_info[0])[0]
        week = flu.split_epiweek(epiweek)[1]
        first_epiweek = flu.join_epiweek(season, 40)
        offset = flu.delta_epiweeks(first_epiweek, epiweek)
        if season != self.test_season:
            raise Exception('unable to forecast season %d' % season)
        if 20 < week < 40:
            raise Exception('unable to forecast week %02d' % week)

        # initialize forecast
        forecast = Forecast(self.test_season, datetime.now(), self.name,
                            epiweek, self.forecast_type)

        # aliases for readability
        num_week_bins = forecast.season_length
        num_wili_bins = forecast.num_ili_bins
        wili_bin_size = forecast.ili_bin_size

        # uniform blending weights; the week weight includes the `none` "bin"
        week_weight = self.min_week_prob * (num_week_bins + 1)
        wili_weight = self.min_wili_prob * num_wili_bins
        if week_weight > 1:
            raise Exception('`min_week_prob` is impossibly high')
        if wili_weight > 1:
            raise Exception('`min_wili_prob` is impossibly high')

        allow_no_pw = self.test_season < 2016
        target_names = ('onset', 'peakweek', 'peak', 'x1', 'x2', 'x3', 'x4')
        lookahead_names = ('x1', 'x2', 'x3', 'x4')

        # forecast each region
        for region in self.locations:
            # draw sample curves
            curves = self._forecast(region, epiweek)

            # regional info: only regions have a baseline
            is_region = Locations.is_region(region)
            if is_region:
                baseline = Targets.baselines[self.test_season][region]
            else:
                baseline = None

            # get all targets, then regroup them as one sample list per target
            targets = [
                Targets.get_all_targets(curve,
                                        baseline,
                                        offset,
                                        rule_season=self.test_season)
                for curve in curves
            ]
            samples = {
                name: [target[name] for target in targets]
                for name in target_names
            }

            # forecast each target; onset is skipped for states and
            # hospitalization and done only for regions
            if is_region:
                onset = self.forecast_weeks(first_epiweek, num_week_bins,
                                            samples['onset'], week_weight,
                                            self.smooth_weeks_bw, True)
            peakweek = self.forecast_weeks(first_epiweek, num_week_bins,
                                           samples['peakweek'], week_weight,
                                           self.smooth_weeks_bw, allow_no_pw)
            peak = self.forecast_wili(wili_bin_size, num_wili_bins,
                                      samples['peak'], wili_weight,
                                      self.smooth_wili_bw)
            lookaheads = [
                self.forecast_wili(wili_bin_size, num_wili_bins,
                                   samples[name], wili_weight,
                                   self.smooth_wili_bw)
                for name in lookahead_names
            ]

            # fill in the forecast data
            fc = forecast.get_or_create_forecast(region)
            if is_region:
                fc.set_onset(*onset)
            fc.set_peakweek(*peakweek)
            fc.set_peak(*peak)
            for weeks_ahead, distribution in enumerate(lookaheads, start=1):
                fc.set_lookahead(weeks_ahead, *distribution)

        # sanity check completed forecast
        forecast.sanity_check()
        return forecast