Пример #1
0
def update(sensors, first_week=None, last_week=None, valid=False, test_mode=False):
  """Compute sensor readings for each (name, location) pair and store them.

  Args:
    sensors: iterable of (sensor_name, location_spec) tuples
    first_week: first epiweek to update, or None to resume from the database
    last_week: last epiweek to update, or None to use the most recent issue
    valid: passed through to each sensor implementation
    test_mode: when True, nothing is committed (dry run)
  """

  # map sensor name -> implementation; built once, outside the loops
  # (the original rebuilt this dict on every test week)
  implementations = {
    'gft': get_gft,
    'ght': get_ght,
    'ghtj': get_ghtj,
    'twtr': get_twtr,
    'wiki': get_wiki,
    'cdc': get_cdc,
    'epic': get_epic,
    'sar3': get_sar3,
    'arch': get_arch,
    'quid': get_quid,
  }

  # most recent issue
  last_issue = get_most_recent_issue()

  # location information
  loc_info = StateInfo()

  # connect
  u, p = secrets.db.epi
  cnx = mysql.connector.connect(user=u, password=p, database='epidata')
  cur = cnx.cursor()

  try:
    # update each sensor
    for (name, loc) in sensors:
      # expand a location group ('hhs', 'cen', 'state'/'sta') to its members
      if loc == 'hhs':
        locations = loc_info.hhs
      elif loc == 'cen':
        locations = loc_info.cen
      elif loc in ('state', 'sta'):
        locations = loc_info.sta
      else:
        locations = [loc]
      # update each location
      print(locations)
      for location in locations:
        # timing: default to resuming after the last stored reading, and
        # stop one week past the most recent issue
        ew1, ew2 = first_week, last_week
        if ew1 is None:
          ew1 = get_last_update(cur, name, location)
        if ew2 is None:
          ew2 = flu.add_epiweeks(last_issue, +1)
        print('Updating %s-%s from %d to %d.' % (name, location, ew1, ew2))
        for test_week in flu.range_epiweeks(ew1, ew2, inclusive=True):
          # sensors are trained on data up to the week before the test week
          train_week = flu.add_epiweeks(test_week, -1)
          try:
            value = implementations[name](location, train_week, valid)
            print(' %4s %5s %d -> %.3f' % (name, location, test_week, value))
            # upload
            store_value(cur, name, location, test_week, value)
          except Exception as ex:
            # best-effort: log the failure and continue with the next week
            print(' failed: %4s %5s %d' % (name, location, test_week), ex)
          sys.stdout.flush()
    # commit only on success, and only when not a dry run
    if not test_mode:
      cnx.commit()
  finally:
    # always release DB resources, even if an unexpected error escapes
    cur.close()
    cnx.close()
Пример #2
0
 def _train(self, region):
     """Fit empirical mean/variance curves and backfill variance for *region*.

     Results are cached on self.emp_mean / self.emp_var / self.emp_curves /
     self.bf_var, so training is a no-op when the region is already present.
     """
     if region in self.bf_var:
         # already trained
         return
     if len(region) == 2:
         # TODO: this is a hack for state ILI
         # assume backfill of region 4
         print('FIXME: setting backfill for %s as hhs4' % region)
         self.bf_var[region] = self.bf_var['hhs4']
         self.emp_mean[region] = self.emp_mean['hhs4']
         self.emp_var[region] = self.emp_var['hhs4']
         self.emp_curves[region] = self.emp_curves['hhs4']
         return
     stable = self._get_stable(region)
     # season start week for each epiweek that has stable data
     start_weeks = [flu.get_season(ew)[0] for ew in stable.keys()]
     curves = []
     seasons = set(
         [flu.split_epiweek(ew)[0] for ew in start_weeks if ew is not None])
     for s in seasons:
         # each season's curve starts at week 40; its length depends on the
         # forecast type (37 weeks for wILI, 29 otherwise)
         ew1 = flu.join_epiweek(s + 0, 40)
         if self.forecast_type == ForecastType.WILI:
             ew2 = flu.add_epiweeks(ew1, 37)
         else:
             ew2 = flu.add_epiweeks(ew1, 29)
         # print("stable: ", stable)
         # print("range_epiweeks: ", [i for i in flu.range_epiweeks(ew1, ew2)])
         curve = [stable[ew] for ew in flu.range_epiweeks(ew1, ew2)]
         curves.append(curve)
     # pointwise empirical statistics across seasons (sample variance, ddof=1)
     self.emp_mean[region] = np.mean(curves, axis=0)
     self.emp_var[region] = np.var(curves, axis=0, ddof=1)
     self.emp_curves[region] = curves
     if self.backfill_weeks is None:
         # no backfill model: a single zero-variance entry
         self.bf_var[region] = [0]
     else:
         self.bf_var[region] = []
         for lag in range(self.backfill_weeks):
             # variance of (stable - unstable) revisions at this lag, over
             # the epiweeks where both versions are available
             unstable = self._get_unstable(region, lag)
             changes = [
                 stable[ew] - unstable[ew]
                 for ew in stable.keys() & unstable.keys()
             ]
             if len(changes) < 2:
                 raise Exception('not enough data')
             self.bf_var[region].append(np.var(changes, ddof=1))
     # report backfill standard deviations per lag
     print(
         ' %5s: %s' %
         (region, ' '.join(['%.3f' % (b**0.5)
                            for b in self.bf_var[region]])))
Пример #3
0
 def _get_features(self,
                   ew,
                   signal_to_truth_shift=0,
                   valid=False,
                   mask=None):
     """Build the 1x10 feature row for epiweek *ew*, masked to *mask* columns.

     Features: intercept, 3 lagged log-wILI values, 4 holiday indicators,
     and sin/cos seasonal terms.

     Raises:
       Exception: when `valid` is True and an unstable value is flagged
         missing for one of the 3 lags.
     """
     # avoid a mutable (ndarray) default argument; None selects all columns,
     # matching the original default behavior
     if mask is None:
         mask = np.ones((10), dtype=bool)
     X = np.zeros((1, 10))
     i = self.ew2i[ew]
     # intercept
     X[0, 0] = 1
     for lag in range(3):
         if valid and not self.valid[i - lag][lag]:
             w = self.i2ew[i - lag]
             raise Exception('missing unstable wILI (ew=%d|lag=%d)' %
                             (w, lag))
         try:
             # log of the stable value, floored at 0.01 to keep the log finite
             X[0, 1 + lag] = np.log(
                 np.maximum(
                     0.01,
                     self.data[i - lag - signal_to_truth_shift]['stable']))
         except Exception:
             # missing data: record NaN for this lag
             X[0, 1 + lag] = np.nan
     # indicator features for the 4 weeks leading up to week 1 of the year
     for holiday in range(4):
         if EW.split_epiweek(EW.add_epiweeks(ew, holiday))[1] == 1:
             X[0, 4 + holiday] = 1
     # smooth seasonal encoding of the week-of-year
     y, w = EW.split_epiweek(ew)
     N = EW.get_num_weeks(y)
     offset = np.pi * 2 * w / N
     X[0, 8] = np.sin(offset)
     X[0, 9] = np.cos(offset)
     # todo linear time trend covariate?
     return X[:, mask]
Пример #4
0
def get_kcdc_data():
    """Fetch and parse KCDC influenza data.

    Returns:
      (ews, ilis): parallel lists of epiweeks and ILI values.

    NOTE(review): requests.post(url, params) sends `params` as the request
    body (the `data` argument), not as URL query parameters — presumably
    intentional for this endpoint; confirm.
    """
    issue = EpiDate.today().get_ew()
    # the season label rolls over after epiweek 35
    last_season = issue // 100 + (1 if issue % 100 > 35 else 0)
    url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do'
    params = {
        'icdNm': 'influenza',
        'startYear': '2004',  # Started in 2004
        'endYear': str(last_season)
    }
    response = requests.post(url, params)
    datas = response.json()
    data = datas['data']
    ews = []
    ilis = []
    ew1 = 200436
    for year in range(2004, last_season):
        year_data = data[year - 2004]
        if year > 2004:
            # continue where the previous season ended
            # NOTE(review): plain integer +1, not add_epiweeks — relies on
            # season boundaries never crossing a year-end; confirm
            ew1 = ews[-1] + 1
        # values are backtick-delimited in the response payload
        ili_yr = year_data["VALUE"].split('`')
        ili_yr = [float(f) for f in ili_yr if f != '']
        ew2 = add_epiweeks(ew1, len(ili_yr))
        # walk the epiweeks and values in lockstep (ili_yr is already float)
        for ew, ili in zip(range_epiweeks(ew1, ew2), ili_yr):
            ews.append(ew)
            ilis.append(ili)
    return ews, ilis
Пример #5
0
 def get_training_set_datasetname(location, epiweek, signal, target,
                                  signal_to_truth_ew_shift):
     """Build a training set for `location`/`target` ending at `epiweek`.

     Pairs each signal reading (shifted by `signal_to_truth_ew_shift`) with
     the corresponding ground-truth value, skipping the week being predicted
     and any week with no ground truth.

     Returns:
       (epiweeks, X, Y): sorted epiweeks, signal values, and truth labels.
     """
     ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
     # fetch ground truth, keyed by epiweek
     auth = secrets.api.datasetname_targets
     datasetnameData = Epidata.check(
         Epidata.datasetname_targets(auth, target, location, weeks0))
     groundTruth = {row['epiweek']: row['value'] for row in datasetnameData}
     data = {}
     dropped_weeks = 0
     # iterate items() so each value is fetched together with its key
     for signal_week, sig in signal.items():
         ground_truth_week = flu.add_epiweeks(signal_week,
                                              signal_to_truth_ew_shift)
         # skip the week we're trying to predict
         if ground_truth_week == ew3:
             continue
         if ground_truth_week not in groundTruth:
             # no label available for this week
             dropped_weeks += 1
             continue
         data[ground_truth_week] = {
             'x': sig,
             'y': groundTruth[ground_truth_week],
         }
     if dropped_weeks:
         msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
         print(msg % (dropped_weeks, len(signal)))
     epiweeks = sorted(data.keys())
     X = [data[week]['x'] for week in epiweeks]
     Y = [data[week]['y'] for week in epiweeks]
     return (epiweeks, X, Y)
Пример #6
0
 def _forecast(first_epiweek, num_bins, indices, uniform_weight,
               smooth_bw, allow_none):
     """Return (bin distribution, P(none), point prediction) for week bins."""
     # week bins are never smoothed; flag an ignored smoothing request
     if smooth_bw > 0:
         print(
             ' [EC] warning: epicast doesnt smooth week bins, but smooth_bw = %.3f'
             % smooth_bw)
     votes = [i for i in indices if i is not None]
     num_none = len(indices) - len(votes)
     if num_none > 0 and not allow_none:
         raise Exception('target does not allow None, but None given')
     # fit the bin distribution, then weight it by the number of real votes
     fitted = Epicast.fit_distribution(indices, num_bins, 1, -0.5, False,
                                       num_users)
     fitted *= len(votes)
     # append the None count as a trailing pseudo-bin when allowed
     tail = [num_none] if allow_none else []
     blended = Forecaster.Utils.normalize(list(fitted) + tail)
     blended = Forecaster.Utils.blend(blended, uniform_weight)
     if allow_none:
         none = blended[-1]
         dist = blended[:-1]
     else:
         dist, none = blended, None
     # point prediction: median vote, or bin 0 when nobody voted
     point = flu.add_epiweeks(first_epiweek,
                              int(np.median(votes if votes else [0])))
     return (dist, none, point)
Пример #7
0
 def _get_partial_trajectory(self, epiweek, valid=True):
   """Return the wILI curve from week 30 of the season through `epiweek`.

   Stable (final) values are used for older weeks, and unstable
   (as-of-issue) values are overlaid on top. When `valid` is True, stable
   values within 5 weeks of `epiweek` are excluded so that only unstable
   data covers the recent past.

   Raises:
     Exception: if any week in the range has no available wILI value, or
       the assembled curve has the wrong length.
   """
   y, w = EW.split_epiweek(epiweek)
   if w < 30:
     y -= 1
   ew1 = EW.join_epiweek(y, 30)
   ew2 = epiweek
   # stable values at or after this week are considered not-yet-final
   limit = EW.add_epiweeks(ew2, -5)
   weeks = Epidata.range(ew1, ew2)
   stable = Epidata.check(Epidata.fluview(self.region, weeks))
   try:
     unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
   except Exception:
     # was a bare `except:` — narrowed so Ctrl-C and SystemExit propagate;
     # any fetch failure is treated as "no unstable data available"
     unstable = []
   wili = {}
   for row in stable:
     ew, value = row['epiweek'], row['wili']
     if not valid or ew < limit:
       wili[ew] = value
   # unstable values take precedence over stable ones
   for row in unstable:
     ew, value = row['epiweek'], row['wili']
     wili[ew] = value
   curve = []
   for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
     if ew not in wili:
       if valid:
         t = 'unstable'
       else:
         t = 'any'
       raise Exception('wILI (%s) not available for week %d' % (t, ew))
     curve.append(wili[ew])
   # sanity check: the curve must cover every week of the range
   n1 = EW.delta_epiweeks(ew1, ew2) + 1
   n2 = len(curve)
   if n1 != n2:
     raise Exception('missing data (expected %d, found %d)' % (n1, n2))
   return curve
Пример #8
0
def get_weeks(epiweek):
  """Return key epiweeks and Epidata ranges around *epiweek*.

  Returns (ew1, ew2, ew3, weeks0, weeks1): the fixed start week 200330, the
  given epiweek, the following epiweek, and Epidata ranges ending at ew2
  and ew3 respectively.
  """
  start = 200330
  following = flu.add_epiweeks(epiweek, 1)
  through_given = Epidata.range(start, epiweek)
  through_next = Epidata.range(start, following)
  return (start, epiweek, following, through_given, through_next)
Пример #9
0
 def get_most_recent_issue(self):
   """Return the most recent epiweek for which FluView data is available."""
   # look back 10 weeks from today and take the newest issue found
   current = EpiDate.today().get_ew()
   window = self.epidata.range(add_epiweeks(current, -9), current)
   rows = self.epidata.check(self.epidata.fluview('nat', window))
   return max(row['issue'] for row in rows)
Пример #10
0
  def update(self, sensors, first_week, last_week):
    """
    Compute sensor readings and store them in the database.
    """

    # default the end of the range to one week past the most recent issue
    if last_week is None:
      last_week = flu.add_epiweeks(get_most_recent_issue(self.epidata), +1)

    # connect
    with self.database as database:

      # update each sensor at each of its locations
      for (name, loc) in sensors:
        for location in get_location_list(loc):

          # determine the first week to update
          start = first_week
          if start is None:
            start = database.get_most_recent_epiweek(name, location)
            if start is None:
              # If an existing sensor reading wasn't found in the database and
              # no start week was given, just assume that readings should start
              # at 2014w01.
              start = 201401
              print('%s-%s not found, starting at %d' % (name, location, start))

          print('Updating %s-%s from %d to %d.' % (name, location, start, last_week))
          for test_week in flu.range_epiweeks(start, last_week, inclusive=True):
            self.update_single(database, test_week, name, location)
Пример #11
0
  def get_naive_nowcast(self, loc):
    # A true real-time random-walk naive nowcaster is impossible because
    # final wILI lags by months. Two stand-ins are possible:
    #   - Naive Oracle (delta=1): pretend final wILI is known at runtime and
    #     nowcast last week's final wILI. Unrealistic because of backfill,
    #     and therefore unfairly advantaged.
    #   - Seasonal Naive (delta=52): nowcast final wILI from the same week
    #     one year earlier. Only loosely correlated with wILI at runtime,
    #     and therefore unfairly disadvantaged.
    # (Ideally the nowcast would be preliminary wILI on the previous week,
    # but that data isn't generally available, except for certain locations
    # and seasons.) It's not immediately clear which definition of "naive"
    # is better; `delta` selects which one is used throughout this analysis.
    delta = 1

    observed = self.get_truth(loc)
    result = {}
    for week in observed:
      prior = Epiweek.add_epiweeks(week, -delta)
      if prior in observed:
        result[week] = observed[prior]
    return result
 def get_most_recent_issue(self, location):
     """Return the most recent epiweek for which paho_dengue data is available in given location."""
     # search the trailing 52 weeks for available data
     today_ew = EpiDate.today().get_ew()
     span = self.epidata.range(add_epiweeks(today_ew, -52), today_ew)
     rows = self.epidata.check(self.epidata.paho_dengue(location, span))
     return max(row['epiweek'] for row in rows)
 def get_weeks(self):
     """Return a list of weeks on which truth and sensors are both available."""
     # everything from the first data epiweek through last week, inclusive
     end = add_epiweeks(EpiDate.today().get_ew(), -1)
     return list(range_epiweeks(self.FIRST_DATA_EPIWEEK, end, inclusive=True))
Пример #14
0
 def extract(rows, fields, signal_to_truth_ew_shift):
     """Map shifted epiweek -> list of float values for `fields` in each row."""
     return {
         flu.add_epiweeks(row['epiweek'], signal_to_truth_ew_shift):
             [float(row[f]) for f in fields]
         for row in rows
     }
Пример #15
0
    def get_update_range(self, first_week, last_week):
        """Return the range of epiweeks to update."""
        # an explicit range wins outright
        if last_week:
            return first_week, last_week
        # repeat previous nowcast in case new data is available
        first_week = self.data_source.get_most_recent_issue()
        # nowcast the first week without ilinet data
        return first_week, add_epiweeks(first_week, 1)
Пример #16
0
 def update_single(self, database, test_week, name, location):
   """Compute and store one sensor reading; failures are logged, not raised."""
   # the sensor is trained on data up to the week before the test week
   prior_week = flu.add_epiweeks(test_week, -1)
   impl = self.implementations[name]
   try:
     value = impl(location, prior_week, self.valid, self.target)
     print(' %4s %5s %d -> %.3f' % (name, location, test_week, value))
   except Exception as ex:
     # reset so a failed computation (or failed print) is never stored
     value = None
     print(' failed: %4s %5s %d' % (name, location, test_week), ex)
   if value is not None:
     database.insert(self.target, name, location, test_week, value)
   sys.stdout.flush()
Пример #17
0
 def get_dengue_data(first_week, last_week):
     """Download and parse Taiwan NIDSS dengue counts for an epiweek range.

     Returns a dict of {epiweek: {location: count}}; locations with no
     reported cases remain at zero.
     """
     # Check week order
     if first_week > last_week:
         first_week, last_week = last_week, first_week
     # Bounds check
     if first_week < 200301 or last_week < 200301:
         raise Exception('week out of range')
     # Initialize data by week and location (zeroes are not reported)
     data = {}
     for week in range_epiweeks(first_week, add_epiweeks(last_week, 1)):
         data[week] = {}
         for location in NIDSS.LOCATION_TO_REGION.keys():
             data[week][location] = 0
     # Download CSV
     response = requests.get(NIDSS.DENGUE_URL)
     if response.status_code != 200:
         raise Exception('export Dengue failed [%d]' % response.status_code)
     # the export is encoded as Big5 (Taiwan variant)
     csv = response.content.decode('big5-tw')
     # Parse the data, skipping the header row and blank lines
     lines = [l.strip() for l in csv.split('\n')[1:] if l.strip() != '']
     for line in lines:
         fields = line.split(',')
         # locations are matched via base64 of their UTF-8 names, presumably
         # to avoid embedding non-ASCII literals — see NIDSS._TRANSLATED
         location_b64 = base64.b64encode(fields[3].encode('utf-8'))
         location = NIDSS._TRANSLATED[location_b64]
         region = NIDSS.LOCATION_TO_REGION[location]
         # b'5piv' is base64 of a single CJK character; presumably the
         # "yes" marker for imported cases — TODO confirm
         imported_b64 = base64.b64encode(fields[6].encode('utf-8'))
         imported = imported_b64 == b'5piv'
         sex = fields[5]
         age = fields[7]
         count = int(fields[8])
         year = int(fields[1])
         week = int(fields[2])
         # Week 53 was reported each year in 2003-2007
         if year < 2008 and year != 2003 and week > 52:
             week = 52
         # Epiweek system change in 2009
         # See also: http://research.undefinedx.com/forum/index.php?topic=300.0
         if year == 2009:
             week -= 1
             if week == 0:
                 year, week = 2008, 53
         epiweek = year * 100 + week
         if epiweek < first_week or epiweek > last_week:
             # Outside of the requested range
             continue
         if epiweek not in data or location not in data[epiweek]:
             # Not a vaild U.S. epiweek
             raise Exception('data missing %d-%s' % (epiweek, location))
         # Add the counts to the location on this epiweek
         data[epiweek][location] += count
     # Return results indexed by week and location
     return data
Пример #18
0
 def _get_features(self, ew, valid=True):
   """Build the 1x7 feature row for epiweek *ew*: intercept, 4 holiday
   indicators, and sin/cos seasonal terms."""
   features = np.zeros((1, 7))
   # index lookup kept: raises KeyError for weeks outside the known range
   i = self.ew2i[ew]
   # intercept
   features[0, 0] = 1
   # indicators for the 4 weeks leading up to week 1 of the year
   for k in range(4):
     if EW.split_epiweek(EW.add_epiweeks(ew, k))[1] == 1:
       features[0, 1 + k] = 1
   # smooth seasonal encoding of the week-of-year
   year, week = EW.split_epiweek(ew)
   theta = np.pi * 2 * week / EW.get_num_weeks(year)
   features[0, 5] = np.sin(theta)
   features[0, 6] = np.cos(theta)
   # todo linear time trend covariate?
   return features
Пример #19
0
    def _get_stable(self, region):
        """Fetch stable (final) values for all training seasons of *region*."""
        # one 37-week range starting at week 40 per season, skipping the
        # 2009 pandemic season
        ranges = []
        for season in range(2003, self.test_season):
            if season == 2009:
                continue
            start = flu.join_epiweek(season, 40)
            ranges.append(Epidata.range(start, flu.add_epiweeks(start, 37)))

        if self.forecast_type == ForecastType.WILI:
            rows = Forecaster.Utils.decode(Epidata.fluview(region, ranges))
            return {row['epiweek']: row['wili'] for row in rows}
        rows = Forecaster.Utils.decode(
            Epidata.flusurv('network_all', ranges))
        return {row['epiweek']: row[region] for row in rows}
Пример #20
0
 def _get_features(self, ew, valid=True):
   """Build the 1x8 feature row for epiweek *ew*: intercept, 3 lagged
   values, and 4 holiday indicators."""
   row = np.zeros((1, 8))
   i = self.ew2i[ew]
   # intercept
   row[0, 0] = 1
   for lag in range(3):
     # when `valid` is set, refuse to use values flagged as missing
     if valid and not self.valid[i - lag][lag]:
       w = self.i2ew[i - lag]
       raise Exception('missing unstable wILI (ew=%d|lag=%d)' % (w, lag))
     row[0, 1 + lag] = self.data[i - lag][lag]
   # indicators for the 4 weeks leading up to week 1 of the year
   for k in range(4):
     if EW.split_epiweek(EW.add_epiweeks(ew, k))[1] == 1:
       row[0, 4 + k] = 1
   # seasonal sin/cos terms are intentionally disabled in this variant
   return row
Пример #21
0
    def _forecast(self, region, epiweek):
        """Sample forecast curves for *region* as of *epiweek*.

        Starts from the empirical mean/variance trained for the region,
        overwrites already-observed weeks with reported values (using the
        lag-appropriate backfill variance), then samples curves.
        """
        ew1 = flu.join_epiweek(self.test_season + 0, 40)
        ew2 = flu.join_epiweek(self.test_season + 1, 24)
        # NOTE(review): num_weeks is computed but never used below
        num_weeks = flu.delta_epiweeks(ew1, ew2)
        print('fetching past data until week %d' % (epiweek))
        observed = self._get_current(region, epiweek, self.forecast_type)

        mean, var = self.emp_mean[region].copy(), self.emp_var[region].copy()
        for ew in flu.range_epiweeks(ew1, flu.add_epiweeks(epiweek, 1)):
            # i: index into the curve; lag: how many weeks old this report
            # is, capped at the last available backfill-variance entry
            i = flu.delta_epiweeks(ew1, ew)
            lag = flu.delta_epiweeks(ew1, epiweek) - i
            lag = min(lag, len(self.bf_var[region]) - 1)
            mean[i] = observed[i]
            var[i] = self.bf_var[region][lag]
        curves = Forecaster.Utils.sample_normal_var(mean, var,
                                                    self.num_samples)
        if not self.do_sampling:
            # replace the sampled future with historical season curves,
            # cycling through the empirical curves
            offset = flu.delta_epiweeks(ew1, epiweek) + 1
            for (i, curve) in enumerate(curves):
                index = i % len(self.emp_curves[region])
                curve[offset:] = self.emp_curves[region][index][offset:]
        return curves
Пример #22
0
  def extract_epiweek_and_team(filename):
    """
    Extract the submission epiweek (epiweek of most recently published report)
    and the team name from the file name of a flu contest submission.

    The return value is a tuple of:
      1. the submission epiweek (e.g. 201751)
      2. the team name (e.g. "delphi-epicast")
    """

    # this is the naming convention for 2017 flu contest submissions
    # (raw string so the \d escapes are explicit)
    pattern = re.compile(r'^EW(\d{2})-(.*)-(\d{4})-(\d{2})-(\d{2}).csv$')
    match = pattern.match(os.path.basename(filename))
    if match is None:
      # only able to parse this specific naming convention; say which file
      # failed instead of raising an empty Exception
      raise Exception('unable to parse submission file name: %s' % filename)

    week = int(match.group(1))
    team = match.group(2)
    year = int(match.group(3))
    month = int(match.group(4))
    day = int(match.group(5))
    epiweek = EpiDate(year, month, day).get_ew()

    # We know the week number, but the year has to be inferred from the
    # submission date. Since the week of submission is never less than the week
    # of the most recent report, we can step backwards from the week of
    # submission until we find the expected week number. Ordinarily, this will
    # take exactly two steps. For example, data collected on 2017w51 is
    # reported on 2017w52, and our forecast is submitted on 2018w01; so we
    # start with 2018w01 and step backwards until find the first week 51, which
    # is 2017w51.
    if not 1 <= week <= 53:
      # prevent an infinite loop
      raise Exception('invalid week number: %d' % week)
    while Epiweek.split_epiweek(epiweek)[1] != week:
      epiweek = Epiweek.add_epiweeks(epiweek, -1)

    return epiweek, team
Пример #23
0
 def get_week_forecast(first_epiweek, num_bins, indices, uniform_weight,
                       smooth_bw, allow_none):
     """Return (bin distribution, P(none), point prediction) for week bins."""
     counts = [indices.count(i) for i in range(num_bins)]
     num_none = indices.count(None)
     if num_none > 0 and not allow_none:
         raise Exception(
             'target does not allow None, but None was provided')
     # append the None count as a trailing pseudo-bin when allowed
     combined = counts + ([num_none] if allow_none else [])
     temp = Forecaster.Utils.normalize(np.array(combined))
     if smooth_bw > 0:
         # TODO: don't smooth across dist and norm
         temp = Forecaster.Utils.smooth(temp, smooth_bw)
     temp = Forecaster.Utils.blend(temp, uniform_weight)
     if allow_none:
         dist, none = temp[:-1], temp[-1]
     else:
         dist, none = temp, None
     # point prediction: low median of the non-None votes (bin 0 if empty)
     votes = [i for i in indices if i is not None] or [0]
     point = flu.add_epiweeks(first_epiweek, int(median_low(votes)))
     return (dist, none, point)
Пример #24
0
def update(locations,
           first=None,
           last=None,
           force_update=False,
           load_email=True):
    """Load Quidel data and upsert weekly per-location values into `quidel`.

    Args:
      locations: locations to update
      first, last: optional epiweek range overrides
      force_update: update even when the source data appears unchanged
      load_email: passed through to quidel.QuidelData
    """
    # download and prepare data first
    qd = quidel.QuidelData(DATAPATH, load_email)
    if not qd.need_update and not force_update:
        print('Data not updated, nothing needs change.')
        return

    qd_data = qd.load_csv()
    # weekly time series per location; start_weekday=4 sets the week
    # boundary — which day that index maps to is up to quidel; TODO confirm
    qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4)
    qd_ts = quidel.measurement_to_ts(qd_measurements,
                                     7,
                                     startweek=first,
                                     endweek=last)
    # connect to the database
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database='epidata')
    cur = cnx.cursor()

    def get_num_rows():
        # total number of rows currently in the `quidel` table
        cur.execute('SELECT count(1) `num` FROM `quidel`')
        for (num, ) in cur:
            pass
        return num

    # check from 4 weeks preceeding the last week with data through this week
    cur.execute(
        'SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`')
    for (ew0, ew1) in cur:
        ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4)
    # explicit range arguments override the derived range
    ew0 = ew0 if first is None else first
    ew1 = ew1 if last is None else last
    print('Checking epiweeks between %d and %d...' % (ew0, ew1))

    # keep track of how many rows were added
    rows_before = get_num_rows()

    # check Quidel for new and/or revised data
    sql = '''
    INSERT INTO
      `quidel` (`location`, `epiweek`, `value`)
    VALUES
      (%s, %s, %s)
    ON DUPLICATE KEY UPDATE
      `value` = %s
  '''

    total_rows = 0

    for location in locations:
        if location not in qd_ts:
            continue
        ews = sorted(qd_ts[location].keys())
        num_missing = 0
        for ew in ews:
            # upsert one (location, epiweek, value) row
            v = qd_ts[location][ew]
            sql_data = (location, ew, v, v)
            cur.execute(sql, sql_data)
            total_rows += 1
            if v == 0:
                # zero values are reported as "missing"
                num_missing += 1
        if num_missing > 0:
            print(' [%s] missing %d/%d value(s)' %
                  (location, num_missing, len(ews)))

    # keep track of how many rows were added
    rows_after = get_num_rows()
    print('Inserted %d/%d row(s)' % (rows_after - rows_before, total_rows))

    # cleanup
    cur.close()
    cnx.commit()
    cnx.close()
Пример #25
0
        X = self._get_features(epiweek, valid=valid)
        return float(SAR3.dot(X, self.model)[0, 0])


if __name__ == '__main__':
    # command line: the latest published epiweek and the region of interest
    parser = argparse.ArgumentParser()
    parser.add_argument('epiweek',
                        type=int,
                        help='most recently published epiweek (best 201030+)')
    parser.add_argument('region', type=str, help='region (nat, hhs, cen)')
    args = parser.parse_args()

    # the week being predicted is the one after the latest issue
    ew1, reg = args.epiweek, args.region
    ew2 = EW.add_epiweeks(ew1, 1)

    # train the model and make a prediction
    print('Most recent issue: %d' % ew1)
    prediction = SAR3(reg).predict(ew1, True)
    print('Predicted wILI in %s on %d: %.3f' % (reg, ew2, prediction))

    # compare against the actual value when it is already available
    res = Epidata.fluview(reg, ew2, auth=secrets.api.fluview)
    if res['result'] != 1:
        print('Actual wILI: unknown')
    else:
        row = res['epidata'][0]
        issue, wili = row['issue'], row['wili']
        err = prediction - wili
        print('Actual wILI as of %d: %.3f (err=%+.3f)' % (issue, wili, err))
Пример #26
0
                        help='first epiweek override')
    # optional end of the update range
    parser.add_argument('--last',
                        '-l',
                        default=None,
                        type=int,
                        help='last epiweek override')
    # dry-run flag: compute but do not persist
    parser.add_argument('--test',
                        '-t',
                        default=False,
                        action='store_true',
                        help='dry run only')
    args = parser.parse_args()

    # epiweeks and timing
    first, last = None, None
    if args.first is not None:
        first = args.first
    if args.last is not None:
        last = args.last
    # defaults: end at the most recent issue, start 52 weeks before the end
    if last is None:
        last = get_most_recent_issue()
    if first is None:
        first = flu.add_epiweeks(last, -52)
    if last < first:
        raise Exception('epiweeks in the wrong order')
    # validate that both endpoints are well-formed epiweeks
    flu.check_epiweek(first, last)
    print('Updating epiweeks from %d to %d.' % (first, last))

    # make it happen
    update(first, last, args.test)
Пример #27
0
    def update_quid_db(qd_ts, update_field='value'):
        """Upsert one field of the `quidel` table from a per-location series.

        NOTE: relies on the enclosing scope for `cur`, `first`, `last`,
        `locations`, and `get_num_rows`.
        """
        # check from 4 weeks preceeding the last week with data through this week
        cur.execute(
            'SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `quidel`'
        )
        for (ew0, ew1) in cur:
            ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4)
        # explicit range arguments from the enclosing scope take precedence
        ew0 = ew0 if first is None else first
        ew1 = ew1 if last is None else last
        print('Checking epiweeks between %d and %d...' % (ew0, ew1))

        # keep track of how many rows were added
        rows_before = get_num_rows()

        # check Quidel for new and/or revised data
        # Default update field is 'value'.
        sql = '''
      INSERT INTO
        `quidel` (`location`, `epiweek`, `value`)
      VALUES
        (%s, %s, %s)
      ON DUPLICATE KEY UPDATE
        `value` = %s
    '''
        # the two alternate fields use the same upsert shape, different column
        if update_field == 'num_rows':
            sql = '''
        INSERT INTO
          `quidel` (`location`, `epiweek`, `num_rows`)
        VALUES
          (%s, %s, %s)
        ON DUPLICATE KEY UPDATE
          `num_rows` = %s
      '''
        elif update_field == 'num_devices':
            sql = '''
        INSERT INTO
          `quidel` (`location`, `epiweek`, `num_devices`)
        VALUES
          (%s, %s, %s)
        ON DUPLICATE KEY UPDATE
          `num_devices` = %s
      '''

        total_rows = 0

        for location in locations:
            if location not in qd_ts:
                continue
            ews = sorted(qd_ts[location].keys())
            num_missing = 0
            for ew in ews:
                # upsert one (location, epiweek, value) row
                v = qd_ts[location][ew]
                sql_data = (location, ew, v, v)
                cur.execute(sql, sql_data)
                total_rows += 1
                if v == 0:
                    # zero values are reported as "missing"
                    num_missing += 1
            if num_missing > 0:
                print(' [%s] missing %d/%d value(s)' %
                      (location, num_missing, len(ews)))

        # keep track of how many rows were added
        rows_after = get_num_rows()
        print('Inserted %d/%d row(s)' % (rows_after - rows_before, total_rows))
Пример #28
0
def update(locations, terms, first=None, last=None, countries=['US']):
    """Fetch Google Health Trends values for each term/location and upsert
    them into the `ght` table.

    NOTE(review): the mutable default `countries=['US']` is never mutated
    here, but consider a None sentinel if that ever changes.
    """
    # connect to the database
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database='epidata')
    cur = cnx.cursor()

    def get_num_rows():
        # total number of rows currently in the `ght` table
        cur.execute('SELECT count(1) `num` FROM `ght`')
        for (num, ) in cur:
            pass
        return num

    # check from 4 weeks preceeding the last week with data through this week
    cur.execute(
        'SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`')
    for (ew0, ew1) in cur:
        ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4)
    # explicit range arguments override the derived range
    ew0 = ew0 if first is None else first
    ew1 = ew1 if last is None else last
    print('Checking epiweeks between %d and %d...' % (ew0, ew1))

    # keep track of how many rows were added
    rows_before = get_num_rows()

    # check Google Trends for new and/or revised data
    sql = '''
    INSERT INTO
      `ght` (`query`, `location`, `epiweek`, `value`)
    VALUES
      (%s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
      `value` = %s
  '''
    total_rows = 0
    ght = GHT(API_KEY)
    for term in terms:
        print(' [%s] using term' % term)
        # pair locations with countries positionally; the shorter list is
        # padded with its first element
        ll, cl = len(locations), len(countries)
        for i in range(max(ll, cl)):
            location = locations[i] if i < ll else locations[0]
            country = countries[i] if i < cl else countries[0]
            try:
                #term2 = ('"%s"' % term) if ' ' in term else term
                term2 = term
                # retry up to 5 times with exponential backoff
                attempt = 0
                while True:
                    attempt += 1
                    try:
                        result = ght.get_data(ew0,
                                              ew1,
                                              location,
                                              term2,
                                              country=country)
                        break
                    except Exception as ex:
                        if attempt >= 5:
                            raise ex
                        else:
                            delay = 2**attempt
                            print(
                                ' [%s|%s] caught exception (will retry in %ds):'
                                % (term, location, delay), ex)
                            time.sleep(delay)
                values = [
                    p['value'] for p in result['data']['lines'][0]['points']
                ]
                # walk consecutive epiweeks starting at the reported start week
                ew = result['start_week']
                num_missing = 0
                for v in values:
                    # Default SQL location value for US country for backwards compatibility
                    # i.e. California's location is still stored as 'CA',
                    # and having location == 'US' is still stored as 'US'
                    sql_location = location if location != NO_LOCATION_STR else country

                    # Change SQL location for non-US countries
                    if country != 'US':
                        # Underscore added to distinguish countries from 2-letter US states
                        sql_location = country + "_"
                        if location != NO_LOCATION_STR:
                            sql_location = sql_location + location
                    sql_data = (term, sql_location, ew, v, v)
                    cur.execute(sql, sql_data)
                    total_rows += 1
                    if v == 0:
                        num_missing += 1
                        #print(' [%s|%s|%d] missing value' % (term, location, ew))
                    ew = flu.add_epiweeks(ew, 1)
                if num_missing > 0:
                    print(' [%s|%s] missing %d/%d value(s)' %
                          (term, location, num_missing, len(values)))
            except Exception as ex:
                # best-effort per location: log and move on
                print(
                    ' [%s|%s] caught exception (will NOT retry):' %
                    (term, location), ex)

    # keep track of how many rows were added
    rows_after = get_num_rows()
    print('Inserted %d/%d row(s)' % (rows_after - rows_before, total_rows))

    # cleanup
    cur.close()
    cnx.commit()
    cnx.close()
Пример #29
0
def update(locations, terms, first=None, last=None):
  """Fetch Google Health Trends data and upsert it into the `ght` table.

  Args:
    locations: iterable of location codes to query.
    terms: iterable of search terms to query.
    first: optional first epiweek to check; defaults to 4 weeks before the
      most recent epiweek already stored in `ght` (or 200401 if the table
      is empty).
    last: optional last epiweek to check; defaults to the current epiweek
      as reported by MySQL.
  """
  # connect to the database
  u, p = secrets.db.epi
  cnx = mysql.connector.connect(user=u, password=p, database='epidata')
  cur = cnx.cursor()

  def get_num_rows():
    # current number of rows in the `ght` table (used to report net inserts)
    cur.execute('SELECT count(1) `num` FROM `ght`')
    for (num,) in cur:
      pass
    return num

  # check from 4 weeks preceding the last week with data through this week
  cur.execute('SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `ght`')
  for (ew0, ew1) in cur:
    ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4)
  # explicit bounds, if given, override the defaults computed above
  ew0 = ew0 if first is None else first
  ew1 = ew1 if last is None else last
  print('Checking epiweeks between %d and %d...' % (ew0, ew1))

  # keep track of how many rows were added
  rows_before = get_num_rows()

  # check Google Trends for new and/or revised data
  sql = '''
    INSERT INTO
      `ght` (`query`, `location`, `epiweek`, `value`)
    VALUES
      (%s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
      `value` = %s
  '''
  total_rows = 0
  ght = GHT(API_KEY)
  for term in terms:
    print(' [%s] using term' % term)
    for location in locations:
      try:
        term2 = term
        # fetch with exponential backoff: up to 5 attempts, 2^attempt
        # seconds between attempts, since the API fails transiently
        attempt = 0
        while True:
          attempt += 1
          try:
            result = ght.get_data(ew0, ew1, location, term2)
            break
          except Exception as ex:
            if attempt >= 5:
              # bare `raise` preserves the original traceback
              raise
            delay = 2 ** attempt
            print(' [%s|%s] caught exception (will retry in %ds):' % (term, location, delay), ex)
            time.sleep(delay)
        values = [p['value'] for p in result['data']['lines'][0]['points']]
        ew = result['start_week']
        num_missing = 0
        for v in values:
          # value repeated: once for INSERT, once for ON DUPLICATE KEY UPDATE
          sql_data = (term, location, ew, v, v)
          cur.execute(sql, sql_data)
          total_rows += 1
          if v == 0:
            # a zero from the API indicates missing (not truly zero) data
            num_missing += 1
          ew = flu.add_epiweeks(ew, 1)
        if num_missing > 0:
          print(' [%s|%s] missing %d/%d value(s)' % (term, location, num_missing, len(values)))
      except Exception as ex:
        # give up on this term/location pair but continue with the rest
        print(' [%s|%s] caught exception (will NOT retry):' % (term, location), ex)

  # keep track of how many rows were added
  rows_after = get_num_rows()
  print('Inserted %d/%d row(s)' % (rows_after - rows_before, total_rows))

  # cleanup
  cur.close()
  cnx.commit()
  cnx.close()
# Example #30
def get_most_recent_issue():
  """Return the most recent FluView issue from the past 10 weeks."""
  # window: the current epiweek back through 9 weeks earlier (10 total)
  current_week = EpiDate.today().get_ew()
  earliest_week = flu.add_epiweeks(current_week, -9)
  # query national FluView data over that window and keep the newest issue
  response = Epidata.fluview('nat', Epidata.range(earliest_week, current_week))
  rows = Epidata.check(response)
  return max(row['issue'] for row in rows)