Пример #1
0
def get_training_set(location, epiweek, signal, valid):
  """Build a (w)ILI training set for `location` as of `epiweek`.

  Pairs each signal week with the corresponding wILI value, preferring
  unstable (as-of-issue) values over finalized ("stable") ones.

  Args:
    location: FluView location code.
    epiweek: the epiweek as of which data is fetched.
    signal: map of epiweek -> input signal value(s).
    valid: if True, unstable wILI must exist for the most recent 5 weeks.

  Returns:
    (epiweeks, X, Y) as produced by get_training_set_data.

  Raises:
    Exception: if `valid` and recent unstable wILI is unavailable.
  """
  ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
  auth = secrets.api.fluview
  try:
    # wILI as it was known at issue `ew2` (preliminary / unstable values)
    result = Epidata.fluview(location, weeks0, issues=ew2, auth=auth)
    rows = Epidata.check(result)
    unstable = extract(rows, ['wili'])
  except Exception:
    # best-effort: fall back to stable values only
    # (narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate)
    unstable = {}
  rows = Epidata.check(Epidata.fluview(location, weeks0, auth=auth))
  stable = extract(rows, ['wili'])
  data = {}
  num_dropped = 0
  for ew in signal.keys():
    if ew == ew3:
      # never train on the week being predicted
      continue
    sig = signal[ew]
    if ew not in unstable:
      if valid and flu.delta_epiweeks(ew, ew3) <= 5:
        raise Exception('unstable wILI is not available on %d' % ew)
      if ew not in stable:
        num_dropped += 1
        continue
      wili = stable[ew]
    else:
      wili = unstable[ew]
    data[ew] = {'x': sig, 'y': wili}
  if num_dropped:
    msg = 'warning: dropped %d/%d signal weeks because (w)ILI was unavailable'
    print(msg % (num_dropped, len(signal)))
  return get_training_set_data(data)
Пример #2
0
 def _get_partial_trajectory(self, epiweek, valid=True):
   """Return the wILI curve from season start (week 30) through `epiweek`.

   Stable values are used only for weeks at least 5 weeks old (when
   `valid`); as-of-issue (unstable) values override them where available.

   Args:
     epiweek: last epiweek of the partial trajectory.
     valid: if True, distrust stable values newer than 5 weeks.

   Returns:
     A list of wILI values, one per week from week 30 through `epiweek`.

   Raises:
     Exception: if any week in the range has no usable wILI value.
   """
   y, w = EW.split_epiweek(epiweek)
   if w < 30:
     y -= 1
   ew1 = EW.join_epiweek(y, 30)
   ew2 = epiweek
   # stable values within 5 weeks of `ew2` are considered unreliable
   limit = EW.add_epiweeks(ew2, -5)
   weeks = Epidata.range(ew1, ew2)
   stable = Epidata.check(Epidata.fluview(self.region, weeks))
   try:
     unstable = Epidata.check(Epidata.fluview(self.region, weeks, issues=ew2))
   except Exception:
     # best-effort: issue-specific data may not exist for this week
     # (narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate)
     unstable = []
   wili = {}
   for row in stable:
     ew, value = row['epiweek'], row['wili']
     if not valid or ew < limit:
       wili[ew] = value
   # unstable values take precedence over stable ones
   for row in unstable:
     ew, value = row['epiweek'], row['wili']
     wili[ew] = value
   curve = []
   for ew in EW.range_epiweeks(ew1, ew2, inclusive=True):
     if ew not in wili:
       if valid:
         t = 'unstable'
       else:
         t = 'any'
       raise Exception('wILI (%s) not available for week %d' % (t, ew))
     curve.append(wili[ew])
   # sanity check: exactly one value per week in the range
   n1 = EW.delta_epiweeks(ew1, ew2) + 1
   n2 = len(curve)
   if n1 != n2:
     raise Exception('missing data (expected %d, found %d)' % (n1, n2))
   return curve
Пример #3
0
 def __init__(self, region, target):
   """Load stable target observations for `region`, indexed by week number.

   Populates self.data[i]['stable'] and self.valid[i]['stable'] for each
   observed week, plus epiweek<->index maps (self.ew2i / self.i2ew).
   """
   self.region = region
   self.target = target
   weeks = Epidata.range(199301, 202330)
   auth = secrets.api.datasetname_targets
   rx = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1000000)
   self.data = {}
   self.valid = {}
   self.ew2i, self.i2ew = {}, {}
   # assign a dense index to every epiweek in range
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in rx:
     # only the stable value is kept; the row's lag is irrelevant here
     ew, observation = row['epiweek'], row['value']
     if ew not in self.ew2i:
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {'stable': False}
     self.data[i]['stable'] = observation
     self.valid[i]['stable'] = True
   self.weeks = sorted(list(self.data.keys()))
Пример #4
0
 def get_training_set_datasetname(location, epiweek, signal, target,
                                  signal_to_truth_ew_shift):
     """Pair shifted signal weeks with ground-truth target values.

     Signal weeks are shifted forward by `signal_to_truth_ew_shift` to
     align with the ground-truth week they describe; the week being
     predicted is excluded, and weeks with no ground truth are dropped
     (with a warning).

     Returns:
         (epiweeks, X, Y) sorted by epiweek.
     """
     _, _, ew3, weeks0, _ = get_weeks(epiweek)
     auth = secrets.api.datasetname_targets
     truth_by_week = {}
     for row in Epidata.check(
             Epidata.datasetname_targets(auth, target, location, weeks0)):
         truth_by_week[row['epiweek']] = row['value']
     pairs = {}
     dropped_weeks = 0
     for sig_week, sig in signal.items():
         truth_week = flu.add_epiweeks(sig_week, signal_to_truth_ew_shift)
         # skip the week we're trying to predict
         if truth_week == ew3:
             continue
         if truth_week not in truth_by_week:
             dropped_weeks += 1
             continue
         pairs[truth_week] = {'x': sig, 'y': truth_by_week[truth_week]}
     if dropped_weeks:
         msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
         print(msg % (dropped_weeks, len(signal)))
     ordered = sorted(pairs)
     return (ordered,
             [pairs[week]['x'] for week in ordered],
             [pairs[week]['y'] for week in ordered])
Пример #5
0
 def fetch(weeks):
   """Download every article/hour wiki time series for `weeks` and spoof
   a single API-style response whose rows are keyed by epiweek.

   Each (article, hour) pair becomes one named field (from `fields`, in
   order) on the per-epiweek row.
   """
   # epiweek -> {'epiweek': ew, <field>: value, ...}
   by_week = {}
   field_idx = 0
   for article in articles:
     for hour in hours:
       # fetch this single time series from the API
       epidata = Epidata.check(
         Epidata.wiki(article, epiweeks=weeks, hours=hour))
       name = fields[field_idx]
       field_idx += 1
       for row in epidata:
         ew = row['epiweek']
         entry = by_week.setdefault(ew, {'epiweek': ew})
         entry[name] = row['value']
   # spoof the API response, rows ordered by epiweek
   return {
     'result': 1,
     'message': None,
     'epidata': [by_week[ew] for ew in sorted(by_week)],
   }
Пример #6
0
 def __init__(self, region):
   """Load historical wILI for `region` as complete 52-week season curves.

   Seasons run from week 30 to week 29 of the following year; incomplete
   seasons are discarded, and the 2008/2009 pair is spliced to bridge the
   pandemic season.
   """
   self.region = region
   weeks = Epidata.range(200330, 202330)
   rows = Epidata.check(Epidata.fluview(self.region, weeks))
   self.seasons = {}
   for row in rows:
     ew, wili = row['epiweek'], row['wili']
     season_year, week = EW.split_epiweek(ew)
     if week < 30:
       season_year -= 1
     offset = EW.delta_epiweeks(EW.join_epiweek(season_year, 30), ew)
     season = self.seasons.setdefault(season_year, {})
     if 0 <= offset < 52:
       season[offset] = wili
   # drop any season that is missing weeks
   for year in sorted(self.seasons):
     if len(self.seasons[year]) != 52:
       del self.seasons[year]
   if 2008 in self.seasons and 2009 in self.seasons:
     # splice the tail of the 2009 season onto 2008, then drop 2009
     for offset in range(40, 52):
       self.seasons[2008][offset] = self.seasons[2009][offset]
     del self.seasons[2009]
   self.years = sorted(self.seasons)
   self.curves = {
     y: [self.seasons[y][i] for i in range(52)] for y in self.years
   }
Пример #7
0
 def __init__(self, region, target, use_weekly=True):
     """Load PAHO dengue data for `region`, indexed by dense week number.

     Cumulative counts are converted to weekly increments before being
     stored under the 'stable' key of self.data / self.valid.

     Note: `use_weekly` is currently unused; kept for interface
     compatibility with callers — TODO confirm intent.
     """
     self.region = region
     self.target = target
     self.stts = 0
     weeks = Epidata.range(201401, 202330)
     rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))
     self.data = {}
     self.valid = {}
     self.ew2i, self.i2ew = {}, {}
     # assign a dense index to every epiweek in range
     for ew in EW.range_epiweeks(weeks['from'], weeks['to'],
                                 inclusive=True):
         i = len(self.ew2i)
         self.ew2i[ew] = i
         self.i2ew[i] = ew
     # convert cumulative counts to weekly increments
     cumulative = {elt['epiweek']: elt[self.target] for elt in rx}
     w_data = cum_to_week(cumulative)
     for elt in rx:
         ew = elt['epiweek']
         if ew not in self.ew2i:
             continue
         i = self.ew2i[ew]
         if i not in self.data:
             self.data[i] = {}
             self.valid[i] = {'stable': False}
         self.data[i]['stable'] = w_data[ew]
         self.valid[i]['stable'] = True
     self.weeks = sorted(list(self.data.keys()))
     self.dds = DengueDataSource.new_instance(target)
Пример #8
0
def download_preliminary_fluview(f):
    """Write preliminary national wILI (lags 0-2) to `f` as CSV rows of
    `epiweek,nat_<lag>,wili`."""
    for lag in range(3):
        print('preliminary fluview', lag)
        rows = Epidata.check(
            Epidata.fluview('nat', weeks, lag=lag, auth=secrets.api.fluview))
        label = 'nat_%d' % lag
        for row in rows:
            f.write('%d,%s,%.5f\n' % (row['epiweek'], label, row['wili']))
Пример #9
0
def download_fluview(f):
    """Write finalized wILI for every region to `f` as CSV rows of
    `epiweek,location,wili`."""
    for loc in Locations.region_list:
        print('fluview', loc)
        rows = Epidata.check(
            Epidata.fluview(loc, weeks, auth=secrets.api.fluview))
        for row in rows:
            f.write('%d,%s,%.5f\n' % (row['epiweek'], loc, row['wili']))
Пример #10
0
 def __init__(self, region, target):
   """Load target observations for `region` at lags 0-2 plus stable values.

   Populates self.data[i][lag] / self.valid[i][lag] for lag in
   {0, 1, 2, 'stable'}; missing lagged values are backfilled from the
   stable value.
   """
   self.region = region
   self.target = target
   weeks = Epidata.range(199301, 202330)
   auth = secrets.api.datasetname_targets
   # the same stable rows are fetched once per lag; presumably
   # mutate_rows_as_if_lagged mutates its input in place, so the fetches
   # must stay separate — TODO confirm
   r0 = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 0)
   r1 = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1)
   r2 = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 2)
   rx = mutate_rows_as_if_lagged(Epidata.check(Epidata.datasetname_targets(auth, self.target, self.region, weeks)), 1000000)
   self.data = {}
   self.valid = {}
   self.ew2i, self.i2ew = {}, {}
   # assign a dense index to every epiweek in range
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in r0 + r1 + r2 + rx:
     ew, observation, lag = row['epiweek'], row['value'], row['lag']
     if ew not in self.ew2i:
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {0: False, 1: False, 2: False, 'stable': False}
     # anything outside lags 0-2 is treated as the stable value
     if not (0 <= lag <= 2):
       lag = 'stable'
     self.data[i][lag] = observation
     self.valid[i][lag] = True
   self.weeks = sorted(list(self.data.keys()))
   # backfill: missing lagged values default to the stable value
   for i in self.weeks:
     if 'stable' not in self.data[i]:
       continue
     for lag in range(3):
       if lag not in self.data[i]:
         self.data[i][lag] = self.data[i]['stable']
Пример #11
0
 def __init__(self, region):
     """Load FluView wILI for `region` at lags 0-2 plus stable values.

     Populates self.data[i][lag] / self.valid[i][lag] for lag in
     {0, 1, 2, 'stable'}; weeks 200916-201015 are excluded, and missing
     lagged values are backfilled from the stable value.
     """
     self.region = region
     weeks = Epidata.range(200330, 202330)
     auth = secrets.api.fluview
     # wILI as known 0, 1, and 2 weeks after each epiweek
     r0 = Epidata.check(
         Epidata.fluview(self.region, weeks, lag=0, auth=auth))
     r1 = Epidata.check(
         Epidata.fluview(self.region, weeks, lag=1, auth=auth))
     r2 = Epidata.check(
         Epidata.fluview(self.region, weeks, lag=2, auth=auth))
     # finalized ("stable") wILI
     rx = Epidata.check(Epidata.fluview(self.region, weeks, auth=auth))
     self.data = {}
     self.valid = {}
     # maps between epiweeks and dense indices
     self.ew2i, self.i2ew = {}, {}
     for ew in EW.range_epiweeks(weeks['from'], weeks['to'],
                                 inclusive=True):
         # skip this period entirely (presumably the 2009 H1N1 pandemic
         # — TODO confirm)
         if 200916 <= ew <= 201015:
             continue
         i = len(self.ew2i)
         self.ew2i[ew] = i
         self.i2ew[i] = ew
     for row in r0 + r1 + r2 + rx:
         ew, wili, lag = row['epiweek'], row['wili'], row['lag']
         if ew not in self.ew2i:
             continue
         i = self.ew2i[ew]
         if i not in self.data:
             self.data[i] = {}
             self.valid[i] = {0: False, 1: False, 2: False, 'stable': False}
         # anything outside lags 0-2 is treated as the stable value
         if not (0 <= lag <= 2):
             lag = 'stable'
         self.data[i][lag] = wili
         self.valid[i][lag] = True
     self.weeks = sorted(list(self.data.keys()))
     # backfill: missing lagged values default to the stable value
     for i in self.weeks:
         if 'stable' not in self.data[i]:
             continue
         for lag in range(3):
             if lag not in self.data[i]:
                 self.data[i][lag] = self.data[i]['stable']
Пример #12
0
 def get_training_set(location, epiweek, signal, valid):
   """Pair signal weeks with weekly dengue counts for `location`.

   The week being predicted and any week lacking dengue data are skipped.

   Returns:
     (epiweeks, X, Y) as produced by get_training_set_data.
   """
   _, _, ew3, weeks0, _ = get_weeks(epiweek)
   rows = Epidata.check(Epidata.paho_dengue(location, weeks0))
   weekly = extract(rows, 'num_dengue', to_weekly=True)
   data = {}
   for ew, sig in signal.items():
     if ew == ew3 or ew not in weekly:
       continue
     data[ew] = {'x': sig, 'y': weekly[ew]}
   return get_training_set_data(data)
Пример #13
0
 def __init__(self, region, target):
   """Load PAHO dengue observations for `region` at lags 0-2 plus stable.

   Populates self.data[i][lag] / self.valid[i][lag] for lag in
   {0, 1, 2, 'stable'}; missing lagged values are backfilled from the
   stable value.
   """
   self.region = region
   self.target = target
   weeks = Epidata.range(201401, 202330)
   # observations as known 0, 1, and 2 weeks after each epiweek
   r0 = Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=0))
   r1 = Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=1))
   r2 = Epidata.check(Epidata.paho_dengue(self.region, weeks, lag=2))
   # finalized ("stable") observations
   rx = Epidata.check(Epidata.paho_dengue(self.region, weeks))
   self.data = {}
   self.valid = {}
   # maps between epiweeks and dense indices
   self.ew2i, self.i2ew = {}, {}
   for ew in EW.range_epiweeks(weeks['from'], weeks['to'], inclusive=True):
     # if 200916 <= ew <= 201015:
     #   continue
     i = len(self.ew2i)
     self.ew2i[ew] = i
     self.i2ew[i] = ew
   for row in r0 + r1 + r2 + rx:
     ew, observation, lag = row['epiweek'], row[self.target], row['lag']
     if ew not in self.ew2i:
       continue
     i = self.ew2i[ew]
     if i not in self.data:
       self.data[i] = {}
       self.valid[i] = {0: False, 1: False, 2: False, 'stable': False}
     # anything outside lags 0-2 is treated as the stable value
     if not (0 <= lag <= 2):
       lag = 'stable'
     self.data[i][lag] = observation
     self.valid[i][lag] = True
   self.weeks = sorted(list(self.data.keys()))
   # backfill: missing lagged values default to the stable value
   for i in self.weeks:
     if 'stable' not in self.data[i]:
       continue
     for lag in range(3):
       if lag not in self.data[i]:
         self.data[i][lag] = self.data[i]['stable']
Пример #14
0
def get_prediction(location, epiweek, name, fields, fetch, valid):
  """Fit a model on historical (signal, wILI) pairs and return the point
  prediction for the week after `epiweek`.

  Raises:
    Exception: if the signal or (w)ILI history is too short, or if the
      signal is unavailable for the prediction week.
  """
  if type(fields) is str:
    fields = [fields]
  _, _, ew3, _, weeks1 = get_weeks(epiweek)
  signal = extract(Epidata.check(fetch(weeks1)), fields)
  min_rows = 3 + len(fields)
  if ew3 not in signal:
    raise Exception('%s unavailable on %d' % (name, ew3))
  if len(signal) < min_rows:
    raise Exception('%s available less than %d weeks' % (name, min_rows))
  epiweeks, X, Y = get_training_set(location, epiweek, signal, valid)
  min_rows -= 1
  if len(Y) < min_rows:
    raise Exception('(w)ILI available less than %d weeks' % (min_rows))
  model = get_model(ew3, epiweeks, X, Y)
  return apply_model(ew3, model, signal[ew3])
Пример #15
0
  def fit_loch_ness(location, epiweek, name, fields, fetch, valid):
    """Fetch a digital signal via `fetch`, pair it with (w)ILI ground
    truth, fit a weighted linear model, and return the prediction for the
    week after `epiweek`.

    Args:
      location: FluView location code.
      epiweek: the most recent epiweek of available signal data.
      name: human-readable signal name, used in error messages.
      fields: field name (str) or list of field names to extract.
      fetch: callable mapping an Epidata week range to an API response.
      valid: if True, recent unstable wILI must be available.

    Returns:
      The predicted (w)ILI value for the week after `epiweek`.

    Raises:
      Exception: on insufficient or unavailable signal/wILI data.
    """

    # Helper functions
    def get_weeks(epiweek):
      """Return (season start, current week, next week) plus week ranges
      ending at the current and next week respectively."""
      ew1 = 200330
      ew2 = epiweek
      ew3 = flu.add_epiweeks(epiweek, 1)
      weeks0 = Epidata.range(ew1, ew2)
      weeks1 = Epidata.range(ew1, ew3)
      return (ew1, ew2, ew3, weeks0, weeks1)

    def extract(rows, fields):
      """Map each row's epiweek to its field values, coerced to float."""
      data = {}
      for row in rows:
        data[row['epiweek']] = [float(row[f]) for f in fields]
      return data

    def get_training_set_data(data):
      """Split {epiweek: {'x': ..., 'y': ...}} into (epiweeks, X, Y)."""
      epiweeks = sorted(list(data.keys()))
      X = [data[ew]['x'] for ew in epiweeks]
      Y = [data[ew]['y'] for ew in epiweeks]
      return (epiweeks, X, Y)

    def get_training_set(location, epiweek, signal, valid):
      """Pair signal weeks with wILI, preferring unstable (as-of-issue)
      values; drop weeks with no wILI at all."""
      ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
      auth = secrets.api.fluview
      try:
        result = Epidata.fluview(location, weeks0, issues=ew2, auth=auth)
        rows = Epidata.check(result)
        unstable = extract(rows, ['wili'])
      except Exception:
        # best-effort: fall back to stable values only
        unstable = {}
      rows = Epidata.check(Epidata.fluview(location, weeks0, auth=auth))
      stable = extract(rows, ['wili'])
      data = {}
      num_dropped = 0
      for ew in signal.keys():
        # never train on the week being predicted
        if ew == ew3:
          continue
        sig = signal[ew]
        if ew not in unstable:
          if valid and flu.delta_epiweeks(ew, ew3) <= 5:
            raise Exception('unstable wILI is not available on %d' % ew)
          if ew not in stable:
            num_dropped += 1
            continue
          wili = stable[ew]
        else:
          wili = unstable[ew]
        data[ew] = {'x': sig, 'y': wili}
      if num_dropped:
        msg = 'warning: dropped %d/%d signal weeks because (w)ILI was unavailable'
        print(msg % (num_dropped, len(signal)))
      return get_training_set_data(data)

    def dot(*Ms):
      """ Simple function to compute the dot product 
      for any number of arguments.
      """
      N = Ms[0]
      for M in Ms[1:]:
        N = np.dot(N, M)
      return N

    def get_weight(ew1, ew2):
      """ This function gives the weight between two given
      epiweeks based on a function that:
        - drops sharply over the most recent ~3 weeks
        - falls off exponentially with time
        - puts extra emphasis on the past weeks at the 
          same time of year (seasonality)
        - gives no week a weight of zero
      """
      dw = flu.delta_epiweeks(ew1, ew2)
      yr = 52.2
      hl1, hl2, bw = yr, 1, 4
      a = 0.05
      #b = (np.cos(2 * np.pi * (dw / yr)) + 1) / 2
      b = np.exp(-((min(dw % yr, yr - dw % yr) / bw) ** 2))
      c = 2 ** -(dw / hl1)
      d = 1 - 2 ** -(dw / hl2)
      return (a + (1 - a) * b) * c * d

    def get_periodic_bias(epiweek):
      """Return [sin, cos] features encoding the week's position within
      the ~52.2-week year."""
      weeks_per_year = 52.2
      offset = flu.delta_epiweeks(200001, epiweek) % weeks_per_year
      angle = np.pi * 2 * offset / weeks_per_year
      return [np.sin(angle), np.cos(angle)]

    def apply_model(epiweek, beta, values):
      """Apply fitted coefficients `beta` to `values`, appending the same
      bias terms used at training time."""
      bias0 = [1.]
      if beta.shape[0] > len(values) + 1:
        # constant and periodic bias
        bias1 = get_periodic_bias(epiweek)
        obs = np.array([values + bias0 + bias1])
      else:
        # constant bias only
        obs = np.array([values + bias0])
      return float(dot(obs, beta))

    def get_model(ew2, epiweeks, X, Y):
      """Fit weighted least squares of Y on X (plus bias terms), weighting
      rows by get_weight relative to `ew2`."""
      ne, nx1, nx2, ny = len(epiweeks), len(X), len(X[0]), len(Y)
      if ne != nx1 or nx1 != ny:
        raise Exception('length mismatch e=%d X=%d Y=%d' % (ne, nx1, ny))
      weights = np.diag([get_weight(ew1, ew2) for ew1 in epiweeks])
      X = np.array(X).reshape((nx1, nx2))
      Y = np.array(Y).reshape((ny, 1))
      bias0 = np.ones(Y.shape)
      if ne >= 26 and flu.delta_epiweeks(epiweeks[0], epiweeks[-1]) >= 52:
        # constant and periodic bias
        bias1 = np.array([get_periodic_bias(ew) for ew in epiweeks])
        X = np.hstack((X, bias0, bias1))
      else:
        # constant bias only
        X = np.hstack((X, bias0))
      XtXi = np.linalg.inv(dot(X.T, weights, X))
      XtY = dot(X.T, weights, Y)
      return np.dot(XtXi, XtY)

    if type(fields) == str:
      fields = [fields]

    ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
    rows = Epidata.check(fetch(weeks1))
    signal = extract(rows, fields)
    min_rows = 3 + len(fields)

    if ew3 not in signal:
      raise Exception('%s unavailable on %d' % (name, ew3))
    if len(signal) < min_rows:
      raise Exception('%s available less than %d weeks' % (name, min_rows))

    epiweeks, X, Y = get_training_set(location, epiweek, signal, valid)

    min_rows = min_rows - 1
    if len(Y) < min_rows:
      raise Exception('(w)ILI available less than %d weeks' % (min_rows))

    model = get_model(ew3, epiweeks, X, Y)
    value = apply_model(ew3, model, signal[ew3])
    return value
Пример #16
0
def get_epic(location, epiweek, valid):
  """Return the Delphi-Epicast 1-week-ahead point prediction for
  `location` on `epiweek`. (`valid` is accepted for interface parity.)"""
  forecasts = Epidata.check(Epidata.delphi('ec', epiweek))
  return forecasts[0]['forecast']['data'][location]['x1']['point']
Пример #17
0
    def fit_loch_ness(location,
                      epiweek,
                      name,
                      fields,
                      fetch,
                      valid,
                      target,
                      signal_to_truth_ew_shift=0):
        """Fetch a signal via `fetch`, pair it (shifted by
        `signal_to_truth_ew_shift`) with ground-truth target values, fit a
        weighted linear model, and return the prediction for the week
        after `epiweek`.

        Args:
            location: location code for the target dataset.
            epiweek: the most recent epiweek of available signal data.
            name: human-readable signal name, used in error messages.
            fields: field name (str) or list of field names to extract.
            fetch: callable mapping an Epidata week range to a response.
            valid: validity flag (accepted for interface parity).
            target: target name passed to the datasetname_targets API.
            signal_to_truth_ew_shift: weeks to shift signal epiweeks so
                they align with the ground truth they describe.

        Returns:
            The predicted target value for the week after `epiweek`.
        """
        # target_type is added for compatibility for other type of targets such as datasetname data

        # Helper functions
        def get_weeks(epiweek):
            """Return (history start, current week, next week) plus week
            ranges ending at the current and next week respectively."""
            ew1 = 199301
            ew2 = epiweek
            ew3 = flu.add_epiweeks(epiweek, 1)
            weeks0 = Epidata.range(ew1, ew2)
            weeks1 = Epidata.range(ew1, ew3)
            return (ew1, ew2, ew3, weeks0, weeks1)

        def extract(rows, fields, signal_to_truth_ew_shift):
            """Map each row's shifted epiweek to its float field values."""
            data = {}
            for row in rows:
                data[flu.add_epiweeks(row['epiweek'],
                                      signal_to_truth_ew_shift)] = [
                                          float(row[f]) for f in fields
                                      ]
            return data

        def get_training_set_data(data):
            """Split {epiweek: {'x': ..., 'y': ...}} into (epiweeks, X, Y)."""
            epiweeks = sorted(list(data.keys()))
            X = [data[ew]['x'] for ew in epiweeks]
            Y = [data[ew]['y'] for ew in epiweeks]
            return (epiweeks, X, Y)

        def get_training_set_datasetname(location, epiweek, signal, target,
                                         signal_to_truth_ew_shift):
            """Pair shifted signal weeks with ground-truth values; drop
            weeks with no ground truth (with a warning)."""
            ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
            groundTruth = dict()
            auth = secrets.api.datasetname_targets
            datasetnameData = Epidata.check(
                Epidata.datasetname_targets(auth, target, location, weeks0))
            for row in datasetnameData:
                groundTruth[row['epiweek']] = row['value']
            data = {}
            dropped_weeks = 0
            for signal_week in signal.keys():
                ground_truth_week = flu.add_epiweeks(signal_week,
                                                     signal_to_truth_ew_shift)
                # skip the week we're trying to predict
                if ground_truth_week == ew3:
                    continue
                sig = signal[signal_week]
                if ground_truth_week in groundTruth:
                    label = groundTruth[ground_truth_week]
                else:
                    dropped_weeks += 1
                    continue
                data[ground_truth_week] = {'x': sig, 'y': label}
            if dropped_weeks:
                msg = 'warning: dropped %d/%d signal weeks because ground truth / target was unavailable'
                print(msg % (dropped_weeks, len(signal)))
            epiweeks = sorted(list(data.keys()))
            X = [data[week]['x'] for week in epiweeks]
            Y = [data[week]['y'] for week in epiweeks]
            return (epiweeks, X, Y)

        def dot(*Ms):
            """ Simple function to compute the dot product
      for any number of arguments.
      """
            N = Ms[0]
            for M in Ms[1:]:
                N = np.dot(N, M)
            return N

        def get_weight(ew1, ew2):
            """ This function gives the weight between two given
      epiweeks based on a function that:
        - drops sharply over the most recent ~3 weeks
        - falls off exponentially with time
        - puts extra emphasis on the past weeks at the
          same time of year (seasonality)
        - gives no week a weight of zero
      """
            dw = flu.delta_epiweeks(ew1, ew2)
            yr = 52.2
            hl1, hl2, bw = yr, 1, 4
            a = 0.05
            # b = (np.cos(2 * np.pi * (dw / yr)) + 1) / 2
            b = np.exp(-((min(dw % yr, yr - dw % yr) / bw)**2))
            c = 2**-(dw / hl1)
            d = 1 - 2**-(dw / hl2)
            return (a + (1 - a) * b) * c * d

        def get_periodic_bias(epiweek):
            """Return [sin, cos] features encoding the week's position
            within the ~52.2-week year."""
            weeks_per_year = 52.2
            offset = flu.delta_epiweeks(200001, epiweek) % weeks_per_year
            angle = np.pi * 2 * offset / weeks_per_year
            return [np.sin(angle), np.cos(angle)]

        def apply_model(epiweek, beta, values):
            """Apply fitted coefficients `beta` to `values`, appending the
            same bias terms used at training time."""
            bias0 = [1.]
            if beta.shape[0] > len(values) + 1:
                # constant and periodic bias
                bias1 = get_periodic_bias(epiweek)
                obs = np.array([values + bias0 + bias1])
            else:
                # constant bias only
                obs = np.array([values + bias0])
            return float(dot(obs, beta))

        def get_model(ew2, epiweeks, X, Y):
            """Fit weighted least squares of Y on X (plus bias terms),
            weighting rows by get_weight relative to `ew2`."""
            ne, nx1, nx2, ny = len(epiweeks), len(X), len(X[0]), len(Y)
            if ne != nx1 or nx1 != ny:
                raise Exception('length mismatch e=%d X=%d Y=%d' %
                                (ne, nx1, ny))
            weights = np.diag([get_weight(ew1, ew2) for ew1 in epiweeks])
            X = np.array(X).reshape((nx1, nx2))
            Y = np.array(Y).reshape((ny, 1))
            bias0 = np.ones(Y.shape)
            if ne >= 26 and flu.delta_epiweeks(epiweeks[0],
                                               epiweeks[-1]) >= 52:
                # constant and periodic bias
                bias1 = np.array([get_periodic_bias(ew) for ew in epiweeks])
                X = np.hstack((X, bias0, bias1))
            else:
                # constant bias only
                X = np.hstack((X, bias0))
            XtXi = np.linalg.inv(dot(X.T, weights, X))
            XtY = dot(X.T, weights, Y)
            return np.dot(XtXi, XtY)

        if type(fields) == str:
            fields = [fields]

        ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
        rows = Epidata.check(fetch(weeks1))
        signal = extract(rows, fields, signal_to_truth_ew_shift)
        # rule of thumb: require num training instances >= 10x num features and >= 52
        min_rows = max(10 * len(fields), 52)

        if ew3 not in signal:
            raise Exception('%s unavailable on %d' % (name, ew3))
        if len(signal) < min_rows:
            raise Exception('%s available less than %d weeks' %
                            (name, min_rows))

        epiweeks, X, Y = get_training_set_datasetname(
            location, epiweek, signal, target, signal_to_truth_ew_shift)

        min_rows = min_rows - 1
        if len(Y) < min_rows:
            raise Exception(
                'datasetname_targets available less than %d weeks' %
                (min_rows))

        model = get_model(ew3, epiweeks, X, Y)
        value = apply_model(ew3, model, signal[ew3])
        return value
Пример #18
0
def get_most_recent_issue():
  """Return the latest FluView issue published within the last 10 weeks."""
  ew2 = EpiDate.today().get_ew()
  ew1 = flu.add_epiweeks(ew2, -9)
  recent = Epidata.check(Epidata.fluview('nat', Epidata.range(ew1, ew2)))
  return max(row['issue'] for row in recent)
Пример #19
0
  def fit_loch_ness(location, epiweek, name, field, fetch, valid, target):
    """Fetch a signal via `fetch`, pair it with weekly dengue counts, fit
    a weighted linear model, and return the prediction for the week after
    `epiweek`.

    Args:
      location: PAHO dengue location code.
      epiweek: the most recent epiweek of available signal data.
      name: human-readable signal name, used in error messages.
      field: single field name to extract from fetched rows.
      fetch: callable mapping an Epidata week range to an API response.
      valid: validity flag (accepted for interface parity; see NOTE below).
      target: target name (accepted for interface parity).

    Returns:
      The predicted target value for the week after `epiweek`.
    """
    # target_type is added for compatibility for other type of targets such as norovirus data

    # Helper functions
    def get_weeks(epiweek):
      """Return (history start, current week, next week) plus week ranges
      ending at the current and next week respectively."""
      ew1 = 201401
      ew2 = epiweek
      ew3 = flu.add_epiweeks(epiweek, 1)
      weeks0 = Epidata.range(ew1, ew2)
      weeks1 = Epidata.range(ew1, ew3)
      return (ew1, ew2, ew3, weeks0, weeks1)

    def extract(rows, field, to_weekly=False):
      """Map each row's epiweek to its float field value, optionally
      converting cumulative counts to weekly increments."""
      data = {}
      for row in rows:
        data[row['epiweek']] = float(row[field])
      if not to_weekly:
        return data
      else:
        w_data = cum_to_week(data)
        return w_data

    def get_training_set_data(data):
      """Split {epiweek: {'x': ..., 'y': ...}} into (epiweeks, X, Y)."""
      epiweeks = sorted(list(data.keys()))
      X = [data[ew]['x'] for ew in epiweeks]
      Y = [data[ew]['y'] for ew in epiweeks]
      return (epiweeks, X, Y)

    def get_training_set(location, epiweek, signal, valid):
      """Pair signal weeks with weekly dengue counts; `valid` is unused."""
      ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
      result = Epidata.paho_dengue(location, weeks0)
      rows = Epidata.check(result)
      stable = extract(rows, 'num_dengue', to_weekly=True)
      data = {}
      for ew in signal.keys():
        if ew == ew3 or ew not in stable:
          continue
        sig = signal[ew]
        num_dengue = stable[ew]
        data[ew] = {'x': sig, 'y': num_dengue}
      return get_training_set_data(data)

    def dot(*Ms):
      """ Simple function to compute the dot product
      for any number of arguments.
      """
      N = Ms[0]
      for M in Ms[1:]:
        N = np.dot(N, M)
      return N

    def get_weight(ew1, ew2):
      """ This function gives the weight between two given
      epiweeks based on a function that:
        - drops sharply over the most recent ~3 weeks
        - falls off exponentially with time
        - puts extra emphasis on the past weeks at the
          same time of year (seasonality)
        - gives no week a weight of zero
      """
      dw = flu.delta_epiweeks(ew1, ew2)
      yr = 52.2
      hl1, hl2, bw = yr, 1, 4
      a = 0.05
      # b = (np.cos(2 * np.pi * (dw / yr)) + 1) / 2
      b = np.exp(-((min(dw % yr, yr - dw % yr) / bw) ** 2))
      c = 2 ** -(dw / hl1)
      d = 1 - 2 ** -(dw / hl2)
      return (a + (1 - a) * b) * c * d

    def get_periodic_bias(epiweek):
      """Return [sin, cos] features encoding the week's position within
      the ~52.2-week year."""
      weeks_per_year = 52.2
      offset = flu.delta_epiweeks(201401, epiweek) % weeks_per_year
      angle = np.pi * 2 * offset / weeks_per_year
      return [np.sin(angle), np.cos(angle)]

    def apply_model(epiweek, beta, values):
      """Apply fitted coefficients `beta` to `values`, appending the same
      bias terms used at training time."""
      bias0 = [1.]
      if beta.shape[0] > len(values) + 1:
        # constant and periodic bias
        bias1 = get_periodic_bias(epiweek)
        obs = np.array([values + bias0 + bias1])
      else:
        # constant bias only
        obs = np.array([values + bias0])
      return float(dot(obs, beta))

    def get_model(ew2, epiweeks, X, Y):
      """Fit weighted least squares of Y on X (plus bias terms), weighting
      rows by get_weight relative to `ew2`."""
      ne, nx1, ny = len(epiweeks), len(X), len(Y)
      if type(X[0]) == type([]):
        nx2 = len(X[0])
      else:
        nx2 = 1
      if ne != nx1 or nx1 != ny:
        raise Exception('length mismatch e=%d X=%d Y=%d' % (ne, nx1, ny))
      weights = np.diag([get_weight(ew1, ew2) for ew1 in epiweeks])
      X = np.array(X).reshape((nx1, nx2))
      Y = np.array(Y).reshape((ny, 1))
      bias0 = np.ones(Y.shape)
      if ne >= 26 and flu.delta_epiweeks(epiweeks[0], epiweeks[-1]) >= 52:
        # constant and periodic bias
        bias1 = np.array([get_periodic_bias(ew) for ew in epiweeks])
        X = np.hstack((X, bias0, bias1))
      else:
        # constant bias only
        X = np.hstack((X, bias0))
      XtXi = np.linalg.inv(dot(X.T, weights, X))
      XtY = dot(X.T, weights, Y)
      return np.dot(XtXi, XtY)

    ew1, ew2, ew3, weeks0, weeks1 = get_weeks(epiweek)
    rows = Epidata.check(fetch(weeks1))
    signal = extract(rows, field)
    min_rows = 4

    if ew3 not in signal:
      raise Exception('%s unavailable on %d' % (name, ew3))
    if len(signal) < min_rows:
      raise Exception('%s available less than %d weeks' % (name, min_rows))

    # NOTE(review): `target` is passed where the helper's 4th parameter is
    # `valid`; the helper never reads that argument, so behavior is
    # unaffected, but this looks like a copy/paste slip — confirm intent.
    epiweeks, X, Y = get_training_set(location, epiweek, signal, target)

    min_rows = min_rows - 1
    if len(Y) < min_rows:
      raise Exception('paho_dengue available less than %d weeks' % (min_rows))

    model = get_model(ew3, epiweeks, X, Y)
    value = apply_model(ew3, model, [signal[ew3]])
    return value
Пример #20
0
def nowcast(epiweek, epidata_cache=None):
    """Produce a wILI nowcast for `epiweek` via sensor fusion.

    Combines every sensor reading available for the target week with the
    historical sensor-vs-truth error structure, then fuses them into
    state-level estimates and projects to all output locations.

    Args:
        epiweek: the epiweek to nowcast.
        epidata_cache: optional object exposing `sensors` and `fluview`
            methods mirroring the Epidata API, used to avoid refetching.

    Returns:
        (locations, point_estimates, std_devs) as parallel lists.

    Raises:
        Exception: if the state mapping matrix H is singular or the
            H/W rows do not sum to 1.
    """
    si = StateInfo()
    # all sensors and locations
    all_names, all_loc = get_all_sensors()
    # get sensors available on the target week
    rows = Epidata.check(
        Epidata.sensors(secrets.api.sensors, all_names, all_loc, epiweek))
    present = {}
    for row in rows:
        name, loc, value = row['name'], row['location'], row['value']
        if name not in present:
            present[name] = {}
        if loc not in present[name]:
            present[name][loc] = value
    # get the history of each available sensor (6 sec)
    past = {}
    sensor_locs = set()
    missing = set()
    past_weeks = Epidata.range(FIRST_DATA_EPIWEEK,
                               flu.add_epiweeks(epiweek, -1))
    all_epiweeks = [
        w for w in flu.range_epiweeks(
            past_weeks['from'], past_weeks['to'], inclusive=True)
    ]
    num_obs = len(all_epiweeks)
    for name in present.keys():
        past[name] = {}
        for loc in present[name].keys():
            past[name][loc] = {}
            sensor_locs.add(loc)
            try:
                if epidata_cache is not None:
                    rows = epidata_cache.sensors(name, loc, past_weeks)
                else:
                    rows = Epidata.check(
                        Epidata.sensors(secrets.api.sensors, name, loc,
                                        past_weeks))
                if len(rows) < 2:
                    # treat a near-empty history the same as a fetch failure
                    raise Exception('insufficient sensor history')
                for row in rows:
                    past[name][loc][row['epiweek']] = row['value']
            except Exception:
                # best-effort: this (sensor, location) is dropped below
                # (narrowed from a bare `except:`)
                missing.add((name, loc))
    # remove sensors with zero past data
    for (n, l) in missing:
        del present[n][l]
        if len(present[n]) == 0:
            del present[n]
        del past[n][l]
        if len(past[n]) == 0:
            del past[n]
    # inventory of usable (sensor, location) pairs
    all_sensors = []
    for n in all_names:
        for l in si.nat + si.hhs + si.cen + si.sta:
            if n in past and l in past[n]:
                all_sensors.append((n, l))
    num_sensors = len(all_sensors)
    # get historical ground truth for each sensor (4 sec)
    truth = {}
    auth = secrets.api.fluview
    for loc in sensor_locs:
        truth[loc] = {}
        if epidata_cache is not None:
            srows = epidata_cache.fluview(loc, past_weeks)
        else:
            srows = Epidata.check(Epidata.fluview(loc, past_weeks, auth=auth))
        sdata = dict([(r['epiweek'], r) for r in srows])
        udata = {}
        try:
            # prefer wILI as it was known at the end of the history window
            i = past_weeks['to']
            result = Epidata.fluview(loc, past_weeks, issues=i, auth=auth)
            urows = Epidata.check(result)
            udata = dict([(r['epiweek'], r) for r in urows])
        except Exception:
            # best-effort: fall back entirely to stable values
            pass
        rows = []
        for ew in all_epiweeks:
            if ew in udata:
                rows.append(udata[ew])
            else:
                rows.append(sdata[ew])
        for row in rows:
            truth[loc][row['epiweek']] = row['wili']
    # error matrix: rows are epiweeks, cols are sensors; NaN marks missing
    X = np.full((num_obs, num_sensors), np.nan)
    for (r, ew) in enumerate(all_epiweeks):
        for (c, (name, loc)) in enumerate(all_sensors):
            if name in past and loc in past[name] and ew in past[name][
                    loc] and loc in truth and ew in truth[loc]:
                X[r, c] = past[name][loc][ew] - truth[loc][ew]
    # sparse precision matrix
    Ri = Fusion.precision(X, mean=np.zeros((1, num_sensors)), b=0.25)
    # prepare for sensor fusion
    inputs = all_sensors
    state = si.sta
    outputs = si.nat + si.hhs + si.cen + si.sta
    num_i, num_s, num_o = len(inputs), len(state), len(outputs)
    # input  (z): [ num_i  x    1   ]
    # state  (x): [ num_s  x    1   ]
    # output (y): [ num_o  x    1   ]
    # S->I   (H): [ num_i  x  num_s ]
    # S->O   (W): [ num_o  x  num_s ]
    z = np.array([present[n][l] for (n, l) in inputs]).reshape((num_i, 1))
    H = np.zeros((num_i, num_s))
    W = np.zeros((num_o, num_s))
    # populate H, given input signals
    for (row, (name, location)) in enumerate(inputs):
        for (col, loc) in enumerate(state):
            if loc in si.within[location]:
                H[row, col] = si.weight[location][loc]
    if np.linalg.matrix_rank(np.dot(H.T, H)) != num_s:
        raise Exception('H is singular')
    if not np.allclose(np.sum(H, axis=1), 1):
        raise Exception('H rows do not sum to 1')
    # populate W, given output locations
    for (row, location) in enumerate(outputs):
        for (col, loc) in enumerate(state):
            if loc in si.within[location]:
                W[row, col] = si.weight[location][loc]
    if not np.allclose(np.sum(W, axis=1), 1):
        raise Exception('W rows do not sum to 1')
    # sensor fusion
    x, P = Fusion.fuse(z, Ri, H)
    y, S = Fusion.extract(x, P, W)
    print(num_obs, num_i, num_s, num_o)
    pt = [float(v) for v in y.flatten()]
    std = [float(v) for v in np.sqrt(S).flatten()]
    return (outputs, pt, std)