示例#1
0
def mmwrid_to_epiweek(mmwrid):
  """Translate a CDC MMWR week ID into the matching epiweek.

  MMWR IDs are sequential (one per week), so the distance between `mmwrid`
  and a known anchor ID is the number of weeks to shift the anchor date by.
  The result is whatever `EpiDate.get_ew()` returns for the shifted date.
  """

  # Anchor pair: MMWR ID 2179 corresponds to epiweek 2003w40, represented
  # here by the date 2003-09-28.
  anchor_mmwrid = 2179
  anchor_date = EpiDate(2003, 9, 28)

  weeks_from_anchor = mmwrid - anchor_mmwrid
  shifted_date = anchor_date.add_weeks(weeks_from_anchor)
  return shifted_date.get_ew()
示例#2
0
 def get_most_recent_issue(self):
   """Return the most recent epiweek for which FluView data is available."""
   # Query national FluView rows from nine weeks back through the current
   # epiweek, then keep the largest `issue` value among them.
   last_week = EpiDate.today().get_ew()
   first_week = add_epiweeks(last_week, -9)
   week_window = self.epidata.range(first_week, last_week)
   rows = self.epidata.check(self.epidata.fluview('nat', week_window))
   return max([entry['issue'] for entry in rows])
示例#3
0
def get_kcdc_data():
    """Download the KCDC influenza series and label each value with an epiweek.

    The service is queried for every season from 2004 up to the current one.
    Each season's record holds a backtick-separated string of values under
    the "VALUE" key; the values are assigned consecutive epiweeks starting
    at 2004w36.

    Returns:
        A tuple `(ews, ilis)` of parallel lists: the epiweeks and the float
        values reported for them, in chronological order.
    """
    issue = EpiDate.today().get_ew()
    # Once the current week is past week 35, the season ending next year has
    # already started, so it is included in the request.
    last_season = issue // 100 + (1 if issue % 100 > 35 else 0)
    url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do'
    form_fields = {
        'icdNm': 'influenza',
        'startYear': '2004',  # Started in 2004
        'endYear': str(last_season)
    }
    response = requests.post(url, data=form_fields)
    seasons = response.json()['data']

    ews = []
    ilis = []
    # The first season starts on 2004w36. Every later season starts on the
    # week right after the previous season's last value.
    season_start = 200436
    for year in range(2004, last_season):
        raw_values = seasons[year - 2004]["VALUE"].split('`')
        values = [float(text) for text in raw_values if text != '']
        # `season_end` is exclusive: it is the first week NOT covered by this
        # season's values.
        season_end = add_epiweeks(season_start, len(values))
        weeks = list(range_epiweeks(season_start, season_end))
        for week, value in zip(weeks, values):
            ews.append(week)
            ilis.append(value)
        # Carry the exclusive end forward rather than computing
        # `ews[-1] + 1`: plain integer addition can yield a non-existent
        # epiweek at a year boundary and fails when `ews` is still empty.
        season_start = season_end
    return ews, ilis
示例#4
0
def main():
    """Command-line entry point: load ECDC data into the database.

    With `--file` and `--issue`, the given file is loaded as that issue.
    Otherwise the current data is downloaded into a temporary directory and
    every downloaded CSV is loaded under the current epiweek as the issue.
    Because downloads fail intermittently, the download-and-load cycle is
    attempted up to five times; a warning is printed only if every attempt
    had at least one file fail to load.

    Raises:
        Exception: if exactly one of `--file` and `--issue` is given.
    """
    # args and usage
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test',
        action='store_true',
        help='do dry run only, do not update the database'
    )
    parser.add_argument(
        '--file',
        type=str,
        help='load an existing zip file (otherwise fetch current data)'
    )
    parser.add_argument(
        '--issue',
        type=int,
        help='issue of the file (e.g. 201740); used iff --file is given'
    )
    args = parser.parse_args()

    if (args.file is None) != (args.issue is None):
        raise Exception('--file and --issue must both be present or absent')

    date = datetime.datetime.now().strftime('%Y-%m-%d')
    print('assuming release date is today, %s' % date)

    ensure_tables_exist()
    if args.file:
        update_from_file(args.issue, date, args.file, test_mode=args.test)
        return

    # Code doesn't always download all files, unreproducible errors
    # Try a few times and hopefully one will work
    max_tries = 5
    for _attempt in range(max_tries):
        # Use temporary directory to avoid data from different time
        #   downloaded to same folder
        suffix = ''.join(
            random.choice('0123456789abcdefghijklmnopqrstuvwxyz')
            for _ in range(8))
        tmp_dir = 'downloads_' + suffix
        subprocess.call(["mkdir", tmp_dir])
        db_error = False
        try:
            download_ecdc_data(download_dir=tmp_dir)
            issue = EpiDate.today().get_ew()
            files = glob.glob('%s/*.csv' % tmp_dir)
            # Read (and discard) the first line of every file; the only
            # observable effect is an error if a file cannot be opened.
            for filename in files:
                with open(filename, 'r') as f:
                    f.readline()
            for filename in files:
                try:
                    update_from_file(
                        issue, date, filename, test_mode=args.test)
                    subprocess.call(["rm", filename])
                except Exception as ex:
                    # Remember the failure so the whole cycle is retried, but
                    # report it instead of hiding it.
                    print('failed to update from %s: %s' % (filename, ex))
                    db_error = True
        finally:
            # Always remove the temporary directory, even if this attempt
            # raised.
            subprocess.call(["rm", "-r", tmp_dir])
        if not db_error:
            break  # Exit loop with success
    else:
        # Reached only when no attempt succeeded (the loop never hit `break`),
        # so a success on the final attempt no longer triggers the warning.
        print('WARNING: Database `ecdc_ili` did not update successfully')
 def get_weeks(self):
     """List every epiweek on which both truth and sensors are available.

     The list runs from `self.FIRST_DATA_EPIWEEK` through the week before the
     current epiweek, with both endpoints included.
     """
     current_week = EpiDate.today().get_ew()
     final_week = add_epiweeks(current_week, -1)
     return list(
         range_epiweeks(self.FIRST_DATA_EPIWEEK, final_week, inclusive=True))
 def get_most_recent_issue(self, location):
     """Return the latest epiweek with paho_dengue data for `location`.

     Only rows from 52 weeks back through the current epiweek are
     considered; the largest `epiweek` value among them is returned.
     """
     end_week = EpiDate.today().get_ew()
     start_week = add_epiweeks(end_week, -52)
     week_window = self.epidata.range(start_week, end_week)
     rows = self.epidata.check(
         self.epidata.paho_dengue(location, week_window))
     return max([entry['epiweek'] for entry in rows])
 def _ew2date(ew):
     """Return, as a string, the first day (Sunday) of epiweek `ew`."""
     year, week = flu.split_epiweek(ew)
     # `from_epiweek` gives the middle of the week (Wednesday); stepping back
     # three days lands on the Sunday that starts the week.
     week_start = EpiDate.from_epiweek(year, week).add_days(-3)
     return str(week_start)
示例#8
0
def get_current_issue():
  """Determine the current issue (as an epiweek) from the FluSurv main page.

  The app's init payload carries a `loaddatetime` string such as
  "Jan 05, 2018"; the epiweek containing that date is returned.
  """

  payload = fetch_json('GetPhase03InitApp?appVersion=Public', None)
  loaded_on = datetime.strptime(payload['loaddatetime'], '%b %d, %Y')
  load_date = EpiDate(loaded_on.year, loaded_on.month, loaded_on.day)
  return load_date.get_ew()
示例#9
0
def season_db_to_epiweek(season_str,
                         db_date_str,
                         first_db_date_of_season_str="1-Aug"):
    """Convert a season label plus a day-month string into an epiweek.

    Args:
        season_str: season label whose part before the first "-" is the
            season's first calendar year (e.g. "2015-16").
        db_date_str: day and abbreviated month name, e.g. "29-Feb".
        first_db_date_of_season_str: day-month on which a season starts.
            Dates on or after it belong to the season's first year; earlier
            dates belong to the following year.

    Returns:
        The epiweek (from `EpiDate.get_ew()`) containing the resolved date.

    Raises:
        ValueError: if a date string cannot be parsed, or the resolved date
            does not exist (e.g. 29-Feb in a non-leap year).
    """
    first_year = int(season_str.split("-")[0])

    # FIXME check/enforce locale
    def month_and_day(day_month_str):
        # Parse against a leap year so that "29-Feb" is accepted here; the
        # real year is chosen afterwards. Parsing it eagerly against both
        # candidate years would reject 29-Feb whenever the season's first
        # year is not a leap year, even though the date belongs to the
        # second year.
        parsed = datetime.datetime.strptime(
            day_month_str + "-2000", "%d-%b-%Y")
        return parsed.month, parsed.day

    month, day = month_and_day(db_date_str)
    season_start = month_and_day(first_db_date_of_season_str)
    year = first_year if (month, day) >= season_start else first_year + 1
    # Building the real date validates it for the chosen year.
    date = datetime.date(year, month, day)
    return EpiDate(date.year, date.month, date.day).get_ew()
示例#10
0
def main():
    """Command-line entry point: fetch KCDC data and store it.

    Today's date is used as the release date and the current epiweek as the
    issue. With `--test`, the update is run in test mode.
    """
    # args and usage
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test',
        action='store_true',
        help='do dry run only, do not update the database',
    )
    options = parser.parse_args()

    release_date = datetime.datetime.now().strftime('%Y-%m-%d')
    print('assuming release date is today, %s' % release_date)
    current_issue = EpiDate.today().get_ew()

    ensure_tables_exist()

    epiweeks, values = get_kcdc_data()
    update_from_data(
        epiweeks, values, release_date, current_issue,
        test_mode=options.test)
示例#11
0
  def extract_epiweek_and_team(filename):
    """
    Extract the submission epiweek (epiweek of most recently published report)
    and the team name from the file name of a flu contest submission.

    The file name (directories are ignored) must look like
    `EW<ww>-<team>-<yyyy>-<mm>-<dd>.csv`.

    The return value is a tuple of:
      1. the submission epiweek (e.g. 201751)
      2. the team name (e.g. "delphi-epicast")

    Raises ValueError (a subclass of Exception) if the file name does not
    follow the naming convention or the week number is not in 1..53.
    """

    # this is the naming convention for 2017 flu contest submissions; the dot
    # before the extension is escaped so that only a literal ".csv" matches
    pattern = re.compile(r'^EW(\d{2})-(.*)-(\d{4})-(\d{2})-(\d{2})\.csv$')
    basename = os.path.basename(filename)
    match = pattern.match(basename)
    if match is None:
      # only able to parse this specific naming convention
      raise ValueError('unrecognized submission file name: %s' % basename)

    week = int(match.group(1))
    team = match.group(2)
    year = int(match.group(3))
    month = int(match.group(4))
    day = int(match.group(5))

    # Validate before doing any date work: an impossible week number would
    # make the backwards search below run forever.
    if not 1 <= week <= 53:
      # prevent an infinite loop
      raise ValueError('invalid week number: %d' % week)

    epiweek = EpiDate(year, month, day).get_ew()

    # We know the week number, but the year has to be inferred from the
    # submission date. Since the week of submission is never less than the week
    # of the most recent report, we can step backwards from the week of
    # submission until we find the expected week number. Ordinarily, this will
    # take exactly two steps. For example, data collected on 2017w51 is
    # reported on 2017w52, and our forecast is submitted on 2018w01; so we
    # start with 2018w01 and step backwards until find the first week 51, which
    # is 2017w51.
    while Epiweek.split_epiweek(epiweek)[1] != week:
      epiweek = Epiweek.add_epiweeks(epiweek, -1)

    return epiweek, team
示例#12
0
            #print('Updated Epicast df for %d users.' % future._num_users)

        forecaster._callback = update_epicast_df

        print('Generating epicast for', epiweek)
        forecaster.open()
        forecast = forecaster.forecast(
            epiweek)  # is this the forecast function in fc_abstract.py?
        filename = ForecastIO.save_csv(forecast)
        forecaster.close()
        print(filename)
        return filename


if __name__ == '__main__':
    # Test-only entry point: runs a single epicast for last week's epiweek
    # and prints the caveats that apply to this mode.
    epiweek = EpiDate.today().add_weeks(-1).get_ew()
    print("epiweek: ", epiweek)
    print('\n'.join([
        'WARNING: For testing only!',
        ' - Using very small number of samples',
        ' - Not uploading submissions to database',
        ' - Not emailing submissions to CDC',
        ' - Assuming last published wILI on %d' % epiweek,
        ' - Limited locations',
    ]))

    ec_age_groups = [
        'rate_overall',
        'rate_age_0',
        'rate_age_1',
        'rate_age_2',
        'rate_age_3',
        'rate_age_4',
    ]
    # NOTE(review): the meaning of 1000 and of the two 0.001 arguments is
    # defined by Submissions_Hosp / run_epicast, outside this snippet.
    sub = Submissions_Hosp(ec_age_groups, 1000)
    ec = None
    ec = sub.run_epicast(epiweek, 0.001, 0.001)
示例#13
0
def update(issue, location_name, test_mode=False):
  """Fetch and store the currently available weekly FluSurv dataset.

  Args:
    issue: the epiweek under which the fetched values are stored; also used
      to compute each row's lag.
    location_name: a key of `flusurv.location_codes`; stored as the row's
      location.
    test_mode: when true, the statements are executed but never committed.

  The cursor and connection are always closed, even if fetching row counts
  or executing a statement raises.
  """

  # fetch data
  location_code = flusurv.location_codes[location_name]
  print('fetching data for', location_name, location_code)
  data = flusurv.get_data(location_code)

  # metadata
  epiweeks = sorted(data.keys())
  location = location_name
  release_date = str(EpiDate.today())

  # SQL for insert/update
  sql = '''
  INSERT INTO `flusurv` (
    `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`,
    `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`,
    `rate_age_5`, `rate_age_6`, `rate_age_7`
  )
  VALUES (
    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
  )
  ON DUPLICATE KEY UPDATE
    `release_date` = least(`release_date`, %s),
    `rate_age_0` = coalesce(%s, `rate_age_0`),
    `rate_age_1` = coalesce(%s, `rate_age_1`),
    `rate_age_2` = coalesce(%s, `rate_age_2`),
    `rate_age_3` = coalesce(%s, `rate_age_3`),
    `rate_age_4` = coalesce(%s, `rate_age_4`),
    `rate_overall` = coalesce(%s, `rate_overall`),
    `rate_age_5` = coalesce(%s, `rate_age_5`),
    `rate_age_6` = coalesce(%s, `rate_age_6`),
    `rate_age_7` = coalesce(%s, `rate_age_7`)
  '''

  # connect to the database
  u, p = secrets.db.epi
  cnx = mysql.connector.connect(user=u, password=p, database='epidata')
  try:
    cur = cnx.cursor()
    try:
      rows1 = get_rows(cur)
      print('rows before: %d' % rows1)

      # insert/update each row of data (one per epiweek)
      for epiweek in epiweeks:
        lag = delta_epiweeks(epiweek, issue)
        if lag > 52:
          # Ignore values older than one year, as (1) they are assumed not to
          # change, and (2) it would adversely affect database performance if
          # all values (including duplicates) were stored on each run.
          continue
        # The rates are bound twice: once for the INSERT and once for the
        # ON DUPLICATE KEY UPDATE clause. Presumably `data[epiweek]` is a
        # list of nine rates in the SQL's column order (the statement has
        # nine rate placeholders per clause) - verify against
        # flusurv.get_data.
        rates = data[epiweek]
        args_meta = [release_date, issue, epiweek, location, lag]
        args_update = [release_date] + rates
        cur.execute(sql, tuple(args_meta + rates + args_update))

      rows2 = get_rows(cur)
      print('rows after: %d (+%d)' % (rows2, rows2 - rows1))
    finally:
      cur.close()

    # commit only after every statement succeeded
    if test_mode:
      print('test mode: not committing database changes')
    else:
      cnx.commit()
  finally:
    # disconnect even on failure so the connection is not leaked
    cnx.close()
示例#14
0
def get_most_recent_issue():
  """Return the largest `issue` among recent national FluView rows.

  The search covers nine weeks back through the current epiweek.
  """
  last_week = EpiDate.today().get_ew()
  first_week = flu.add_epiweeks(last_week, -9)
  response = Epidata.fluview('nat', Epidata.range(first_week, last_week))
  issues = [entry['issue'] for entry in Epidata.check(response)]
  return max(issues)