def get_most_recent_issue(self):
    """Return the most recent epiweek for which FluView data is available."""
    # Look back ten weeks from the current epiweek; the newest issue in that
    # window is taken as the most recent one available.
    current_week = EpiDate.today().get_ew()
    earliest_week = add_epiweeks(current_week, -9)
    week_range = self.epidata.range(earliest_week, current_week)
    rows = self.epidata.check(self.epidata.fluview('nat', week_range))
    return max(row['issue'] for row in rows)
def get_kcdc_data():
    """Fetch KCDC influenza data and return parallel lists (epiweeks, ILI)."""
    issue = EpiDate.today().get_ew()
    # A season that has passed week 35 counts toward the following year.
    last_season = issue // 100 + (1 if issue % 100 > 35 else 0)
    url = 'http://www.cdc.go.kr/npt/biz/npp/iss/influenzaListAjax.do'
    params = {
        'icdNm': 'influenza',
        'startYear': '2004',  # Started in 2004
        'endYear': str(last_season),
    }
    response = requests.post(url, params)
    data = response.json()['data']

    ews = []
    ilis = []
    week_cursor = 200436  # first epiweek of the 2004 season
    for year in range(2004, last_season):
        year_data = data[year - 2004]
        if year > 2004:
            # Resume numbering immediately after the last recorded epiweek.
            week_cursor = ews[-1] + 1
        # Values arrive as a backtick-delimited string; drop empty entries.
        values = [float(v) for v in year_data["VALUE"].split('`') if v != '']
        season_end = add_epiweeks(week_cursor, len(values))
        for ew, ili in zip(range_epiweeks(week_cursor, season_end), values):
            ews.append(ew)
            ilis.append(float(ili))
    return ews, ilis
def main():
    """Update the `ecdc_ili` database, either from a given file or by
    downloading the current ECDC data.

    Fixes relative to the original:
    - uses `tempfile`/`os`/`shutil` instead of shelling out to
      `mkdir`/`rm`/`rm -r` via subprocess (portable, no child processes);
    - reports the exception that caused a per-file update failure instead of
      swallowing it silently;
    - the failure warning is only printed when all attempts failed (the old
      `flag >= max_tries` check also fired when the final attempt succeeded).
    """
    import os
    import shutil
    import tempfile

    # args and usage
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test',
        action='store_true',
        help='do dry run only, do not update the database'
    )
    parser.add_argument(
        '--file',
        type=str,
        help='load an existing zip file (otherwise fetch current data)'
    )
    parser.add_argument(
        '--issue',
        type=int,
        help='issue of the file (e.g. 201740); used iff --file is given'
    )
    args = parser.parse_args()

    if (args.file is None) != (args.issue is None):
        raise Exception('--file and --issue must both be present or absent')

    date = datetime.datetime.now().strftime('%Y-%m-%d')
    print('assuming release date is today, %s' % date)

    ensure_tables_exist()
    if args.file:
        update_from_file(args.issue, date, args.file, test_mode=args.test)
        return

    # Code doesn't always download all files (unreproducible errors), so try
    # a few times and hopefully one attempt will work.
    max_tries = 5
    for _attempt in range(max_tries):
        # Use a fresh temporary directory so data from different download
        # attempts is never mixed into the same folder.
        tmp_dir = tempfile.mkdtemp(prefix='downloads_', dir='.')
        download_ecdc_data(download_dir=tmp_dir)
        issue = EpiDate.today().get_ew()
        files = glob.glob('%s/*.csv' % tmp_dir)
        for filename in files:
            # NOTE(review): reading and discarding the first line looks like
            # a readability sanity check on each file — confirm intent.
            with open(filename, 'r') as f:
                _ = f.readline()
        db_error = False
        for filename in files:
            try:
                update_from_file(issue, date, filename, test_mode=args.test)
                os.remove(filename)
            except Exception as e:
                # Report the cause; the outer loop retries with a fresh
                # download rather than aborting the whole run.
                print('failed to update from %s: %r' % (filename, e))
                db_error = True
        shutil.rmtree(tmp_dir, ignore_errors=True)
        if not db_error:
            break  # Exit loop with success
    else:
        print('WARNING: Database `ecdc_ili` did not update successfully')
def get_weeks(self):
    """Return a list of weeks on which truth and sensors are both available."""
    # Stop one week before the current epiweek.
    end_week = add_epiweeks(EpiDate.today().get_ew(), -1)
    all_weeks = range_epiweeks(self.FIRST_DATA_EPIWEEK, end_week, inclusive=True)
    return list(all_weeks)
def get_most_recent_issue(self, location):
    """Return the most recent epiweek for which paho_dengue data is available in given location."""
    # Search the trailing 52-week window ending at the current epiweek.
    current_week = EpiDate.today().get_ew()
    year_ago = add_epiweeks(current_week, -52)
    week_range = self.epidata.range(year_ago, current_week)
    response = self.epidata.paho_dengue(location, week_range)
    return max(row['epiweek'] for row in self.epidata.check(response))
def main():
    """Entry point: fetch current KCDC data and store it in the database."""
    # args and usage
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test',
        action='store_true',
        help='do dry run only, do not update the database')
    args = parser.parse_args()

    # The release date is assumed to be the day this script runs.
    date = datetime.datetime.now().strftime('%Y-%m-%d')
    print('assuming release date is today, %s' % date)
    issue = EpiDate.today().get_ew()

    ensure_tables_exist()
    ews, ilis = get_kcdc_data()
    update_from_data(ews, ilis, date, issue, test_mode=args.test)
#print('Updated Epicast df for %d users.' % future._num_users) forecaster._callback = update_epicast_df print('Generating epicast for', epiweek) forecaster.open() forecast = forecaster.forecast( epiweek) # is this the forecast function in fc_abstract.py? filename = ForecastIO.save_csv(forecast) forecaster.close() print(filename) return filename if __name__ == '__main__': epiweek = EpiDate.today().add_weeks(-1).get_ew() print("epiweek: ", epiweek) print('WARNING: For testing only!') print(' - Using very small number of samples') print(' - Not uploading submissions to database') print(' - Not emailing submissions to CDC') print(' - Assuming last published wILI on %d' % epiweek) print(' - Limited locations') ec_age_groups = [ 'rate_overall', 'rate_age_0', 'rate_age_1', 'rate_age_2', 'rate_age_3', 'rate_age_4' ] sub = Submissions_Hosp(ec_age_groups, 1000) ec = None ec = sub.run_epicast(epiweek, 0.001, 0.001)
def update(issue, location_name, test_mode=False):
    """Fetch and store the currently available weekly FluSurv dataset.

    `issue`: the epiweek under which the fetched values are stored
    `location_name`: human-readable location (key into
        `flusurv.location_codes`)
    `test_mode`: when True, changes are printed but not committed
    """
    # fetch data
    location_code = flusurv.location_codes[location_name]
    print('fetching data for', location_name, location_code)
    data = flusurv.get_data(location_code)

    # metadata
    epiweeks = sorted(data.keys())
    location = location_name
    release_date = str(EpiDate.today())

    # connect to the database
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database='epidata')
    cur = cnx.cursor()
    rows1 = get_rows(cur)
    print('rows before: %d' % rows1)

    # SQL for insert/update. On duplicate key, keep the earliest release date
    # and only overwrite a rate when the new value is non-NULL (coalesce).
    sql = '''
    INSERT INTO `flusurv` (
      `release_date`, `issue`, `epiweek`, `location`, `lag`, `rate_age_0`,
      `rate_age_1`, `rate_age_2`, `rate_age_3`, `rate_age_4`, `rate_overall`,
      `rate_age_5`, `rate_age_6`, `rate_age_7`
    )
    VALUES (
      %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
    )
    ON DUPLICATE KEY UPDATE
    `release_date` = least(`release_date`, %s),
    `rate_age_0` = coalesce(%s, `rate_age_0`),
    `rate_age_1` = coalesce(%s, `rate_age_1`),
    `rate_age_2` = coalesce(%s, `rate_age_2`),
    `rate_age_3` = coalesce(%s, `rate_age_3`),
    `rate_age_4` = coalesce(%s, `rate_age_4`),
    `rate_overall` = coalesce(%s, `rate_overall`),
    `rate_age_5` = coalesce(%s, `rate_age_5`),
    `rate_age_6` = coalesce(%s, `rate_age_6`),
    `rate_age_7` = coalesce(%s, `rate_age_7`)
    '''

    # insert/update each row of data (one per epiweek)
    for epiweek in epiweeks:
        lag = delta_epiweeks(epiweek, issue)
        if lag > 52:
            # Ignore values older than one year, as (1) they are assumed not to
            # change, and (2) it would adversely affect database performance if all
            # values (including duplicates) were stored on each run.
            continue
        # Parameter order must match the SQL above: metadata, the INSERT
        # rates, then the UPDATE values (release_date + the same rates again).
        args_meta = [release_date, issue, epiweek, location, lag]
        args_insert = data[epiweek]
        args_update = [release_date] + data[epiweek]
        cur.execute(sql, tuple(args_meta + args_insert + args_update))

    # commit and disconnect
    rows2 = get_rows(cur)
    print('rows after: %d (+%d)' % (rows2, rows2 - rows1))
    cur.close()
    if test_mode:
        print('test mode: not committing database changes')
    else:
        cnx.commit()
    cnx.close()
def get_most_recent_issue():
    """Return the newest FluView issue seen within the last 10 weeks."""
    this_week = EpiDate.today().get_ew()
    ten_weeks_ago = flu.add_epiweeks(this_week, -9)
    week_range = Epidata.range(ten_weeks_ago, this_week)
    rows = Epidata.check(Epidata.fluview('nat', week_range))
    return max(row['issue'] for row in rows)