def grabDataFromEpicast(self):
    """Fetch FluView ILI data from the Delphi Epidata API onto this instance.

    Queries epiweeks 201040 through ``self.todaysEW`` for whichever of
    ``self.region`` / ``self.state`` is non-empty.  When both are set, the
    two are concatenated into a single location key
    (NOTE(review): confirm that ``region + state`` concatenation is the
    intended location identifier).  Stores the raw response in
    ``self.fludata`` and unpacks its 'message' and 'epidata' fields into
    ``self.fludata_message`` / ``self.fludata_data``.
    """
    weeks = [Epidata.range(201040, self.todaysEW)]
    if self.region == '':
        location = self.state
    elif self.state == '':
        location = self.region
    else:
        location = self.region + self.state
    self.fludata = Epidata.fluview(location, weeks)
    self.fludata_message = self.fludata['message']
    self.fludata_data = self.fludata['epidata']
def EpiCallForData(year, week, region, lag):
    """Query the Delphi Epidata API for one FluView observation.

    ``region`` is either the literal string 'nat' (national) or something
    coercible to an int naming an HHS region (formatted as 'HHS<n>').
    ``year``/``week`` select a single epiweek; ``lag`` is passed through to
    the API.  Returns the raw response dict.
    """
    epiweek = '{:04d}{:02d}'.format(year, week)
    if region == 'nat':
        return Epidata.fluview(regions=['nat'], epiweeks=[epiweek], lag=lag)
    return Epidata.fluview(regions=['HHS{:d}'.format(int(region))],
                           epiweeks=[epiweek], lag=lag)
def get_unstable_wILI(region, ew1, ew2):
    """Return the wILI series for ``region`` over [ew1, ew2], as of issue ew2.

    Raises ``Exception('missing data')`` when the API returns fewer rows
    than the number of epiweeks in the inclusive range.
    """
    span = Epidata.range(ew1, ew2)
    rows = AF_Utils._get(Epidata.fluview(region, span, issues=ew2))
    series = [row['wili'] for row in rows]
    expected = flu.delta_epiweeks(ew1, ew2) + 1
    if len(series) != expected:
        raise Exception('missing data')
    return series
def get_ili(location, issue, ew1, ew2):
    """Return weekly wILI values for ``location`` over [ew1, ew2].

    When ``issue`` is given, unstable (as-of-that-issue) values are
    preferred; stable values then fill in any weeks the unstable call did
    not cover (or all weeks when ``issue`` is None or the call failed).
    Returns a list of single-element lists, ordered by epiweek.
    """
    epiweeks = Epidata.range(ew1, ew2)
    num_weeks = flu.delta_epiweeks(ew1, ew2) + 1
    result = {}
    res = None
    # First attempt: data as it appeared at the requested issue.
    if issue is not None:
        res = Epidata.fluview(location, epiweeks, issues=issue)
        if res['result'] == 1:
            for row in res['epidata']:
                result[row['epiweek']] = row['wili']
    # Fall back to stable data when unstable is unavailable or incomplete.
    # (short-circuit: `res` is only inspected when `issue` was given)
    needs_stable = (issue is None
                    or res['result'] != 1
                    or len(res['epidata']) < num_weeks)
    if needs_stable:
        for row in api_fetch(Epidata.fluview(location, epiweeks)):
            # keep the unstable value when one was already recorded
            result.setdefault(row['epiweek'], row['wili'])
    return [[result[ew]] for ew in sorted(result)]
def get_fluview_data(states, start, end):
    """Download FluView ILI data for each state from the Delphi Epidata API.

    Parameters
    ----------
    states : iterable of str
        Location codes accepted by ``Epidata.fluview``.
    start, end : int
        Epiweeks (YYYYWW) bounding the requested range, inclusive.

    Returns
    -------
    dict
        Maps state -> ``pandas.DataFrame`` of the raw 'epidata' rows.
        States whose request failed are omitted.
    """
    ilinet_raw = {}
    for state in states:
        print("State {}".format(state))
        res = Epidata.fluview(
            regions=state,
            epiweeks=[Epidata.range(start, end)])
        if res['result'] == 1:
            print(res['result'], res['message'], len(res['epidata']))
            ilinet_raw[state] = pd.DataFrame(res['epidata'])
        else:
            # BUG FIX: report the actual failure instead of the hard-coded
            # literal "(-2, u'no success')", which masked the real result
            # code and message returned by the API.
            print(res['result'], res['message'])
    return ilinet_raw
def get_influenza_counts_df():
    """Load national influenza counts since 2020 from the CMU Delphi API.

    Returns a pandas DataFrame sorted by epiweek, restricted to selected
    count columns, with an approximate calendar 'date' column for graphing.
    """
    # Today's date -> "YYYY-mm-dd" -> epiweek, to bound the query.
    current_epiweek = DataLoader.get_approx_epiweek_from_date(
        datetime.today().strftime("%Y-%m-%d"))
    # National fluview data for every epiweek from the start of 2020.
    response = Epidata.fluview(
        ["nat"], [Epidata.range(202001, current_epiweek)])
    frame = pd.DataFrame.from_records(
        response["epidata"]).sort_values(by=["epiweek"])
    keep = ["epiweek", "lag", "num_ili", "num_patients",
            "num_providers", "wili", "ili"]
    frame = frame[keep]
    # Approximate real date per epiweek, for plotting on a time axis.
    frame["date"] = frame["epiweek"].apply(
        DataLoader.get_approx_date_from_epiweek)
    return frame
# NOTE(review): whitespace-collapsed fragment; original line breaks and
# indentation are unrecoverable, so the code below is preserved verbatim.
# It appears to contain (1) the tail of an unseen function ending in
# `return obs` / an apply of `addLag` merged back onto `d`, (2) a small
# `timeStamp()` helper returning an ISO-like timestamp string, and (3) a
# __main__ script that, for lag values 40 down to 0, downloads FluView
# data from the Delphi Epidata API and accumulates epiweek/region/wili/
# lag/release-date records into a DataFrame `mostRecentEpiData`.
# Reconstruct the original indentation before editing this logic.
return obs unique_EWLagPairs= unique_EWLagPairs.apply(addLag,1) return d.merge( unique_EWLagPairs, on = ['EW','lag']) def timeStamp(): return datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S") if __name__ == "__main__": firstWeekOfSeason = datetime.datetime.strptime('2019-10-01',"%Y-%m-%d") epiWeeks = computeEpiWeeksWithData(firstWeekOfSeason) regions = createAllRegions() mostRecentEpiData = {'EW':[],'region':[],'wili':[],'lag':[],'releaseDate':[],'releaseEW':[]} for lag in np.arange(40,-1,-1): fluData = Epidata.fluview(regions = regions ,epiweeks = epiWeeks,lag=lag) if fluData['message'] != 'success': print('could not download data-lag={:d}'.format(lag)) continue print('Downloading data-lag={:d}'.format(lag)) for data in fluData['epidata']: mostRecentEpiData['EW'].append(data['epiweek']) mostRecentEpiData['region'].append(data['region']) mostRecentEpiData['wili'].append(data['wili']) mostRecentEpiData['lag'].append(lag) mostRecentEpiData['releaseDate'].append(data['release_date']) releasedateDT = datetime.datetime.strptime(data['release_date'],"%Y-%m-%d") mostRecentEpiData['releaseEW'].append( fromDateTime2EW(releasedateDT )) mostRecentEpiData = pd.DataFrame(mostRecentEpiData)
# NOTE(review): whitespace-collapsed fragment; original line breaks and
# indentation are unrecoverable (e.g. whether `epiweek = availableWeeks[-1]`
# sits inside or after the `for r ...` loop), so the code is preserved
# verbatim.  It opens a buffered cursor on an externally-defined `cnx`,
# then builds a `history[r][epiweek] -> wili` ground-truth mapping for the
# 2017-18 season (epiweeks 201744..201820) by downloading FluView data per
# region via Epidata.check(Epidata.fluview(...)), and derives the
# forecast-made week from the latest available epiweek (201752 precedes
# 201801).  Reconstruct the original indentation before editing this logic.
cur = cnx.cursor(buffered=True) # Get ground truth history = {} regions = [ "nat", "hhs1", "hhs2", "hhs3", "hhs4", "hhs5", "hhs6", "hhs7", "hhs8", "hhs9", "hhs10", "ga", "pa", "dc", "tx", "or" ] # for 2017-18 season, 201744 is the first ground truth data we get after the competition starts (i.e., users forecasted for it in 201743) ############################################################# season_start, season_end = 201744, 201820 for r in range(1, len(regions) + 1): history[r] = {} rows = Epidata.check( Epidata.fluview(regions[r - 1], Epidata.range(season_start, season_end))) truth = [(row['epiweek'], row['wili']) for row in rows] availableWeeks = [row[0] for row in truth] for row in truth: (epiweek, wili) = row history[r][epiweek] = wili print(regions[r - 1], epiweek, wili) epiweek = availableWeeks[-1] print("epiweek", epiweek) if (epiweek == 201801): forecast_made = 201752 else: forecast_made = epiweek - 1 # debug print print("availableWeeks", availableWeeks) expected_weeks = epi_utils.delta_epiweeks(season_start, epiweek) + 1
def EpiCallForLag(year, week, region):
    """Fetch the latest FluView data for a single (year, week) epiweek.

    ``region`` is either 'nat' (national) or something coercible to an int
    naming an HHS region (formatted as 'HHS<n>').  Returns the raw API
    response dict.
    """
    epiweek = '{:04d}{:02d}'.format(year, week)
    if region == 'nat':
        targets = ['nat']
    else:
        targets = ['HHS{:d}'.format(int(region))]
    return Epidata.fluview(targets, [epiweek])
# NOTE(review): whitespace-collapsed fragment whose final definition
# (`rename_region`) is cut off mid-body, so the code is preserved
# verbatim.  It builds the region list ("nat" plus hhs1..hhs10), downloads
# the most recent wILI per region from the Delphi Epidata API (lag=None),
# accumulates epiweek/region/wili columns, writes them to
# snakemake.output.actual_csv, and begins loading baseline data from
# BASELINE_URL.  Reconstruct the original indentation (and the missing
# tail of `rename_region`) before editing this logic.
regions = ["nat", *["hhs" + str(i) for i in range(1, 11)]] # NOTE Lag value # A lag of 0 means that the data for each week collected will be # as observed at that point in time. # Pass None as lag will let us collect the most recent data # available df = { "epiweek": [], "region": [], "wili": [] } for region in regions: res = Epidata.fluview(region, epiweek_range, lag=None) for data in res["epidata"]: df["epiweek"].append(data["epiweek"]) df["region"].append(data["region"]) df["wili"].append(data["wili"]) # Write to file pd.DataFrame(df).to_csv(snakemake.output.actual_csv, index=False) # Save baseline information bdf = pd.read_csv(BASELINE_URL, index_col=0) def rename_region(region: str) -> str: if region == "National": return "nat" else: