# ------------ HYPERPARAMETERS ------------- BASE_PATH = '../COVID-19/csse_covid_19_data/' GREEN = .0001 #deaths / pop ORANGE = .0005 #deaths / pop RED = 1 #deaths / pop # ------------------------------------------ pop_data = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_deaths_US.csv') test_data = os.path.join(BASE_PATH, 'csse_covid_19_daily_reports_us', '05-19-2020.csv') area_data = os.path.join(BASE_PATH, 'areas.csv') pop_data = data.load_csv_data(pop_data) test_data = data.load_csv_data(test_data) area_data = data.load_csv_data(area_data) population = [] tests = [] death_proportion = [] deaths_labels = [] areas = [] #square miles densities = [] #store population density here for each state for state in np.unique(pop_data['Province_State']): if state == 'Grand Princess' or state == 'Diamond Princess': continue sum_pop = 0 for id_curr_state in range(len(pop_data['Province_State'])): if pop_data['Province_State'][id_curr_state] == state:
# Script fragment: k-nearest-neighbors classification over per-country COVID-19
# confirmed-case time series from the JHU CSSE dataset.
# NOTE(review): fragment is truncated mid-loop; `os` and the project `data`
# helper are used but not imported here -- confirm they come from elsewhere.
import numpy as np
from sklearn.neighbors import (KNeighborsClassifier, DistanceMetric)
import json
import scipy
import matplotlib.pyplot as plt

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 5
MIN_CASES = 1000
NORMALIZE = True
# ------------------------------------------

confirmed = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)

features = []
targets = []

# NOTE(review): the `'''` below is unmatched in this fragment -- everything
# after it parses as one string literal until a closing `'''` (not visible
# here). It looks like a commented-out block whose terminator was lost; verify
# which statements were meant to execute.
'''
for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(
        confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    features.append(cases)
    targets.append(labels)
features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)
predictions = {}
for _dist in ['minkowski', 'manhattan']:
# Script fragment: loads the three global JHU CSSE time series (confirmed,
# deaths, recoveries) for a combined analysis.
# NOTE(review): `os` and the project `data` helper are used but not imported in
# this fragment -- confirm they come from a part of the file not shown.
import numpy as np
from sklearn.neighbors import (KNeighborsClassifier, DistanceMetric)
import json
import matplotlib.pyplot as plt
import seaborn as sns

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
N_NEIGHBORS = 5
MIN_CASES = 1000
NORMALIZE = True
# ------------------------------------------

# For each series: build the path, then parse with the project `data` helper.
confirmed = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_confirmed_global.csv')
confirmed = data.load_csv_data(confirmed)
deaths = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_deaths_global.csv')
deaths = data.load_csv_data(deaths)
recoveries = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_recovered_global.csv')
recoveries = data.load_csv_data(recoveries)

features = []
targets = []

# NOTE(review): the three series are NOT guaranteed to have the same number of
# rows (these length checks were presumably exploring that); only the
# confirmed-series length is kept below.
#print(len(confirmed["Country/Region"]))
#print(len(deaths["Country/Region"]))
#print(len(recoveries["Country/Region"]))
N = len(confirmed["Country/Region"])
############################################################################################################## # ------------ HYPERPARAMETERS ------------- BASE_PATH = '../COVID-19/csse_covid_19_data/' N_NEIGHBORS = 5 MIN_CASES = 1000 NORMALIZE = True # ------------------------------------------ confirmed = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_confirmed_US.csv') deaths = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_deaths_US.csv') confirmed = data.load_csv_data(confirmed) deaths = data.load_csv_data(deaths) state_dict = {} features_deaths = [] targets_deaths = [] i = 0 for val in np.unique(deaths["Province_State"]): state_dict.update({i: val}) df = data.filter_by_attribute(deaths, "Province_State", val) cases, labels = data.get_cases_chronologically(df) label = i new_labels = np.ones(labels.shape[0]) * i
# ------------ HYPERPARAMETERS ------------- BASE_PATH = '../COVID-19/csse_covid_19_data/' # ------------------------------------------ """ I am interested in looking at how the rate of cases affects the death rate (deaths/cases) I am expecting that higher jumps in the number of cases increases the death rate I will use linear regression then a neural network to see if there is a relationship, and if so what it is. """ """ Loading data SECTION 1 BELOW """ confirmed = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_confirmed_global.csv') confirmed = data.load_csv_data(confirmed) dead = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_deaths_global.csv') dead = data.load_csv_data(dead) confirmed_features = [] confirmed_targets = [] dead_features = [] dead_targets = [] for val in np.unique(confirmed["Country/Region"]): df = data.filter_by_attribute(confirmed, "Country/Region", val) cases, labels = data.get_cases_chronologically(df) confirmed_features.append(cases) confirmed_targets.append(labels)
# Script fragment: plots cumulative COVID-19 death curves, one line per
# country, from the JHU CSSE global deaths time series.
# NOTE(review): truncated mid-loop; the project `data` helper is used but not
# imported in this fragment -- confirm it comes from elsewhere in the file.
import os
import sklearn
import numpy as np
import json
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
MIN_CASES = 1000  # NOTE(review): unused in the visible portion -- verify downstream
# ------------------------------------------

death = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_deaths_global.csv')
death = data.load_csv_data(death)

features = []
targets = []

# One large figure; colors cycle through 'jet', styles through three dashes so
# many countries stay visually distinguishable.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
NUM_COLORS = 0
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

for val in np.unique(death["Country/Region"]):
    df = data.filter_by_attribute(death, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    # Sum the rows (provinces/regions) into a single national curve.
    cases = cases.sum(axis=0)
# Script fragment: plots per-country curves from a JHU CSSE time series.
# NOTE(review): several defects are visible in this truncated fragment:
#   1. `os.path.join` is used but `import os` is missing here.
#   2. The filename 'time_series_covid19-covid-Confirmed.csv' does not match
#      the JHU naming scheme ('time_series_covid19_confirmed_global.csv').
#   3. That path is immediately discarded: `confirmed` is re-assigned from a
#      hard-coded *recovered*-series path, so the variable name and the data it
#      holds disagree. Confirm which series this script was meant to plot.
import sklearn
import numpy as np
import json
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
MIN_CASES = 1000  # NOTE(review): unused in the visible portion -- verify downstream
# ------------------------------------------

confirmed = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19-covid-Confirmed.csv')
confirmed = data.load_csv_data(
    "COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
)

features = []
targets = []

# One large figure; colors cycle through 'jet', styles through three dashes.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
NUM_COLORS = 0
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

for val in np.unique(confirmed["Country/Region"]):
    df = data.filter_by_attribute(confirmed, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    # Sum the rows (provinces/regions) into a single national curve.
    cases = cases.sum(axis=0)
# Script fragment: plots cumulative COVID-19 recovery curves, one line per
# country, from the JHU CSSE global recovered time series.
# NOTE(review): truncated mid-loop; the project `data` helper is used but not
# imported in this fragment -- confirm it comes from elsewhere in the file.
import os
import sklearn
import numpy as np
import json
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

# ------------ HYPERPARAMETERS -------------
BASE_PATH = '../COVID-19/csse_covid_19_data/'
MIN_CASES = 1000  # NOTE(review): unused in the visible portion -- verify downstream
# ------------------------------------------

recovered = os.path.join(BASE_PATH, 'csse_covid_19_time_series', 'time_series_covid19_recovered_global.csv')
recovered = data.load_csv_data(recovered)

features = []
targets = []

# One large figure; colors cycle through 'jet', styles through three dashes.
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
cm = plt.get_cmap('jet')
NUM_COLORS = 0
LINE_STYLES = ['solid', 'dashed', 'dotted']
NUM_STYLES = len(LINE_STYLES)

for val in np.unique(recovered["Country/Region"]):
    df = data.filter_by_attribute(recovered, "Country/Region", val)
    cases, labels = data.get_cases_chronologically(df)
    # Sum the rows (provinces/regions) into a single national curve.
    cases = cases.sum(axis=0)
def load_global(base_path='./COVID-19/csse_covid_19_data/'):
    """Load the JHU CSSE global confirmed-cases time series.

    Generalized so the repository checkout location is a parameter instead of
    a hard-coded constant; the default preserves the original behavior, so
    existing zero-argument callers are unaffected.

    NOTE(review): the default here ('./COVID-19/...') differs from the
    '../COVID-19/...' base path used elsewhere in this file -- confirm which
    working directory this is meant to run from.

    Args:
        base_path: Root of the csse_covid_19_data directory.

    Returns:
        The result of ``data.load_csv_data`` for the confirmed-global CSV
        (project helper; presumably a parsed table -- confirm in ``data``).
    """
    confirmed = os.path.join(base_path,
                             'csse_covid_19_time_series',
                             'time_series_covid19_confirmed_global.csv')
    return data.load_csv_data(confirmed)