if val not in predictions: predictions[val] = {} predictions[val][_dist] = label.tolist() with open('results/knn_diff.json', 'w') as f: json.dump(predictions, f, indent=4) ''' # ---- Find its 30 geographic neighbors ---- # ---- Then perform knn and predict ---- for val in np.unique(confirmed["Country/Region"]): df = data.filter_by_attribute(confirmed, "Country/Region", val) if df.shape[0] >= 2: whole_data = df['5/19/20'].idxmax(axis=0) df = df.loc[[whole_data]] cases, labels = data.get_cases_chronologically(df) features.append(cases) targets.append(labels) features = np.concatenate(features, axis=0) targets = np.concatenate(targets, axis=0) predictions = {} targets_geo_location = targets[:, 2:] mykd = scipy.spatial.KDTree(targets_geo_location) for _dist in ['minkowski', 'manhattan']: for val in np.unique(confirmed["Country/Region"]): # test data df = data.filter_by_attribute(confirmed, "Country/Region", val) cases, labels = data.get_cases_chronologically(df)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = 'COVID-19/csse_covid_19_data/'
# ------------------------------------------

# Build the path to the US confirmed-cases time series and load it through the
# project's `data` helper. The path is relative, so it is resolved against the
# current working directory.
confirmed_path = os.path.join(
    BASE_PATH,
    'csse_covid_19_time_series',
    'time_series_covid19_confirmed_US.csv')
confirmed = data.load_csv_data(confirmed_path)

# Gather the case arrays of every distinct "Country_Region" value. The second
# value returned by get_cases_chronologically is not needed here.
tmpFeatures = []
for val in np.unique(confirmed["Country_Region"]):
    df = data.filter_by_attribute(confirmed, "Country_Region", val)
    cases, _ = data.get_cases_chronologically(df)
    tmpFeatures.append(cases)

# Stack all groups along axis 0, then collapse that axis into a single series.
# NOTE(review): assumes each `cases` array is (rows, time steps) so the sum
# yields one total per time step -- confirm against the data helper.
tmpFeatures = np.concatenate(tmpFeatures, axis=0)
features = np.sum(tmpFeatures, axis=0)

# Step-to-step increase of the series. `prepend=0` makes the first entry equal
# features[0], matching the previous explicit `newCases[0] = features[0]`;
# the result is stored as int32 exactly as before. Unlike the former index
# loop, this also yields an empty array (instead of raising) for empty input.
newCases = np.diff(features, axis=0, prepend=0).astype(np.int32)

# Integer index 0..N-1, one entry per element of newCases.
dates = np.arange(len(newCases))