예제 #1
0
        if val not in predictions:
            predictions[val] = {}
        predictions[val][_dist] = label.tolist()

with open('results/knn_diff.json', 'w') as f:
    json.dump(predictions, f, indent=4)
'''

# ---- Collect one training row per country ----
# ---- Then index the location columns with a KD-tree ----
for country in np.unique(confirmed["Country/Region"]):
    rows = data.filter_by_attribute(confirmed, "Country/Region", country)
    # A country spread over several rows is reduced to the single row
    # holding the largest value in the '5/19/20' column.
    if rows.shape[0] > 1:
        peak_index = rows['5/19/20'].idxmax(axis=0)
        rows = rows.loc[[peak_index]]
    country_cases, country_labels = data.get_cases_chronologically(rows)
    features.append(country_cases)
    targets.append(country_labels)

# Join the per-country pieces along the first axis.
features = np.concatenate(features, axis=0)
targets = np.concatenate(targets, axis=0)

# Label columns from index 2 onward feed the spatial index
# (presumably latitude/longitude -- confirm against
# data.get_cases_chronologically).
targets_geo_location = targets[:, 2:]
mykd = scipy.spatial.KDTree(targets_geo_location)

# Start with an empty predictions mapping.
predictions = {}

for _dist in ['minkowski', 'manhattan']:
    for val in np.unique(confirmed["Country/Region"]):
        # test data
        df = data.filter_by_attribute(confirmed, "Country/Region", val)
        cases, labels = data.get_cases_chronologically(df)
예제 #2
0
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import json

# ------------ HYPERPARAMETERS -------------
BASE_PATH = 'COVID-19/csse_covid_19_data/'
# ------------------------------------------

# Load the US confirmed-cases time series stored under BASE_PATH.
confirmed = data.load_csv_data(
    os.path.join(
        BASE_PATH,
        'csse_covid_19_time_series',
        'time_series_covid19_confirmed_US.csv',
    )
)
# Fetch the chronological case arrays for every distinct Country_Region
# value; the second element returned by get_cases_chronologically is unused.
region_results = (
    data.get_cases_chronologically(
        data.filter_by_attribute(confirmed, "Country_Region", region)
    )
    for region in np.unique(confirmed["Country_Region"])
)
tmpFeatures = [region_cases for region_cases, _ in region_results]

# Join the per-region arrays along axis 0, then collapse that axis by summing.
tmpFeatures = np.concatenate(tmpFeatures, axis=0)
features = tmpFeatures.sum(axis=0)

# Convert the cumulative series into per-step increments: entry 0 keeps the
# first cumulative value and entry i holds features[i] - features[i - 1].
# np.diff with prepend=0 yields exactly that in one vectorized call and,
# unlike indexing features[0] directly, does not raise on an empty series.
# The result is cast to int32 to match the dtype used previously.
newCases = np.diff(features, axis=0, prepend=0).astype(np.int32)

# Positional index 0 .. len(newCases) - 1, one entry per element of newCases.
dates = np.arange(len(newCases))