Example #1
def get_cumulative_tests_chart_data(json, dates):
    log_status("get_cumulative_tests_chart_data()")
    # Count totals for every day

    date_counts = defaultdict(int)
    dates_within_range = {str(date.date()) for date in dates}
    date_start = str(dates[0].date())
    count_before_date_range = 0

    for res in json:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            date_counts[date] += 1
        elif date < date_start:
            # Hack to count dates before our range
            count_before_date_range += 1

    tests = []
    for date in dates:
        tests.append(date_counts[str(date.date())])

    # Add everything before first date to first date
    tests[0] += count_before_date_range

    return_json = {"testsAdministered": list(np.cumsum(tests))}

    return return_json
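A minimal, self-contained sketch of the same technique, bucketing records by day and then taking a cumulative sum. The rows and date range below are made-up stand-ins for the TEHIK test-result JSON:

from collections import defaultdict

import numpy as np
import pandas as pd

rows = [{"StatisticsDate": "2020-03-01"}, {"StatisticsDate": "2020-03-01"},
        {"StatisticsDate": "2020-03-02"}]
dates = pd.date_range(start="2020-03-01", end="2020-03-03")

date_counts = defaultdict(int)
for row in rows:
    date_counts[row["StatisticsDate"]] += 1

tests = [date_counts[str(d.date())] for d in dates]
print(np.cumsum(tests).tolist())  # [2, 3, 3]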
Example #2
def get_confirmed_cases_by_county(json, county_mapping):
    log_status("get_confirmed_cases_by_county()")
    chart_counties = [
        "Harjumaa",
        "Hiiumaa",
        "Ida-Virumaa",
        "Jõgevamaa",
        "Järvamaa",
        "Läänemaa",
        "Lääne-Virumaa",
        "Põlvamaa",
        "Pärnumaa",
        "Raplamaa",
        "Saaremaa",
        "Tartumaa",
        "Valgamaa",
        "Viljandimaa",
        "Võrumaa",
        "Info puudulik",
    ]

    # Count totals for every county
    counts = defaultdict(int)

    for res in json:
        if res["ResultValue"] == "P":
            county = county_mapping[res["County"]]
            counts[county] += 1

    return [counts[county] for county in chart_counties]
Example #3
def download_data(url, destination):
    log_status(f"Downloading {url} to {destination}")

    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(destination, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
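For reference, a self-contained variant of the same streaming pattern that also guards against partial downloads by writing to a temporary name and renaming at the end, much like the .tmp-then-move approach main() uses in Example #26. The URL here is a placeholder, not one of this project's endpoints:

import os

import requests

def download_atomic(url, destination):
    tmp_path = destination + ".tmp"
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(tmp_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    os.replace(tmp_path, destination)  # atomic rename on the same filesystem

download_atomic("https://example.com/data.json", "data.json")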
Example #4
def get_new_cases_per_day_chart_data(data):
    log_status("get_new_cases_per_day_chart_data()")
    cases = np.diff(data["cases"], prepend=0)

    dataNewCasesPerDayChart = {
        "confirmedCases": list(cases),
    }

    return dataNewCasesPerDayChart
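np.diff with prepend=0 turns a cumulative series back into daily increments and makes the first day's value equal to the first cumulative count; a quick self-contained check:

import numpy as np

cumulative = [3, 5, 5, 9]
print(np.diff(cumulative, prepend=0).tolist())  # [3, 2, 0, 4]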
Example #5
def main():
    """Run script"""

    log_dir = os.path.join(args.log_dir, args.meta_model, args.suffix)

    data = DataContainer(
        root=args.root,
        num_pretrain_alphabets=args.num_pretrain,
        num_classes=args.classes,
        seed=args.seed,
        num_workers=args.workers,
        pin_memory=True,
    )

    ###########################################################################

    def evaluate(model, case, step):
        """Run final evaluation"""
        if args.write_ival > 0:
            torch.save(model, join(log_dir, 'model.pth.tar'))

        if case == 'test':
            iterator = data.test
        else:
            iterator = data.val

        iterator = iterator(args.task_batch_size,
                            args.task_val_steps,
                            args.multi_head)

        pp('Evaluating on {} tasks'.format(case))

        results = []
        for i, task in enumerate(iterator):

            if args.write_ival > 0:
                task_model = torch.load(join(log_dir, 'model.pth.tar'))
            else:
                task_model = model

            t = time.time()
            task_results = task_model([task], meta_train=False)
            t = time.time() - t

            results.append(task_results)

            if args.log_ival > 0:
                log_status(task_results, 'task={}'.format(i), t)

        if args.log_ival > 0:
            log_status(consolidate(results), 'task avg', t)

        if args.write_ival > 0:
            write_val_res(results, step, case, log_dir)

        pp('Done')
Example #6
def train(model_object, model, train_loader, test_loader=None):
    # Use the first CUDA device unless the model is explicitly set to CPU
    if model_object.device.lower() != 'cpu':
        model_object.device = 'cuda:0'

    if model_object.log_path and model_object.verbose > 0:
        log_status(model_object.log_path,
                   'model_params: {}'.format(str(model_object.params)),
                   init=False)

    model = model.to(model_object.device)

    model_object.optimizer = optim.Adam(model.parameters(),
                                        lr=model_object.lr,
                                        weight_decay=model_object.weight_decay)
    kwargs = {
        'pos_weight': torch.Tensor([model_object.pos_weight]).to(model_object.device),
        'reduction': 'mean',
    }
    model_object.criterion = LossGenie('BCEWithLogitsLoss', **kwargs)

    # assisting modules
    model_object.lr_scheduler = optim.lr_scheduler.StepLR(
        model_object.optimizer,
        step_size=model_object.lr_decay_freq,
        gamma=0.5)
    model_object.early_stopper = EarlyStopping(model_object.model_dir,
                                               patience=model_object.patience,
                                               verbose=True)
    model_object.train_metrics = metrics.BinaryClfMetrics()
    model_object.test_metrics = metrics.BinaryClfMetrics()

    for ep in range(1, model_object.epoch + 1):
        # train for an epoch
        train_epoch(ep, model_object, model, train_loader, test_loader)

        # validation for an epoch
        if test_loader is not None:
            valid_epoch(ep, model_object, model, test_loader)

        # return if early stop
        if model_object.early_stopper.early_stop:
            return model

        model_object.lr_scheduler.step()

        if model_object.log_path and model_object.verbose > 0:
            log_status(model_object.log_path,
                       'current lr: {}'.format(
                           model_object.lr_scheduler.get_lr()),
                       init=False)

    return model
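The kwargs passed to LossGenie mirror torch.nn.BCEWithLogitsLoss. A small sketch of what pos_weight does there; the weight of 3.0 and the sample logits are arbitrary:

import torch
import torch.nn as nn

# pos_weight > 1 up-weights positive examples, useful for imbalanced binary labels
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([3.0]), reduction='mean')

logits = torch.tensor([0.2, -1.5])
targets = torch.tensor([1.0, 0.0])
print(criterion(logits, targets))  # scalar loss tensor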
Example #7
def train_epoch(ep, model_object, model, train_loader, test_loader=None):
    model.train()
    train_timer = Timer('M')
    model_object.criterion.train()
    model_object.train_metrics.reset()
    print_counter = 0  # counter for printing

    for datum in train_loader:

        model_object.optimizer.zero_grad()
        loss, prob, tgt, _ = predict(ep, model_object, model, datum)
        loss.backward()

        # Clip gradients to a maximum L2 norm of 10
        clip_grad_norm_(model.parameters(), 10)
        model_object.train_metrics.collect(y_prob=prob, y_true=tgt)

        # update network
        model_object.optimizer.step()

        print_counter += 1
        if print_counter % model_object.print_freq == 0 and model_object.log_path is not None:
            train_auc = model_object.train_metrics.roc_auc()
            train_ap = model_object.train_metrics.average_precision()
            status = '[epoch {}], train batch {} - batch loss: {:.5f}, running train auc: {:.5f}, running train ap: {:.5f} - time taken: {:.2f} mins'
            status = status.format(ep, model_object.criterion.n_batch_train,
                                   model_object.criterion.get_running_loss(),
                                   train_auc, train_ap, train_timer.time())
            if model_object.log_path and model_object.verbose > 0:
                log_status(model_object.log_path, status, init=False)

            # evaluate on validation set
            if test_loader is not None:
                train_timer.pause()
                valid_epoch(ep, model_object, model, test_loader)
                train_timer.resume()
                model.train()

    # print and log status
    train_loss = model_object.criterion.get_running_loss()
    train_auc = model_object.train_metrics.roc_auc()
    train_acc = model_object.train_metrics.accuracy()
    train_ap = model_object.train_metrics.average_precision()

    if model_object.log_path:
        status = '[epoch {}], train loss: {:.5f}, train acc: {:.5f}, train auc: {:.5f}, train ap: {:.5f}, time taken: {:.2f} mins'
        status = status.format(ep, train_loss, train_acc, train_auc, train_ap,
                               train_timer.time_since_start())
        if model_object.verbose > 0:
            log_status(model_object.log_path, status, init=False)

    # reset running criterion losses and timer
    model_object.criterion.reset_train()
    train_timer.reset()
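clip_grad_norm_ rescales all gradients so their combined norm does not exceed the given maximum, and returns the norm measured before clipping; a self-contained illustration:

import torch
from torch.nn.utils import clip_grad_norm_

w = torch.nn.Parameter(torch.zeros(2))
w.grad = torch.tensor([30.0, 40.0])  # gradient with L2 norm 50

total_norm = clip_grad_norm_([w], max_norm=10)
print(float(total_norm))  # 50.0 (norm before clipping)
print(w.grad)             # approximately tensor([6., 8.]), rescaled to norm 10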
Example #8
def get_in_intensive_data(json, manual_data):
    log_status("get_in_intensive_data()")
    if type(json) is not list or type(manual_data) is not dict:
        return False

    data = get_dict_with_dates_and_key(json, "IsInIntensive")

    output = manual_data

    for day in data:
        if isinstance(data[day], str):
            output[day] = int(data[day])
    return output
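A toy illustration of the merge semantics, with made-up numbers: values parsed from the API (which arrive as strings) overwrite the manually curated entries for the same day. Note that because the original assigns output = manual_data, the caller's dict is mutated in place; the copy below is only to keep the sketch side-effect free:

manual = {"2020-04-01": 10, "2020-04-02": 12}
api = {"2020-04-02": "13", "2020-04-03": "15"}  # values arrive as strings

merged = dict(manual)  # copy to avoid mutating the input, unlike the original
for day, value in api.items():
    if isinstance(value, str):
        merged[day] = int(value)

print(merged)  # {'2020-04-01': 10, '2020-04-02': 13, '2020-04-03': 15}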
Example #9
def get_json_data(url):
    max_retries = 3
    for retry in range(1, max_retries + 1):
        try:
            # Request remote data
            response = requests.get(url=url)

            # Process response
            if response.status_code == 200:
                return response.json()
            else:
                log_status('Endpoint unavailable. Status code: ' +
                           str(response.status_code))
        except Exception:
            # Log error
            log_status('Error when retrieving remote data:')
            log_status(traceback.format_exc())

        # Retry?
        if retry < max_retries:
            log_status("Retrying...")
            sleep(5)

    # Unable to get remote data
    return None
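A generalized sketch of the same retry loop, swapping the fixed 5-second delay for exponential backoff. This is not part of the project, and the narrower except clause assumes only network errors should be retried:

import traceback
from time import sleep

import requests

def get_json_with_backoff(url, max_retries=3, base_delay=5):
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url=url, timeout=30)
            if response.status_code == 200:
                return response.json()
            print('Endpoint unavailable. Status code: ' + str(response.status_code))
        except requests.RequestException:
            print('Error when retrieving remote data:')
            print(traceback.format_exc())
        if attempt < max_retries:
            sleep(base_delay * 2 ** (attempt - 1))  # 5 s, 10 s, 20 s, ...
    return None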
Example #10
def get_positive_negative_chart_data(json, mapping):
    """
    Compile data for the "Positive and negative tests by county" chart.
    """
    log_status("get_positive_negative_chart_data()")

    # Define counties (in order)
    chart_counties = [
        "Info puudulik",
        "Harjumaa",
        "Hiiumaa",
        "Ida-Virumaa",
        "Jõgevamaa",
        "Järvamaa",
        "Läänemaa",
        "Lääne-Virumaa",
        "Põlvamaa",
        "Pärnumaa",
        "Raplamaa",
        "Saaremaa",
        "Tartumaa",
        "Valgamaa",
        "Viljandimaa",
        "Võrumaa",
    ]

    results = [d["ResultValue"] for d in json]
    county = [mapping[d["County"]] for d in json]

    df = pd.DataFrame({"County": county, "ResultValue": results})

    pos_results = df[df.ResultValue == "P"].groupby("County").count()
    neg_results = df[df.ResultValue == "N"].groupby("County").count()

    pos_results.rename(columns={"ResultValue": "Positive"}, inplace=True)
    neg_results.rename(columns={"ResultValue": "Negative"}, inplace=True)

    end_df = pos_results.join(neg_results, how="outer")
    end_df.fillna(0, inplace=True)
    end_df["Positive"] = end_df[["Positive"]].astype("int")

    county_positive = []
    county_negative = []
    for order in chart_counties:
        county_positive.append(end_df.loc[order, "Positive"])
        county_negative.append(end_df.loc[order, "Negative"])

    end_output = {"negative": county_negative, "positive": county_positive}

    return end_output
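The outer join is what keeps counties that have only positive or only negative results; a minimal reproduction with made-up rows. It also shows why the original casts Positive back to int: fillna leaves float columns behind wherever NaNs appeared:

import pandas as pd

df = pd.DataFrame({
    "County": ["Harjumaa", "Harjumaa", "Tartumaa"],
    "ResultValue": ["P", "N", "P"],
})

pos = df[df.ResultValue == "P"].groupby("County").count()
neg = df[df.ResultValue == "N"].groupby("County").count()
pos = pos.rename(columns={"ResultValue": "Positive"})
neg = neg.rename(columns={"ResultValue": "Negative"})

merged = pos.join(neg, how="outer").fillna(0)
print(merged)  # Tartumaa has no negatives, so its Negative count is filled with 0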
Example #11
def get_municipality_data(json_test_location, county_mapping):
    log_status("get_municipality_data()")
    municipalities_array = []
    yesterday = datetime.strftime(datetime.today() - timedelta(1), "%Y-%m-%d")
    communes_that_are_summed = ["Tallinn", "Pärnu linn", "Saaremaa vald"]
    communes_that_are_summed_data = {}
    for result in json_test_location:
        if result["StatisticsDate"] == yesterday and result["ResultValue"] == "P":
            if result["Commune"] in communes_that_are_summed:
                if result["Commune"] in communes_that_are_summed_data:
                    communes_that_are_summed_data[result["Commune"]]["range_start"] += result["TotalCasesFrom"]
                    communes_that_are_summed_data[result["Commune"]]["range_end"] += result["TotalCasesTo"]
                else:
                    communes_that_are_summed_data[result["Commune"]] = {
                        "range_start": result["TotalCasesFrom"],
                        "range_end": result["TotalCasesTo"],
                        "County": county_mapping[result["County"]],
                        "Commune": result["Commune"],
                        "ResultValue": result["ResultValue"],
                    }
            else:
                county = county_mapping[result["County"]]
                municipalities_array.append(
                    [
                        county,
                        result["Commune"],
                        result["Village"],
                        result["ResultValue"],
                        result["TotalCasesFrom"],
                        result["TotalCasesTo"],
                    ]
                )

    for commune_index in communes_that_are_summed_data:
        commune = communes_that_are_summed_data[commune_index]
        if commune["range_end"] > 0:
            municipalities_array.append(
                [
                    commune["County"],
                    commune["Commune"],
                    "",
                    commune["ResultValue"],
                    commune["range_start"],
                    commune["range_end"],
                ]
            )

    municipalities_json = {"municipalitiesData": municipalities_array}
    return municipalities_json
Example #12
def get_on_ventilation_data(json):
    log_status("get_on_ventilation_data()")
    if type(json) is not list:
        return False

    output = {}

    data = get_dict_with_dates_and_key(json, "IsOnVentilation")

    for day in data:
        if data[day] is None:
            data[day] = "0"
        if isinstance(data[day], str):
            output[day] = int(data[day])
    return output
Example #13
def valid_epoch(ep, model_object, model, test_loader):
    model.eval()
    model_object.criterion.eval()

    valid_start = time.time()
    valid_timer = Timer('M')

    preds, targets = [], []
    # Formatters retained for debugging purposes
    model_object.valid_formatter = InferenceFormatter()
    model_object.valid_formatter_target = InferenceFormatter()
    model_object.test_metrics.reset()
    for datum in test_loader:
        loss, prob, tgt, meta = predict(ep, model_object, model, datum)
        model_object.test_metrics.collect(y_prob=prob, y_true=tgt)
        model_object.valid_formatter.collect(prob, meta)
        model_object.valid_formatter_target.collect(tgt, meta)

    valid_loss = model_object.criterion.get_running_loss()
    valid_auc = model_object.test_metrics.roc_auc()
    valid_acc = model_object.test_metrics.accuracy()
    valid_ap = model_object.test_metrics.average_precision()

    status = '[epoch {}], valid loss: {:.5f}, valid acc: {:.5f}, valid auc: {:.5f}, valid ap: {:0.5f}, time taken: {:.2f} mins'
    status = status.format(ep, valid_loss, valid_acc, valid_auc, valid_ap,
                           valid_timer.time())

    if model_object.log_path:
        if model_object.verbose > 0:
            log_status(model_object.log_path, status, init=False)
        if model_object.verbose > 1:
            log_status(model_object.log_path,
                       model_object.test_metrics.top_k_percentile(
                           k=20, step=1).to_string(),
                       init=False)

    model_object.early_stopper(model_object.criterion.get_last_valid_loss(),
                               model, model_object)

    # reset validation losses and set criterion to training mode
    model_object.criterion.reset_test()
    model_object.criterion.train()
Example #14
def get_tests_per_day_chart_data(json, dates):
    log_status("get_tests_per_day_chart_data()")
    # Count totals for every day

    date_counts = defaultdict(int)
    date_positive = defaultdict(int)
    dates_within_range = {str(date.date()) for date in dates}
    date_start = str(dates[0].date())
    count_before_date_range = 0
    count_positive_before_date_range = 0

    for res in json:
        date = str(pd.to_datetime(res["StatisticsDate"]).date())
        if date in dates_within_range:
            date_counts[date] += 1
            if res["ResultValue"] == "P":
                date_positive[date] += 1
        elif date < date_start:
            # Hack to count dates before our range
            count_before_date_range += 1
            if res["ResultValue"] == "P":
                count_positive_before_date_range += 1

    tests = []
    positive_tests = []
    for date in dates:
        tests.append(date_counts[str(date.date())])
        positive_tests.append(date_positive[str(date.date())])

    # Add everything before first date to first date
    tests[0] += count_before_date_range
    positive_tests[0] += count_positive_before_date_range
    positive_test_percentage = list(
        np.round(np.array(positive_tests) / np.array(tests) * 100, 2)
    )
    return_json = {
        "positiveTestsPerDay": positive_tests,
        "negativeTestsPerDay": list(np.array(tests) - np.array(positive_tests)),
        "positiveTestsPercentage": positive_test_percentage,
        "positiveTestAverage14Percent": np.round(
            np.average(positive_test_percentage[-14:]), 2
        ),
    }

    return return_json
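The percentage arithmetic at the end, in isolation (made-up counts; note the division assumes every day in the range has at least one test):

import numpy as np

tests = np.array([100, 120, 80])
positive = np.array([7, 9, 4])

pct = np.round(positive / tests * 100, 2)
print(pct.tolist())                        # [7.0, 7.5, 5.0]
print(np.round(np.average(pct[-14:]), 2))  # 6.5 -- mean over the last up-to-14 days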
Example #15
def get_vaccinated_people_chart_data(json, dates):
    log_status("get_vaccinated_people_chart_data()")
    date_counts_progress = defaultdict(int)
    date_counts_completed = defaultdict(int)
    date_counts = defaultdict(int)

    dates_within_range = {str(date.date()) for date in dates}

    # First-series records only: "Vaccinated" = at least one dose,
    # "FullyVaccinated" = completed vaccination course
    json_progress = [x for x in json if x["MeasurementType"] == "Vaccinated" and x["VaccinationSeries"] == 1]
    json_completed = [x for x in json if x["MeasurementType"] == "FullyVaccinated" and x["VaccinationSeries"] == 1]

    # In progress = at least one dose minus completed
    for res in json:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            if res["MeasurementType"] == "FullyVaccinated" and res["VaccinationSeries"] == 1:
                date_counts_progress[date] -= res["DailyCount"]
            elif res["MeasurementType"] == "Vaccinated" and res["VaccinationSeries"] == 1:
                date_counts_progress[date] += res["DailyCount"]
    for res in json_completed:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            date_counts_completed[date] += res["DailyCount"]
    for res in json_progress:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            date_counts[date] += res["DailyCount"]

    vacc_progress = []
    vacc_completed = []
    vacc = []

    for date in dates:
        vacc_progress.append(date_counts_progress[str(date.date())])
        vacc_completed.append(date_counts_completed[str(date.date())])
        vacc.append(date_counts[str(date.date())])

    return_json = {
        "vaccinesProgress": list(np.cumsum(vacc_progress)),
        "vaccinesCompleted": list(np.cumsum(vacc_completed)),
        "vaccinesAll": list(np.cumsum(vacc)),
    }

    return return_json
Example #16
def get_cumulative_cases_chart_data(
    test_results,
    dates,
    tests_per_day_data,
):
    log_status("get_cumulative_cases_chart_data()")
    date_counts = defaultdict(int)
    for res in test_results:
        if res["ResultValue"] == "P":
            date = res["StatisticsDate"]
            date_counts[date] += 1

    confirmed_cases = []

    for date in dates:
        confirmed_cases.append(date_counts[str(date.date())])

    cases = np.cumsum(confirmed_cases)

    new_cases_14 = [tests_per_day_data["positiveTestsPerDay"][0]]
    for i in range(1, 14):
        new_cases_14.append(
            new_cases_14[i - 1] + tests_per_day_data["positiveTestsPerDay"][i]
        )

    for i in range(14, len(tests_per_day_data["positiveTestsPerDay"])):
        new_cases_14.append(
            new_cases_14[i - 1] - tests_per_day_data["positiveTestsPerDay"][i - 14] + tests_per_day_data["positiveTestsPerDay"][i]
        )

    estonian_population = 1_328_976  # From https://www.stat.ee/en/find-statistics/statistics-theme/population/population-figure
    per_100k_multiplier = 100_000 / estonian_population
    new_cases_14_per_100k = [
        round(active_cases * per_100k_multiplier, 2) for active_cases in new_cases_14
    ]

    cumulative_cases_chart_data = {
        "cases": list(cases),
        "active": new_cases_14,
        "active100k": new_cases_14_per_100k,
    }

    return cumulative_cases_chart_data
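The loop above maintains the 14-day sliding-window sum incrementally; the result can be cross-checked against a pandas rolling sum (window of 3 here for brevity):

import pandas as pd

daily = [1, 2, 3, 4, 5]
window = pd.Series(daily).rolling(3, min_periods=1).sum()
print(window.tolist())  # [1.0, 3.0, 6.0, 9.0, 12.0]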
Example #17
def scrape_deaths():
    # Load content from Terviseamet's Covid dashboard and parse it
    log_status("Scraping data on deaths from " + TERVISEAMET_COVID_DASHBOARD)
    html = requests.get(TERVISEAMET_COVID_DASHBOARD).text
    soup = BeautifulSoup(html, "html.parser")

    # Extract number of deaths from page content and update JSON data on deaths
    deaths_container = soup.select(DEATHS_SELECTOR)
    if len(deaths_container) > 0:
        try:
            # Get number of deaths and the current date
            deaths_count = int(deaths_container[0].text.strip())
            current_date = (datetime.now() -
                            timedelta(days=1)).strftime("%Y-%m-%d")

            # Load existing deaths data
            json_deaths = read_json_from_file(DEATHS_PATH)

            # Add new entry to deaths data for current date
            deaths_output = {}
            if len(json_deaths):
                deaths_output = json_deaths
            deaths_output[current_date] = deaths_count

            # Save data on deaths
            save_as_json(DEATHS_PATH + ".tmp", deaths_output)

            # Log status
            log_status("Successfully scraped deaths. Total deaths: " +
                       str(deaths_count))
        except Exception:
            # Log error
            error_message = "Error when scraping data on deaths"
            log_status(error_message + ":")
            log_status(traceback.format_exc())
            raise Exception(error_message)
    else:
        # Log error
        error_message = "Error: could not find page element with data on deaths"
        log_status(error_message)
        raise Exception(error_message)
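The scraping itself reduces to a CSS-selector lookup plus integer parsing; a self-contained miniature with made-up markup and a made-up selector (the real DEATHS_SELECTOR targets Terviseamet's dashboard):

from bs4 import BeautifulSoup

html = '<div class="stats"><span class="deaths"> 1234 </span></div>'
soup = BeautifulSoup(html, "html.parser")

container = soup.select("div.stats span.deaths")  # hypothetical selector
if len(container) > 0:
    print(int(container[0].text.strip()))  # 1234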
Example #18
def get_infection_count_by_county(json, county_mapping) -> list:
    log_status("get_infection_count_by_county()")
    # Ordering of counties for chart
    map_counties = [
        "Harjumaa",
        "Hiiumaa",
        "Ida-Virumaa",
        "Jõgevamaa",
        "Järvamaa",
        "Läänemaa",
        "Lääne-Virumaa",
        "Põlvamaa",
        "Pärnumaa",
        "Raplamaa",
        "Saaremaa",
        "Tartumaa",
        "Valgamaa",
        "Viljandimaa",
        "Võrumaa",
    ]

    # For performance, hashing
    set_map_counties = set(map_counties)

    counts = defaultdict(int)

    for res in json:
        # Check if test positive
        if res["ResultValue"] == "P":
            # Get county and map it, exclude unknown ones
            county = county_mapping[res["County"]]
            if county in set_map_counties:
                counts[county] += 1

    # Build a list of [county, count, county] triples to match the current JSON structure
    result_array = [[county, counts[county], county] for county in map_counties]

    return result_array
Example #19
def get_hospital_data(json_hospitalisation, start_date):
    log_status("get_hospital_data()")
    hospitalizations = []
    active_hospitalizations = []
    intensive = []
    start_date = datetime.strptime(start_date, "%Y-%m-%d")

    for result in json_hospitalisation:
        statistics_date = datetime.strptime(result["StatisticsDate"].split("T")[0], "%Y-%m-%d")
        if statistics_date >= start_date:
            hospitalizations += [int(result["Hospitalised"])]
            active_hospitalizations += [int(result["ActivelyHospitalised"])]
            if result["IsInIntensive"] != None:
                intensive += [int(result["IsInIntensive"])]
            else:
                intensive += [result["IsInIntensive"]]

    hospital_results = {
        "hospitalizations": hospitalizations,
        "activehospitalizations": active_hospitalizations,
        "intensive": intensive,
    }
    return hospital_results
Example #20
def get_county_by_day(json, dates, county_mapping, county_sizes):
    log_status("get_county_by_day()")
    chart_counties = [
        "Harjumaa",
        "Hiiumaa",
        "Ida-Virumaa",
        "Jõgevamaa",
        "Järvamaa",
        "Läänemaa",
        "Lääne-Virumaa",
        "Põlvamaa",
        "Pärnumaa",
        "Raplamaa",
        "Saaremaa",
        "Tartumaa",
        "Valgamaa",
        "Viljandimaa",
        "Võrumaa",
    ]

    county_date_counts = defaultdict(int)

    for res in json:
        if res["ResultValue"] == "P":
            date = res["StatisticsDate"]
            county = county_mapping[res["County"]]
            if county in chart_counties:
                county_date_counts[(county, date)] += 1

    county_by_day = {}
    new_county_by_day = {}
    map_playback = []
    map_playback_10k = []
    for county in chart_counties:
        per_day_county = []
        per_day_county_10k = []
        for date in dates:
            val = county_date_counts[(county, str(date.date()))]
            per_day_county.append(val)
            per_day_county_10k.append((val / county_sizes[county] * 10000))

        map_playback.append(
            {
                "MNIMI": county,
                "sequence": list(np.cumsum(per_day_county)),
                "drilldown": county,
            }
        )
        map_playback_10k.append(
            {
                "MNIMI": county,
                "sequence": list(np.round(np.cumsum(per_day_county_10k), 2)),
                "drilldown": county,
            }
        )
        # Calculate cumulative
        new_county_by_day[county] = list(per_day_county)
        county_by_day[county] = list(np.cumsum(per_day_county))

    county_by_day_new = [
        county_by_day[county][-1] - county_by_day[county][-2]
        for county in chart_counties
    ]

    county_list = {
        "countyByDay": county_by_day,
        "countyByDayNew": county_by_day_new,
        "newCountyByDay": new_county_by_day,
        "mapPlayback": map_playback,
        "mapPlayback10k": map_playback_10k,
    }

    return county_list
Example #21
def main():
    # Log status
    log_status("Starting to generate chart data at " + str(TODAY_DMYHM))


    # 1.  Create date ranges for charts

    log_status("Creating date ranges for charts")
    case_dates = pd.date_range(start=DATE_SETTINGS["first_case_date"], end=YESTERDAY_YMD)
    vaccination_dates = pd.date_range(start=DATE_SETTINGS["vaccination_start_date"], end=YESTERDAY_YMD)


    # 2.  Calculate data related to deaths

    try:
        deaths = read_json_from_file(DEATHS_PATH)
        manual_data = read_json_from_file(MANUAL_DATA_PATH)
    except Exception:
        # Log error
        log_status('Error when loading local data:')
        log_status(traceback.format_exc())
        exit()

    log_status("Calculating data related to deaths")

    manual_data["deceased"].update(deaths)
    deceased = list(manual_data["deceased"].values())
    n_deaths = deceased[-1]
    n_deaths_change = int(deceased[-1]) - int(deceased[-2])


    # 3.  Calculate data related to test results

    # Define columns to import
    column_list = [
        'Gender',
        'AgeGroup',
        'County',
        'ResultValue',
        'StatisticsDate'
    ]

    test_results = get_json_from_csv_file(TEST_RESULTS_PATH, column_list)
    
    log_status("Calculating data related to test results")

    # Find count of confirmed cases
    n_confirmed_cases = np.sum([res["ResultValue"] == "P" for res in test_results])

    # Find total number of tests
    n_tests_administered = len(test_results)
    log_status("Total number of tests: " + str(n_tests_administered))

    infections_by_county = get_infection_count_by_county(test_results, county_mapping)
    county_by_day = get_county_by_day(test_results, case_dates, county_mapping, county_sizes)
    confirmed_cases_by_county = get_confirmed_cases_by_county(test_results, county_mapping)
    tests_per_day_chart_data = get_tests_per_day_chart_data(test_results, case_dates)
    cumulative_cases_chart_data = get_cumulative_cases_chart_data(
        test_results,
        case_dates,
        tests_per_day_chart_data
    )
    cumulative_tests_chart_data = get_cumulative_tests_chart_data(test_results, case_dates)
    positive_test_by_age_chart_data = get_positive_tests_by_age_chart_data(test_results)
    positive_negative_chart_data = get_positive_negative_chart_data(test_results, county_mapping)
    county_daily_active = get_county_daily_active(test_results, case_dates, county_mapping, county_sizes)

    # Delete test result data from memory
    del test_results

    infections_by_county_10000 = get_infections_data_by_count_10000(infections_by_county, county_sizes)
    tests_pop_ratio = get_test_data_pop_ratio(infections_by_county_10000)
    new_cases_per_day_chart_data = get_new_cases_per_day_chart_data(cumulative_cases_chart_data)
    n_active_cases = cumulative_cases_chart_data["active"][-1]
    n_active_cases_change = (cumulative_cases_chart_data["active"][-1] - cumulative_cases_chart_data["active"][-2])
    per_100k = cumulative_cases_chart_data["active100k"][-1]
    active_infections_by_county = [
        {"MNIMI": k, "sequence": v, "drilldown": k}
        for k, v in county_daily_active["countyByDayActive"].items()
    ]
    active_infections_by_county_100k = [
        [k, round(v[-1] / county_sizes[k] * 100000, 2)]
        for k, v in county_daily_active["countyByDayActive"].items()
    ]


    # 4.  Calculate data related to test locations

    test_locations = read_json_from_file(TEST_LOCATIONS_PATH)

    municipalities_data = get_municipality_data(test_locations, county_mapping)

    # Delete test location data from memory
    del test_locations


    # 5.  Calculate data related to hospitalisation

    hospitalization = read_json_from_file(HOSPITALIZATION_PATH)

    log_status("Calculating data related to hospitalisation")

    # Set hospitalised and ICU time-series
    hospital = get_hospital_data(hospitalization, DATE_SETTINGS["first_case_date"])
    # TODO: Based on cross-checking with the hospitalisation data published by TEHIK, the data listed
    #       in the manual_data.json file with the field name "intensive" appears to show the number
    #       of patients on ventilation. We should fix the terminology and make sure that the intensive
    #       and on ventilation statistics are being calculated correctly.
    intensive = list(get_in_intensive_data(hospitalization, manual_data["intensive"]).values())
    on_ventilation = list(get_on_ventilation_data(hospitalization).values())
    # Delete hospitalization data from memory
    del hospitalization

    hospitalised = hospital["activehospitalizations"]
    n_on_ventilation = on_ventilation[-1]
    n_on_ventilation_change = int(on_ventilation[-1]) - int(on_ventilation[-2])


    # 6.  Calculate data related to vaccination

    vaccination = read_json_from_file(VACCINATIONS_PATH)

    log_status("Calculating data related to vaccination")

    vaccinated_people_chart_data = get_vaccinated_people_chart_data(vaccination, vaccination_dates)

    last_day_vaccination_data = [x for x in vaccination if x["MeasurementType"] == "Vaccinated" and x["VaccinationSeries"] == 1][-1]
    last_day_completed_vaccination_data = [x for x in vaccination if x["MeasurementType"] == "FullyVaccinated" and x["VaccinationSeries"] == 1][-1]
    last_day_doses_administered_data = [x for x in vaccination if x["MeasurementType"] == "DosesAdministered" and x["VaccinationSeries"] == 1][-1]
    # Delete vaccination data from memory
    del vaccination

    n_fully_vaccinated = last_day_completed_vaccination_data["TotalCount"]
    n_fully_vaccinated_change = last_day_completed_vaccination_data["DailyCount"]
    n_fully_vaccinated_percentage = last_day_completed_vaccination_data["PopulationCoverage"]
    n_vaccinated_at_least_one_dose = last_day_vaccination_data["TotalCount"]
    n_vaccinated_at_least_one_dose_change = last_day_vaccination_data["DailyCount"]
    n_vaccinated_at_least_one_dose_percentage = last_day_vaccination_data["PopulationCoverage"]
    # vaccination_number_total = (n_vaccinated_at_least_one_dose - n_fully_vaccinated)
    # vaccination_number_last_day = (n_vaccinated_at_least_one_dose_change - n_fully_vaccinated_change)


    # 7.  Create and save final JSON

    log_status("Compiling final JSON")

    final_json = {
        "updatedOn": TODAY_DMYHM,
        "confirmedCasesNumber": str(n_confirmed_cases),
        # TODO: For consistency, we should include the change in the number of confirmed cases as well.
        "hospitalisedNumber": str(hospital["activehospitalizations"][-1]),
        "hospitalChanged": str(hospital["activehospitalizations"][-1] - hospital["activehospitalizations"][-2]),
        "onVentilation": on_ventilation,
        "onVentilationNumber": n_on_ventilation,
        "onVentilationChanged": n_on_ventilation_change,
        "deceased": deceased,
        "deceasedNumber": str(n_deaths),
        "deceasedChanged": str(n_deaths_change),
        "testsAdministeredNumber": str(n_tests_administered),
        # TODO: For consistency, we should include the change in the number of tests as well.
        "activeCasesNumber": str(n_active_cases),
        "activeChanged": str(n_active_cases_change),
        "perHundred": str(per_100k), # TODO: This should be given a clearer name.
        "dates2": [str(x.date()) for x in case_dates],  # TODO: Change key to "caseDates"
        "dates3": [str(x.date()) for x in vaccination_dates],  # TODO: Change key to "vaccinationDates"
        "counties": counties,
        "age_groups": age_groups,
        "dataInfectionsByCounty": infections_by_county,
        "dataInfectionsByCounty10000": infections_by_county_10000,
        "dataActiveInfectionsByCounty100k": active_infections_by_county_100k,
        "dataActiveInfectionsByCounty": active_infections_by_county,
        "dataTestsPopRatio": tests_pop_ratio,
        "countyByDay": county_by_day,
        "dataCountyDailyActive": county_daily_active,
        "dataConfirmedCasesByCounty": confirmed_cases_by_county,
        "dataCumulativeCasesChart": cumulative_cases_chart_data,
        "dataNewCasesPerDayChart": new_cases_per_day_chart_data,
        "dataCumulativeTestsChart": cumulative_tests_chart_data,
        "dataTestsPerDayChart": tests_per_day_chart_data,
        "dataPositiveTestsByAgeChart": positive_test_by_age_chart_data,
        "dataPositiveNegativeChart": positive_negative_chart_data,
        "dataVaccinatedPeopleChart": vaccinated_people_chart_data,
        "dataMunicipalities": municipalities_data,
        "hospital": hospital, # TODO: Rename this to make it clearer what data it contains.
        # "vaccinationNumberTotal": vaccination_number_total,
        # "vaccinationNumberLastDay": vaccination_number_last_day,
        "fullyVaccinatedNumber": n_fully_vaccinated,
        "fullyVaccinatedNumberChange": n_fully_vaccinated_change,
        "fullyVaccinatedNumberPercentage": n_fully_vaccinated_percentage,
        "vaccinatedAtLeastOneDoseNumber": n_vaccinated_at_least_one_dose,
        "vaccinatedAtLeastOneDoseChange": n_vaccinated_at_least_one_dose_change,
        "vaccinatedAtLeastOneDosePercentage": n_vaccinated_at_least_one_dose_percentage,
    }

    # Dump JSON output
    log_status("Dumping JSON output")
    save_as_json(OUTPUT_FILE_LOCATION, final_json)

    # Log finish time
    finish = datetime.today().astimezone(ESTONIA_TIMEZONE).strftime("%d/%m/%Y, %H:%M")
    log_status("Finished update process at " + finish)
Example #22
                                    args.task_train_steps,
                                    args.multi_head)

            results = model(task_batch, meta_train=True)

            train_step += 1
            if train_step % args.write_ival == 0:
                write_train_res(results, train_step, log_dir)

            if train_step % args.test_ival == 0:
                evaluate(model, 'val', train_step)
                pp("Resuming training")

            if args.log_ival > 0 and train_step % args.log_ival == 0:
                t = (time.time() - t) / args.log_ival
                log_status(results, 'step={}'.format(train_step), t)
                t = time.time()

            # NaN check: NaN != NaN, so this triggers when the training loss diverges
            if results.train_loss != results.train_loss:
                break

            if train_step == args.meta_train_steps:
                break

    except KeyboardInterrupt:
        pp('Meta-training stopped.')
    else:
        pp('Meta-training complete.')

    try:
        model = torch.load(join(log_dir, 'model.pth.tar'))
Example #23
def train(args):
    # make dataset for train and validation
    assert args.lr_train_path is not None
    assert args.hr_train_path is not None
    assert args.lr_val_path is not None
    assert args.hr_val_path is not None
    # patch the train data for training
    train_dataset = SRDataset(lr_path=args.lr_train_path,
                              hr_path=args.hr_train_path,
                              patch_size=args.patch_size,
                              scale=args.scale,
                              aug=args.augment,
                              normalization=args.normalization,
                              need_patch=True,
                              suffix=args.suffix)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.n_threads)

    val_dataset = SRDataset(lr_path=args.lr_val_path,
                            hr_path=args.hr_val_path,
                            patch_size=args.patch_size,
                            scale=args.scale,
                            normalization=args.normalization,
                            need_patch=True,
                            suffix=args.suffix)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=args.n_threads)

    # check logs
    check_logs(args)
    writer = SummaryWriter(log_dir=args.tblog)
    # check for gpu
    device = check_hardware(args)
    # check the model
    module = import_module('model.' + args.model.lower())
    model = module.wrapper(args)

    # continue train or not
    start_epoch = 0
    best_val_psnr = -1.0
    best_val_loss = 1e8
    if args.continue_train:
        status_ = load_status(args.status_logger)
        args.lr = status_['lr']
        start_epoch = status_['epoch']
        best_val_loss = status_['best_val_loss']

        pretrained_dict = torch.load(status_['last_weight_pth'])
        model_dict = model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        logger.info(
            f"Load model from {status_['last_weight_pth']} for continuing train."
        )

    if not args.cpu:
        model = model.to(device)
    # check the optimizer
    optimizer = check_optimizer_(args, model)
    # check the lr schedule
    lr_schedule = StepLR(optimizer, args.decay_step, args.gamma)
    # check the loss
    criterion = check_loss_(args)

    # for iteration to train the model and validation for every epoch
    for epoch in range(start_epoch, args.epochs):
        torch.cuda.empty_cache()

        train_loss = 0.0
        model.train()
        for batch, data in enumerate(train_dataloader):
            x = data['lr']
            y = data['hr']
            x = x.to(device)
            y = y.to(device)

            # perform forward calculation
            y_hat = model(x)
            loss_ = criterion(y_hat, y)
            train_loss += loss_.item()
            logger.info("Epoch-%d-Batch-%d, train loss: %.4f" %
                        (epoch, batch, loss_.item()))
            writer.add_scalar(f'Train/Batchloss',
                              loss_.item(),
                              global_step=epoch * (len(train_dataloader)) +
                              batch)

            # perform backward calculation
            optimizer.zero_grad()
            loss_.backward()
            # perform gradient clipping
            if args.gclip > 0:
                nn.utils.clip_grad_value_(model.parameters(), args.gclip)
            optimizer.step()
        train_loss = train_loss / (batch + 1)
        logger.info("Epoch-%d, train loss: %.4f" % (epoch, train_loss))
        writer.add_scalar(f'Train/Epochloss', train_loss, global_step=epoch)

        # validation
        model.eval()
        with torch.no_grad():
            val_loss = 0.0
            val_psnr = 0.0
            for batch, data in enumerate(val_dataloader):
                x = data['lr']
                y = data['hr']
                x = x.to(device)
                y = y.to(device)

                y_hat = model(x)
                loss_ = criterion(y_hat, y)
                val_loss += loss_.item()

                # save the intermediate result for visualization
                y = y[0].detach().cpu().numpy()
                y_hat = y_hat[0].detach().cpu().numpy()
                y = np.transpose(y, (1, 2, 0))
                y_hat = np.transpose(y_hat, (1, 2, 0))
                # if args.normalization == 1:
                #     y = y * 255.0
                #     y_hat = y_hat * 255.0
                y = denormalize_(y, args.normalization)
                y_hat = denormalize_(y_hat, args.normalization)
                # clipping is essential; otherwise anomalous RGB noise appears in the output
                y = np.clip(y, 0.0, 255.0)
                y_hat = np.clip(y_hat, 0.0, 255.0)

                _res = np.concatenate([y_hat, y], axis=1).astype(np.uint8)
                cv2.imwrite(
                    os.path.join(args.log_img_root, f'{epoch}_{batch}.png'),
                    _res)

            val_loss = val_loss / (batch + 1)
            logger.info("Epoch-%d, validation loss: %.4f" % (epoch, val_loss))
            writer.add_scalar(f'Val/loss', val_loss, global_step=epoch)

        # adjust the learning rate
        lr_schedule.step(epoch=epoch)
        writer.add_scalar(f'Train/lr',
                          lr_schedule.get_lr()[0],
                          global_step=epoch)

        # save the model parameters with the best validation loss
        if best_val_loss > val_loss:
            best_val_loss = val_loss
            model.eval().cpu()
            torch.save(model.state_dict(), args.weight_pth)
            logger.info(f"Save {args.weight_pth}")
            model.to(device).train()

        # log the training status
        model.eval().cpu()
        torch.save(model.state_dict(), args.status_pth)
        model.to(device).train()
        status_ = {
            'epoch': epoch,
            'lr': lr_schedule.get_lr()[0],
            'best_val_loss': best_val_loss,
            'last_weight_pth': args.status_pth,
        }
        log_status(args.status_logger, **status_)
Example #24
def main():
    # Log status
    log_status("Starting data update process at " + str(today))

    # Get current number of deaths from Terviseamet's Covid dashboard
    try:
        scrape_deaths()
    except Exception:
        log_status("Aborting data update.")
        exit()

    # Load data from external services
    log_status("Downloading data from TEHIK: Test results")
    json_testing = get_json_data(TESTING_ENDPOINT)
    log_status("Downloading data from TEHIK: Location data")
    json_test_location = get_json_data(TEST_LOCATION_ENDPOINT)
    log_status("Downloading data from TEHIK: Hospitalisation data")
    json_hospitalisation = get_json_data(HOSPITALISATION_ENDPOINT)
    log_status("Downloading data from TEHIK: Vaccination data")
    json_vaccination = get_json_data(VACCINATION_ENDPOINT)

    # Validate data from remote endpoints
    # TODO: Add checks that the testing and vaccination data are up to date. We will need to adopt
    #       a different approach than for the test location and hospitalisation data due to the fact
    #       that the data structure of the JSON is different. Checking the "Last-Modified" header of the
    #       response may be the way to go and would handle the possibility that there are no tests or
    #       vaccinations on a particular day.
    ok = True
    if json_testing is None:
        log_status("Unable to retrieve testing data")
        ok = False
    if json_test_location is None:
        log_status("Unable to retrieve location data")
        ok = False
    elif not is_up_to_date(json_test_location, "LastStatisticsDate"):
        log_status("Location data is not up-to-date")
        ok = False
    if json_hospitalisation is None:
        log_status("Unable to retrieve hospitalisation data")
        ok = False
    elif not is_up_to_date(json_hospitalisation, "LastLoadStatisticsDate"):
        log_status("Hospitalisation data is not up-to-date")
        ok = False
    if json_vaccination is None:
        log_status("Unable to retrieve vaccination data")
        ok = False
    # TODO: Review whether this check is needed. I have commented it out for now.
    # if not is_header_last_modified_up_to_date(TEST_LOCATION_ENDPOINT):
    #     log_status("Location data last modified is not up-to-date")
    #     ok = False

    if not ok:
        log_status(
            "One or more of the TEHIK APIs has not been updated or could not be retrieved."
        )
        log_status("Aborting data update.")
        exit()

    # Load locally-stored data
    log_status("Loading local data files")
    try:
        json_deaths = read_json_from_file(DEATHS_FILE_LOCATION)
        json_manual = read_json_from_file(MANUAL_DATA_FILE_LOCATION)
    except Exception:
        # Log error
        log_status('Error when loading local data:')
        log_status(traceback.format_exc())
        exit()

    # Log status
    log_status("Calculating main statistics")

    # Statsbar
    # Find count of confirmed cases
    n_confirmed_cases = np.sum(
        [res["ResultValue"] == "P" for res in json_testing])

    # Find total number of tests
    n_tests_administered = len(json_testing)

    # Create date ranges for charts
    # dates1 = pd.date_range(start=DATE_SETTINGS["dates1_start"], end=yesterday)
    dates2 = pd.date_range(start=DATE_SETTINGS["dates2_start"], end=yesterday)
    dates3 = pd.date_range(start=DATE_SETTINGS["dates3_start"], end=yesterday)

    # Set recovered, deceased, hospitalised and ICU time-series
    hospital = get_hospital_data(json_hospitalisation,
                                 DATE_SETTINGS["dates2_start"])
    recovered = hospital["discharged"]
    json_manual["deceased"].update(json_deaths)
    deceased = list(json_manual["deceased"].values())
    hospitalised = hospital["activehospitalizations"]
    # TODO: Based on cross-checking with the hospitalisation data published by TEHIK, the data listed
    #       in the manual_data.json file with the field name "intensive" appears to show the number
    #       of patients on ventilation. We should fix the terminology and make sure that the intensive
    #       and on ventilation statistics are being calculated correctly.
    intensive = list(
        get_in_intensive_data(json_hospitalisation,
                              json_manual["intensive"]).values())
    on_ventilation = list(
        get_on_ventilation_data(json_hospitalisation).values())

    n_deaths = deceased[-1]
    n_deaths_change = int(deceased[-1]) - int(deceased[-2])

    # Get data for each chart
    log_status("Calculating data for charts")
    infections_by_county = get_infection_count_by_county(
        json_testing, county_mapping)
    infections_by_county_10000 = get_infections_data_by_count_10000(
        infections_by_county, county_sizes)
    tests_pop_ratio = get_test_data_pop_ratio(infections_by_county_10000)
    county_by_day = get_county_by_day(json_testing, dates2, county_mapping,
                                      county_sizes)
    confirmed_cases_by_county = get_confirmed_cases_by_county(
        json_testing, county_mapping)
    cumulative_cases_chart_data = get_cumulative_cases_chart_data(
        json_testing, recovered, deceased, hospitalised, intensive,
        on_ventilation, dates2)
    new_cases_per_day_chart_data = get_new_cases_per_day_chart_data(
        cumulative_cases_chart_data)
    cumulative_tests_chart_data = get_cumulative_tests_chart_data(
        json_testing, dates2)
    tests_per_day_chart_data = get_tests_per_day_chart_data(
        json_testing, dates2)
    positive_test_by_age_chart_data = get_positive_tests_by_age_chart_data(
        json_testing)
    positive_negative_chart_data = get_positive_negative_chart_data(
        json_testing, county_mapping)
    vaccinated_people_chart_data = get_vaccinated_people_chart_data(
        json_vaccination, dates3)
    county_daily_active = get_county_daily_active(json_testing, dates2,
                                                  county_mapping, county_sizes)
    n_active_cases = cumulative_cases_chart_data["active"][-1]
    n_active_cases_change = (cumulative_cases_chart_data["active"][-1] -
                             cumulative_cases_chart_data["active"][-2])
    active_infections_by_county = [{
        "MNIMI": k,
        "sequence": v,
        "drilldown": k
    } for k, v in county_daily_active["countyByDayActive"].items()]
    active_infections_by_county_100k = [[
        k, round(v[-1] / county_sizes[k] * 100000, 2)
    ] for k, v in county_daily_active["countyByDayActive"].items()]
    municipalities_data = get_municipality_data(json_test_location,
                                                county_mapping)
    per_100k = cumulative_cases_chart_data["active100k"][-1]

    # Calculate vaccination data
    log_status("Calculating vaccination data")
    last_day_vaccination_data = [
        x for x in json_vaccination if x["MeasurementType"] == "Vaccinated"
    ][-1]
    last_day_completed_vaccination_data = [
        x for x in json_vaccination
        if x["MeasurementType"] == "FullyVaccinated"
    ][-1]
    # TODO: Doses administered
    # last_day_doses_administered_data = [x for x in json_vaccination if x['MeasurementType'] == 'DosesAdministered'][-1]
    completed_vaccination_number_total = last_day_completed_vaccination_data["TotalCount"]
    completed_vaccination_number_last_day = last_day_completed_vaccination_data["DailyCount"]
    all_vaccination_number_total = last_day_vaccination_data["TotalCount"]
    all_vaccination_number_last_day = last_day_vaccination_data["DailyCount"]
    vaccination_number_total = (all_vaccination_number_total -
                                completed_vaccination_number_total)
    vaccination_number_last_day = (all_vaccination_number_last_day -
                                   completed_vaccination_number_last_day)
    fully_vaccinated_from_total_vaccinated_percentage = round(
        completed_vaccination_number_total * 100 / all_vaccination_number_total, 2)

    # Create dictionary for final JSON
    log_status("Compiling final JSON")
    final_json = {
        "updatedOn": today,
        "confirmedCasesNumber": str(n_confirmed_cases),
        # TODO: For consistency, we should include the change in the number of confirmed cases as well.
        "hospitalisedNumber": str(hospital["activehospitalizations"][-1]),
        "hospitalChanged": str(hospital["activehospitalizations"][-1] -
                               hospital["activehospitalizations"][-2]),
        "deceasedNumber": str(n_deaths),
        "deceasedChanged": str(n_deaths_change),
        "recoveredNumber": str(hospital["discharged"][-1]),
        "recoveredChanged": str(hospital["discharged"][-1] - hospital["discharged"][-2]),
        "testsAdministeredNumber": str(n_tests_administered),
        # TODO: For consistency, we should include the change in the number of tests as well.
        "activeCasesNumber": str(n_active_cases),
        "activeChanged": str(n_active_cases_change),
        "perHundred": str(per_100k),  # TODO: This should be given a clearer name.
        # TODO: I can't find anywhere in the app where "dates1" is used. Is it needed? Commented out for now.
        # "dates1": [str(x.date()) for x in dates1],
        "dates2": [str(x.date()) for x in dates2],
        "dates3": [str(x.date()) for x in dates3],
        "counties": counties,
        "age_groups": age_groups,
        "dataInfectionsByCounty": infections_by_county,
        "dataInfectionsByCounty10000": infections_by_county_10000,
        "dataActiveInfectionsByCounty100k": active_infections_by_county_100k,
        "dataActiveInfectionsByCounty": active_infections_by_county,
        "dataTestsPopRatio": tests_pop_ratio,
        "countyByDay": county_by_day,
        "dataCountyDailyActive": county_daily_active,
        "dataConfirmedCasesByCounties": confirmed_cases_by_county,
        "dataCumulativeCasesChart": cumulative_cases_chart_data,
        "dataNewCasesPerDayChart": new_cases_per_day_chart_data,
        "dataCumulativeTestsChart": cumulative_tests_chart_data,
        "dataTestsPerDayChart": tests_per_day_chart_data,
        "dataPositiveTestsByAgeChart": positive_test_by_age_chart_data,
        "dataPositiveNegativeChart": positive_negative_chart_data,
        "dataVaccinatedPeopleChart": vaccinated_people_chart_data,
        "dataMunicipalities": municipalities_data,
        "hospital": hospital,  # TODO: Rename this to make it clearer what data it contains.
        "vaccinationNumberTotal": vaccination_number_total,
        "vaccinationNumberLastDay": vaccination_number_last_day,
        "completedVaccinationNumberTotal": completed_vaccination_number_total,
        "completedVaccinationNumberLastDay": completed_vaccination_number_last_day,
        "allVaccinationNumberTotal": all_vaccination_number_total,
        "allVaccinationNumberLastDay": all_vaccination_number_last_day,
        "allVaccinationFromPopulationPercentage": last_day_vaccination_data["PopulationCoverage"],
        "completelyVaccinatedFromTotalVaccinatedPercentage": fully_vaccinated_from_total_vaccinated_percentage,
    }

    # Dump JSON output
    log_status("Dumping JSON output")
    save_as_json(OUTPUT_FILE_LOCATION, final_json)

    # Log finish time
    finish = datetime.today().astimezone(estonian_timezone).strftime(
        "%d/%m/%Y, %H:%M")
    log_status("Finished update process at " + finish)
Example #25
def get_positive_tests_by_age_chart_data(json):
    log_status("get_positive_tests_by_age_chart_data()")
    results = [d["ResultValue"] for d in json]
    genders = [d["Gender"] for d in json]
    age_groups = [d["AgeGroup"] for d in json]

    df = pd.DataFrame(
        {"Gender": genders, "AgeGroup": age_groups, "ResultValue": results}
    )

    pos_results = df[df.ResultValue == "P"].groupby(["Gender", "AgeGroup"]).count()
    neg_results = df[df.ResultValue == "N"].groupby(["Gender", "AgeGroup"]).count()

    pos_results.rename(columns={"ResultValue": "Positive"}, inplace=True)

    neg_results.rename(columns={"ResultValue": "Negative"}, inplace=True)

    end_df = pos_results.join(neg_results, how="outer")
    end_df.fillna(0, inplace=True)
    end_df["Positive"] = end_df[["Positive"]].astype("int")

    male_order = [
        ("M", "0-4"),
        ("M", "5-9"),
        ("M", "10-14"),
        ("M", "15-19"),
        ("M", "20-24"),
        ("M", "25-29"),
        ("M", "30-34"),
        ("M", "35-39"),
        ("M", "40-44"),
        ("M", "45-49"),
        ("M", "50-54"),
        ("M", "55-59"),
        ("M", "60-64"),
        ("M", "65-69"),
        ("M", "70-74"),
        ("M", "75-79"),
        ("M", "80-84"),
        ("M", "üle 85"),
    ]

    female_order = [
        ("N", "0-4"),
        ("N", "5-9"),
        ("N", "10-14"),
        ("N", "15-19"),
        ("N", "20-24"),
        ("N", "25-29"),
        ("N", "30-34"),
        ("N", "35-39"),
        ("N", "40-44"),
        ("N", "45-49"),
        ("N", "50-54"),
        ("N", "55-59"),
        ("N", "60-64"),
        ("N", "65-69"),
        ("N", "70-74"),
        ("N", "75-79"),
        ("N", "80-84"),
        ("N", "üle 85"),
    ]

    # Create male positive and negative lists
    malePositive = []
    maleNegative = []
    femalePositive = []
    femaleNegative = []
    for i in range(len(male_order)):
        malePositive.append(end_df.loc[male_order[i], "Positive"])
        maleNegative.append(end_df.loc[male_order[i], "Negative"])
        femalePositive.append(end_df.loc[female_order[i], "Positive"])
        femaleNegative.append(end_df.loc[female_order[i], "Negative"])
    femaleTotal = sum(femalePositive) + sum(femaleNegative)
    maleTotal = sum(malePositive) + sum(maleNegative)
    malePositiveTotal = sum(malePositive)
    femalePositiveTotal = sum(femalePositive)
    maleNegativeTotal = sum(maleNegative)
    femaleNegativeTotal = sum(femaleNegative)

    end_result = {
        "malePositive": malePositive,
        "maleNegative": maleNegative,
        "maleTotal": maleTotal,
        "malePositiveTotal": malePositiveTotal,
        "maleNegativeTotal": maleNegativeTotal,
        "femalePositive": femalePositive,
        "femaleNegative": femaleNegative,
        "femaleTotal": femaleTotal,
        "femalePositiveTotal": femalePositiveTotal,
        "femaleNegativeTotal": femaleNegativeTotal,
    }

    return end_result
Example #26
def main():
    deaths = False
    try:
        scrape_deaths()
        deaths = True
    except Exception:
        log_status("Failed to scrape deaths")

    # Download data from external services
    log_status("Downloading data from TEHIK: Test results")
    download_data(TESTING_ENDPOINT, TEST_RESULTS_PATH + ".tmp")

    log_status("Downloading data from TEHIK: Location data")
    download_data(TEST_LOCATION_ENDPOINT, TEST_LOCATIONS_PATH + ".tmp")

    log_status("Downloading data from TEHIK: Hospitalization data")
    download_data(HOSPITALIZATION_ENDPOINT, HOSPITALIZATION_PATH + ".tmp")

    log_status("Downloading data from TEHIK: Vaccination data")
    download_data(VACCINATION_ENDPOINT, VACCINATIONS_PATH + ".tmp")

    # Validate data from remote endpoints
    #
    # TODO: Add checks that the testing and vaccination data are up to date. We will need to adopt
    #       a different approach than for the test location and hospitalisation data due to the fact
    #       that the data structure of the JSON is different. Checking the "Last-Modified" header of the
    #       response may be the way to go and would handle the possibility that there are no tests or
    #       vaccinations on a particular day.

    hospitalization = read_json_from_file(HOSPITALIZATION_PATH + ".tmp")
    if not is_up_to_date(hospitalization, "LastLoadStatisticsDate"):
        raise Exception("Hospitalization data is not up-to-date")

    log_status("All OK, replacing old files with downloaded files")
    if deaths:
        move(DEATHS_PATH + ".tmp", DEATHS_PATH)
    move(TEST_RESULTS_PATH + ".tmp", TEST_RESULTS_PATH)
    move(TEST_LOCATIONS_PATH + ".tmp", TEST_LOCATIONS_PATH)
    move(HOSPITALIZATION_PATH + ".tmp", HOSPITALIZATION_PATH)
    move(VACCINATIONS_PATH + ".tmp", VACCINATIONS_PATH)
Example #27
def get_infections_data_by_count_10000(infections_by_county, county_sizes):
    log_status("get_infections_data_by_count_10000()")
    return [
        [county, round(value / county_sizes[county] * 10000, 2), county]
        for county, value, _ in infections_by_county
    ]
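A worked example with made-up figures: 500 cases in a county of 60,000 residents gives 83.33 cases per 10,000.

county_sizes = {"Harjumaa": 60000}  # made-up population figure
infections_by_county = [["Harjumaa", 500, "Harjumaa"]]

per_10k = [[county, round(value / county_sizes[county] * 10000, 2), county]
           for county, value, _ in infections_by_county]
print(per_10k)  # [['Harjumaa', 83.33, 'Harjumaa']]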
Example #28
def get_county_daily_active(json, dates, county_mapping, county_sizes):
    log_status("get_county_daily_active()")
    chart_counties = [
        "Harjumaa",
        "Hiiumaa",
        "Ida-Virumaa",
        "Jõgevamaa",
        "Järvamaa",
        "Läänemaa",
        "Lääne-Virumaa",
        "Põlvamaa",
        "Pärnumaa",
        "Raplamaa",
        "Saaremaa",
        "Tartumaa",
        "Valgamaa",
        "Viljandimaa",
        "Võrumaa",
    ]

    county_date_counts = defaultdict(int)

    for res in json:
        if res["ResultValue"] == "P":
            date = res["StatisticsDate"]
            county = county_mapping[res["County"]]
            if county in chart_counties:
                county_date_counts[(county, date)] += 1

    county_by_day = {}
    active_map_100k_playback = []

    for county in chart_counties:
        per_day_county = []
        active_per_day_county_100k = []
        for date in dates:
            val = county_date_counts[(county, str(date.date()))]
            per_day_county.append(val)
            active_per_day_county_100k.append((val / county_sizes[county] * 100000))

        # Calculate cumulative
        county_by_day[county] = list(
            map(int, pd.Series(per_day_county).rolling(14, min_periods=0).sum())
        )
        active_map_100k_playback.append(
            {
                "MNIMI": county,
                "sequence": list(
                    round(
                        pd.Series(active_per_day_county_100k)
                        .rolling(14, min_periods=0)
                        .sum(),
                        1,
                    )
                ),
                "drilldown": county,
            }
        )
    active_list = {
        "countyByDayActive": county_by_day,
        "activeMap100kPlayback": active_map_100k_playback,
    }

    return active_list