def create_submission(model, datasets, raw_datasets):
    """Assemble a single submission from per-dataset predictions.

    `datasets` and `raw_datasets` are walked in lockstep. For each pair the
    model's predictions on the dataset are converted into a submission part
    with the help of the matching raw dataset; the parts are then joined by
    `pd.concat` with a freshly numbered index.
    """
    parts = [
        predictions_to_submission(get_predictions(ds, model), raw_ds)
        for ds, raw_ds in zip(datasets, raw_datasets)
    ]
    return pd.concat(parts, ignore_index=True)
Пример #2
0
def test(mhc, data, model, model_path):
    '''
    Testing protocol

    Evaluate saved weights on the data of one allele: load the CSV at `data`,
    bring the peptides to a common length, keep the rows for `mhc`, build the
    network named by `model`, load `model_path` into it and print the AUC, F1
    and Kendall's tau of its predictions.

    mhc        -- allele name handed to Dataset.get_allele
    data       -- CSV path; read with columns 'mhc', 'peptide', 'IC50(nM)'
    model      -- 'fc', 'gru', 'lstm', 'chunky_cnn' or 'spanny_cnn'
    model_path -- weights file loaded into the network

    Raises ValueError when `model` is not one of the names listed above.
    '''

    # print out options
    print('Testing\nMHC: %s\nData: %s\nModel: %s\nSave path: %s' %
          (mhc, data, model, model_path))

    # Reject unknown architectures before doing any work. Without this check
    # the name fell through the if/elif chain below and the string itself
    # reached `load_weights`, failing with an unhelpful AttributeError.
    if model not in ('fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn'):
        raise ValueError('Unknown model: %r' % (model,))

    # load test data
    test_data = Dataset.from_csv(filename=data,
                                 sep=',',
                                 allele_column_name='mhc',
                                 peptide_column_name='peptide',
                                 affinity_column_name='IC50(nM)')

    # apply cut/pad or mask to same length
    if 'lstm' in model or 'gru' in model:
        test_data.mask_peptides()
    else:
        test_data.cut_pad_peptides()

    # get the allele specific data
    mhc_test = test_data.get_allele(mhc)

    # define model (from here on `model` is the network, not its name)
    if model == 'fc':
        model = models.mhcnuggets_fc()
    elif model == 'gru':
        model = models.mhcnuggets_gru()
    elif model == 'lstm':
        model = models.mhcnuggets_lstm()
    elif model == 'chunky_cnn':
        model = models.mhcnuggets_chunky_cnn()
    elif model == 'spanny_cnn':
        model = models.mhcnuggets_spanny_cnn()

    # restore weights and compile model
    model.load_weights(model_path)
    model.compile(loss='mse', optimizer=Adam(lr=0.001))

    # get tensorized values for testing
    test_peptides, test_continuous, test_binary = mhc_test.tensorize_keras(
        embed_type='softhot')

    # test
    preds_continuous, preds_binary = get_predictions(test_peptides, model)
    test_auc = roc_auc_score(test_binary, preds_continuous)
    test_f1 = f1_score(test_binary, preds_binary)
    test_ktau = kendalltau(test_continuous, preds_continuous)[0]
    print('Test AUC: %.4f, F1: %.4f, KTAU: %.4f' %
          (test_auc, test_f1, test_ktau))
Пример #3
0
def main():
    """Train on the baseline data, then score the input file window by window.

    Command line: ``<script> inputfile [user_rating]``. When exactly one
    extra argument follows the input file it is parsed as an int and
    replaces the default rating of 5.

    The baseline CSV ("basedata/main.csv") is loaded, preprocessed and used
    to train the models. The input file is then cut into consecutive,
    non-overlapping windows of ``sliding_samples`` rows. Each window is
    preprocessed and run through the trained models, and the accident chance
    and user rating are updated from the result. Rows after the last full
    window are not processed. The final values are printed.

    Calls ``sys.exit(-1)`` when no input file is given.
    """
    # Only the input file is mandatory. The earlier `<= 2` test rejected a
    # lone input file although the rating has a default value.
    if len(sys.argv) < 2:
        print("Please provide the inputfile(realtimedata)", len(sys.argv))
        sys.exit(-1)

    window = 20
    window2 = 10
    user_rating = 5
    chances = 0.2
    inputfile = sys.argv[1]

    if len(sys.argv) == 3:
        print("User past rating out of 10", int(sys.argv[2]))
        user_rating = int(sys.argv[2])

    baseline_dataset = load_my_data("basedata/main.csv", ",")
    baseline_dataset = preprocess_data(baseline_dataset, window, window2)
    m_mdls = mdl.train_model(baseline_dataset)
    print("DONE TRAINING MODELS WITH BASELINE DATASET")

    dataset1 = load_my_data(inputfile, ',')
    sliding_samples = 120
    rows = dataset1.shape[0]

    # Advance one whole window per iteration. The previous loop tried to do
    # this with `timestep += sliding_samples` inside a `for`, which has no
    # effect on the iteration, so it re-scored a window at every single row.
    full_windows_end = (rows // sliding_samples) * sliding_samples
    for start in range(0, full_windows_end, sliding_samples):
        end = start + sliding_samples
        print("PROCESSING WINDOW FROM ", start, " UNTIL ", end)
        dset1 = preprocess_data(dataset1[start:end], window, window2)
        out_lbls, prob = mdl.get_predictions(dset1, m_mdls)
        chances, user_rating = get_accident_probability(
            user_rating, out_lbls, prob, chances)

    print("CHANCES OF ACCIDENT:", chances, " NEW USER RATING:",
          round(user_rating))
Пример #4
0
def calculate_relation(mhc, data, model, weights_dir):
    '''
    Find the tuning allele whose saved weights best predict `mhc`

    For every other allele in `data.alleles` (visited in sorted order) the
    file <weights_dir>/<allele>.h5 is loaded into `model`, and the model is
    scored by ROC AUC on the tensorized data of `mhc`. An allele is skipped
    when loading its weights raises IOError or when scoring raises
    ValueError.

    mhc         -- allele whose data is used for scoring
    data        -- dataset object providing `alleles` and `get_allele`
    model       -- network whose weights are replaced for each candidate
    weights_dir -- directory holding one '<allele>.h5' file per allele

    Returns (best_mhc, best_auc, num_mhc, num_tuning_mhc), where num_mhc is
    the number of peptides of `mhc` and num_tuning_mhc the number of
    peptides of the best candidate. When no candidate reaches an AUC above
    0 the result is ('', 0, num_mhc, 0).
    '''

    print('Calculating tuning MHC for %s' % mhc)

    # get the allele specific data
    mhc_data = data.get_allele(mhc)
    train_peptides, train_continuous, train_binary = mhc_data.tensorize_keras(embed_type='softhot')
    best_mhc = ''
    best_auc = 0
    num_mhc = len(mhc_data.peptides)
    # Bound up front: the return below used to raise UnboundLocalError when
    # every candidate was skipped or none scored above 0.
    num_tuning_mhc = 0

    for tuning_mhc in sorted(set(data.alleles)):

        # don't want to tune with ourselves
        if mhc == tuning_mhc:
            continue

        # load the candidate's weights; alleles without a usable file are skipped
        model_path = os.path.join(weights_dir, tuning_mhc + '.h5')
        try:
            model.load_weights(model_path)
        except IOError:
            continue
        preds_continuous, preds_binary = get_predictions(train_peptides, model)

        try:
            auc = roc_auc_score(train_binary, preds_continuous)
            if auc > best_auc:
                best_mhc = tuning_mhc
                best_auc = auc
                num_tuning_mhc = len(data.get_allele(tuning_mhc).peptides)
        except ValueError:
            continue

    return best_mhc, best_auc, num_mhc, num_tuning_mhc
Пример #5
0
def predict(model, weights_path, peptides_path):
    '''
    Prediction protocol

    Read one peptide per line from `peptides_path`, bring the peptides to a
    common length, tensorize them, build the network named by `model`, load
    `weights_path` into it and print each peptide with its predicted IC50.

    model         -- 'fc', 'gru', 'lstm', 'chunky_cnn' or 'spanny_cnn'
    weights_path  -- weights file loaded into the network
    peptides_path -- text file with one peptide per line

    Raises ValueError when `model` is not one of the names listed above.
    '''

    # Reject unknown architectures before doing any work. Without this check
    # the name fell through the if/elif chain below and the string itself
    # reached `load_weights`, failing with an unhelpful AttributeError.
    if model not in ('fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn'):
        raise ValueError('Unknown model: %r' % (model,))

    # read peptides; the context manager closes the file, which the previous
    # bare open() inside a comprehension never did
    with open(peptides_path) as peptides_file:
        peptides = [p.strip() for p in peptides_file]

    print('Predicting for %d peptides' % (len(peptides)))
    # apply cut/pad or mask to same length
    if 'lstm' in model or 'gru' in model:
        normed_peptides = dataset.mask_peptides(peptides)
    else:
        normed_peptides = dataset.cut_pad_peptides(peptides)

    # get tensorized values for prediction
    peptides_tensor = dataset.tensorize_keras(normed_peptides,
                                              embed_type='softhot')

    # make model
    print('Building model')
    # define model (from here on `model` is the network, not its name)
    if model == 'fc':
        model = models.mhcnuggets_fc()
    elif model == 'gru':
        model = models.mhcnuggets_gru()
    elif model == 'lstm':
        model = models.mhcnuggets_lstm()
    elif model == 'chunky_cnn':
        model = models.mhcnuggets_chunky_cnn()
    elif model == 'spanny_cnn':
        model = models.mhcnuggets_spanny_cnn()

    model.load_weights(weights_path)
    model.compile(loss='mse', optimizer=Adam(lr=0.001))

    # run the model and map each predicted value to an IC50
    preds_continuous, preds_binary = get_predictions(peptides_tensor, model)
    ic50s = [dataset.map_proba_to_ic50(p[0]) for p in preds_continuous]
    for peptide, ic50 in zip(peptides, ic50s):
        print(peptide, ic50)
def run_app():
    """Render the Corona Calculator page.

    Streamlit elements are emitted in the order they appear on the page:
    title and disclaimer, reported versus estimated true cases for the
    country chosen in the sidebar, the historical plot, the simulated spread
    for the selected contact rate, the hospital-bed comparison, and the
    outcome breakdown by age group, followed by the sources and share links.

    The statement order is the page order, so it must be kept when editing.
    """
    css.hide_menu()
    css.limit_plot_size()

    # Get cached country data; refetch once after clearing the cache if stale
    countries = fetch_country_data()

    if countries.stale:
        st.caching.clear_cache()
        countries = fetch_country_data()

    st.markdown(
        body=generate_html(text="Corona Calculator", bold=True, tag="h1"),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h2",
            text="A tool to help you visualize the impact of social distancing <br>",
        ),
        unsafe_allow_html=True,
    )

    st.markdown(
        body=generate_html(
            text="<strong>Disclaimer:</strong> <em>The creators of this application are not healthcare professionals. "
            "The illustrations provided were estimated using best available data but might not accurately reflect reality.</em>",
            color="gray",
            font_size="12px",
        ),
        unsafe_allow_html=True,
    )
    st.markdown(
        body=generate_html(
            tag="h4",
            text=f"<u><a href=\"{NOTION_MODELLING_DOC}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Methodology</a></u> <span> &nbsp;&nbsp;&nbsp;&nbsp</span>"
            f"<u><a href=\"{MEDIUM_BLOGPOST}\" target=\"_blank\" style=color:{COLOR_MAP['pink']};>"
            "Blogpost</a> </u>"
            "<hr>",
        ),
        unsafe_allow_html=True,
    )

    # Figures for the country selected in the sidebar
    sidebar = Sidebar(countries)
    country = sidebar.country
    country_data = countries.country_data[country]
    _historical_df = countries.historical_country_data
    historical_data = _historical_df.loc[_historical_df.index == country]
    number_cases_confirmed = country_data["Confirmed"]
    population = country_data["Population"]
    num_hospital_beds = country_data["Num Hospital Beds"]

    st.subheader(f"How has the disease spread in {country}?")
    st.write(
        "The number of reported cases radically underestimates the true cases, because people do not show symptoms for "
        "several days, not everybody gets tested, and the tests take a few days to return results. "
        "The extent depends upon your country's testing strategy."
        " This estimate (14% reporting) is from China ([source](https://science.sciencemag.org/content/early/2020/03/13/science.abb3221))."
    )
    # Estimate true cases
    true_cases_estimator = models.TrueInfectedCasesModel(
        constants.ReportingRate.default
    )
    estimated_true_cases = true_cases_estimator.predict(number_cases_confirmed)

    reported_vs_true_cases(int(number_cases_confirmed), estimated_true_cases)

    st.markdown(
        f"Given the prevalence of the infection in your country, the probability of being infected at this time is "
        f"**{estimated_true_cases / population:.3%}**. Even if you show no symptoms, the probability of being infected is "
        f"**{models.get_probability_of_infection_give_asymptomatic(population, estimated_true_cases, constants.AsymptomaticRate.default):.3%}**. "
        f"Note that these probabilities are on a country-wide basis and so may not apply to your situation."
    )

    # Plot historical data
    fig = graphing.plot_historical_data(historical_data)
    st.write(fig)

    asymptomatic_cases_estimator = models.AsymptomaticCasesModel(
        constants.AsymptomaticRate.default
    )

    contact_rate = sidebar.contact_rate

    asymptomatic_sir_model = models.AsymptomaticSIRModel(
        transmission_rate_per_contact=constants.TransmissionRatePerContact.default_per_symptom_state,
        contact_rate=contact_rate,
        asymptomatic_cases_model=asymptomatic_cases_estimator,
        recovery_rate=constants.RecoveryRate.default,
        normal_death_rate=constants.MortalityRate.default,
        critical_death_rate=constants.CriticalDeathRate.default,
        hospitalization_rate=constants.HospitalizationRate.default,
        hospital_capacity=num_hospital_beds
    )

    # Forecast frame; the code below reads its `Status` and `Forecast` columns
    df = models.get_predictions(
        cases_estimator=true_cases_estimator,
        sir_model=asymptomatic_sir_model,
        num_diagnosed=number_cases_confirmed,
        num_recovered=country_data["Recovered"],
        num_deaths=country_data["Deaths"],
        area_population=population,
    )

    st.subheader("How will my actions affect the spread?")
    st.write(
        "The critical factor for controlling spread is how many others infected people interact with each day. "
        "This has a dramatic effect upon the dynamics of the disease. "
    )
    st.write(
        "**Play with the slider to the left to see how this changes the dynamics of disease spread**"
    )

    # The spread graph leaves out the hospitalization series
    df_base = df[~df.Status.isin(["Need Hospitalization"])]
    base_graph = graphing.infection_graph(df_base, df_base.Forecast.max(), sidebar.contact_rate)
    st.warning(graph_warning)
    st.write(base_graph)

    st.subheader("How will this affect my healthcare system?")
    st.write(
        "The important variable for hospitals is the peak number of people who require hospitalization"
        " and ventilation at any one time."
    )

    # Do some rounding to avoid beds sounding too precise!
    approx_num_beds = round(num_hospital_beds / 100) * 100
    st.write(
        f"Your country has around **{approx_num_beds:,}** beds. Bear in mind that most of these "
        "are probably already in use for people sick for other reasons."
    )
    st.write(
        "It's hard to know exactly how many ventilators are present per country, but there will certainly be a worldwide "
        "shortage. Many countries are scrambling to buy them [(source)](https://www.reuters.com/article/us-health-coronavirus-draegerwerk-ventil/germany-italy-rush-to-buy-life-saving-ventilators-as-manufacturers-warn-of-shortages-idUSKBN210362)."
    )

    peak_occupancy = df.loc[df.Status == "Need Hospitalization"]["Forecast"].max()
    # With nobody needing a bed the ratio is undefined; report full coverage
    # instead of dividing by zero.
    if peak_occupancy == 0:
        percent_beds_at_peak = 100
    else:
        percent_beds_at_peak = min(100 * num_hospital_beds / peak_occupancy, 100)

    num_beds_comparison_chart = graphing.num_beds_occupancy_comparison_chart(
        num_beds_available=approx_num_beds, max_num_beds_needed=peak_occupancy, contact_rate=sidebar.contact_rate
    )

    st.write(num_beds_comparison_chart)

    st.markdown(
        f"At peak, **{int(peak_occupancy):,}** people will need hospital beds. ** {percent_beds_at_peak:.1f}% ** of people "
        f"who need a bed in hospital will have access to one given your country's historical resources. This does "
        f"not take into account any special measures that may have been taken in the last few months."
    )

    st.subheader("How severe will the impact be?")

    # Last forecast value of each series
    num_dead = df[df.Status == "Dead"].Forecast.iloc[-1]
    num_recovered = df[df.Status == "Recovered"].Forecast.iloc[-1]
    st.markdown(
        f"If the average person in your country adopts the selected behavior, we estimate that **{int(num_dead):,}** "
        f"people will die."
    )

    # Caption for the chart written just below it (the earlier wording,
    # "The graph above below a breakdown", was garbled).
    st.markdown(
        "The graph below shows a breakdown of casualties and hospitalizations by age group."
    )

    outcomes_by_age_group = models.get_status_by_age_group(num_dead, num_recovered)
    fig = graphing.age_segregated_mortality(
        outcomes_by_age_group.loc[:, ["Dead", "Need Hospitalization"]], contact_rate=sidebar.contact_rate
    )
    st.write(fig)

    st.write(
        f"Parameters by age group, including demographic distribution, are [worldwide numbers](https://population.un.org/wpp/DataQuery/) "
        f"so they may be slightly different in your country."
    )
    st.write(
        f"We've used mortality rates from this [recent paper from Imperial College](https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf?fbclid=IwAR3TzdPTcLiOZN5r2dMd6_08l8kG0Mmr0mgP3TdzimpqB8H96T47ECBUfTM). "
        f"However, we've adjusted them according to the [maximum mortality rate recorded in Wuhan](https://wwwnc.cdc.gov/eid/article/26/6/20-0233_article)"
        f" when your country's hospitals are overwhelmed: if more people who need them lack hospital beds, more people will die."
    )
    st.write("<hr>", unsafe_allow_html=True)
    st.write(
        "Like this? [Click here to share it on Twitter](https://ctt.ac/u5U39), and "
        "[let us know your feedback via Google Form](https://forms.gle/J6ZFFgh4rVQm4y8G7)"
    )

    utils.insert_github_logo()
Пример #7
0
def run_app():
    """Render the Australia COVID-19 simulator page.

    Elements are written in page order: logo and headings, the historical
    and forecast chart for the region chosen in the sidebar, the SIR
    simulation, the hospital-bed comparison, the death time series, the
    contact form and the credits. Progress messages go to stdout.
    """
    # ------------------------------ data load ------------------------------
    css.hide_menu()
    css.limit_plot_size()

    # Both cached fetches are issued before either staleness check.
    countries = _fetch_country_data()
    global_data = _fetch_global_data()

    if countries.stale:
        st.caching.clear_cache()
        countries = _fetch_country_data()

    if global_data.stale:
        st.caching.clear_cache()
        global_data = _fetch_global_data()

    # --------------------------- heading section ---------------------------
    utils.img_html(
        alt_text='Fractal',
        href='https://fractal.ai',
        src='https://i2.wp.com/fractal.ai/wp-content/uploads/2018/02/header-black-logo.png?fit=126%2C43&ssl=1',
        attributes={'width': 125, 'height': 43, 'target': '_blank'},
    )

    title_html = generate_html(
        text="Australia COVID-19 Simulator", bold=True, tag="h1")
    st.markdown(body=title_html, unsafe_allow_html=True)

    subtitle_html = generate_html(
        tag="h2",
        text="A tool to help you visualize the impact of social distancing <br>",
    )
    st.markdown(body=subtitle_html, unsafe_allow_html=True)

    disclaimer_html = generate_html(
        text=(
            "<strong>Disclaimer:</strong> <em>The creators of this application are not healthcare professionals. "
            "The illustrations provided were estimated using best available data but might not accurately reflect reality.</em>"
        ),
        color="gray",
        font_size="12px",
    )
    st.markdown(body=disclaimer_html, unsafe_allow_html=True)

    # Sidebar drives the region selection and the simulation inputs
    sidebar = Sidebar(countries)

    # ------------------- historical and forecast chart ---------------------
    country = sidebar.country
    country_data = countries.country_data[country]
    history_df = countries.historical_country_data

    # "Australia" gets its history from a dedicated helper and sets the
    # country flag on the plot; any other selection is filtered by index.
    is_australia = country == "Australia"
    if is_australia:
        historical_data_custom = data_utils.make_historical_data(history_df)
    else:
        historical_data_custom = history_df.loc[history_df.index == country]

    try:
        forecasted_data = forecast_utils.get_forecasts(
            country, constants.FORECAST_HORIZON)
        plot_df = data_utils.prep_plotting_data(
            forecasted_data, historical_data_custom)
        fig = graphing.plot_time_series_forecasts(
            plot_df, country_flag=is_australia, country_name=country)
    except Exception as exc:
        # Best effort: on failure `plot_df` and `fig` stay unbound and the
        # forecast section further down reports the problem to the user.
        if is_australia:
            print("Error", exc)
        else:
            print(exc)

    historical_data = history_df.loc[history_df.index == country]

    number_cases_confirmed = country_data["Confirmed"]
    population = country_data["Population"]
    num_hospital_beds = country_data["Num Hospital Beds"]

    age_table = constants.AGE_DATA
    age_data = age_table.loc[age_table["State"] == country, :]

    st.subheader(
        f"How is the disease likely to spread in {country} in the next week?")

    # Estimate true cases
    true_cases_estimator = models.TrueInfectedCasesModel(
        constants.ReportingRate.default)

    try:
        # Last row of the plotting frame supplies the headline numbers
        latest = plot_df.tail(1)
        confirmed_now = int(number_cases_confirmed)
        confirmed_forecast = latest["confirmed"].tolist()[0]
        lower_label = graphing.abbreviate(
            latest["lower_bound"].tolist()[0], round_factor=0)
        upper_label = graphing.abbreviate(
            latest["upper_bound"].tolist()[0], round_factor=0)
        reported_vs_true_cases(
            confirmed_now, confirmed_forecast, lower_label, upper_label)

        # Plot historical data
        st.write(fig)
        print("Historical Data with Forecasts plotted")
    except Exception as exc:
        print(exc)
        st.markdown(
            "Something went wrong :( Forecasts unavailable. Contact admin")

    # ----------------------- SIR model and simulator -----------------------
    sir_model = models.SIRModel(
        transmission_rate_per_contact=constants.TransmissionRatePerContact.default,
        contact_rate=sidebar.contact_rate,
        recovery_rate=constants.RecoveryRate.default,
        normal_death_rate=constants.MortalityRate.default,
        critical_death_rate=constants.CriticalDeathRate.default,
        hospitalization_rate=constants.HospitalizationRate.default,
        hospital_capacity=num_hospital_beds,
    )

    df = models.get_predictions(
        cases_estimator=true_cases_estimator,
        sir_model=sir_model,
        num_diagnosed=number_cases_confirmed,
        num_recovered=country_data["Recovered"],
        num_deaths=country_data["Deaths"],
        area_population=population,
        max_days=sidebar.num_days_for_prediction,
    )

    st.subheader("How will my actions affect the spread?")
    st.write(
        "**Use the slider in the sidebar to see how this changes the dynamics of disease spread**"
    )

    # Series left out of the infections graph
    hidden_statuses = ["Need Hospitalization", "Recovered", "Susceptible"]
    df_base = df[~df.Status.isin(hidden_statuses)]
    base_graph = graphing.infection_graph(
        df_base, df_base.Forecast.max(), population * 0.5, population * 0.75)
    st.write(base_graph)
    print("Infections Graph Plotted")

    # -------------------------- effect on hospitals ------------------------
    st.subheader("How will this affect my healthcare system?")

    # Do some rounding to avoid beds sounding too precise!
    approx_num_beds = round(num_hospital_beds / 100) * 100
    st.write(
        f"{country} has around **{approx_num_beds:,}** beds. Bear in mind that most of these "
        "are probably already in use for people sick for other reasons.")

    hospital_rows = df.loc[df.Status == "Need Hospitalization"]
    peak_occupancy = hospital_rows["Forecast"].max()
    percent_beds_at_peak = min(100 * num_hospital_beds / peak_occupancy, 100)

    num_beds_comparison_chart = graphing.num_beds_occupancy_comparison_chart(
        num_beds_available=approx_num_beds,
        max_num_beds_needed=peak_occupancy,
    )
    st.write(num_beds_comparison_chart)

    st.markdown(
        f"At peak, **{int(peak_occupancy):,}** people will need hospital beds. At least ** {100 - percent_beds_at_peak:.1f}% ** of "
        f" people who need a bed in hospital might not have access given historical resources of {country}."
    )

    # ----------------------------- death charts ----------------------------
    st.subheader("How severe will the impact be?")

    # Last forecast value of each series
    dead_rows = df[df.Status == "Dead"]
    num_dead = dead_rows.Forecast.iloc[-1]
    num_recovered = df[df.Status == "Recovered"].Forecast.iloc[-1]

    glob_hist = global_data.historical_country_data
    uk_mask = (glob_hist.index == "UK") | (glob_hist.index == "United Kingdom")
    uk_data = glob_hist.loc[uk_mask, :].copy()
    uk_death_mirror = get_uk_death_mirror(uk_data, country_data["Deaths"])

    death_plot = graphing.plot_death_timeseries(
        df[df.Status == "Dead"], uk_death_mirror, country_name=country)

    st.markdown(
        f"If the average person in {country} adopts the selected behavior, we estimate that **{int(num_dead):,}** "
        f"people will die.")
    st.markdown("This graph illustrates predicted deaths.")

    # The age-group figure is built here but only `death_plot` is written.
    outcomes_by_age_group = models.get_status_by_age_group(
        num_dead, num_recovered, age_data)
    fig = graphing.age_segregated_mortality(
        outcomes_by_age_group.loc[:, ["Dead"]])

    st.write(death_plot)
    print("Deaths Graph Plotted")

    # ------------------------------ contact us -----------------------------
    st.markdown("-------")
    contact_heading = generate_html(
        "We'd love to hear from you!", tag='h2', color='#0090c4')
    st.markdown(body=contact_heading, unsafe_allow_html=True)
    user_input = st.text_input("Enter your email to contact us")
    utils.contact_us(user_input)

    # ------------------------------- credits -------------------------------
    st.subheader("References and Credits:")

    methodology_link = (
        f'<u><a href="{NOTION_MODELLING_DOC}" target="_blank" style=color:{COLOR_MAP["pink"]};>'
        "Methodology</a></u> <span> &nbsp;&nbsp;&nbsp;&nbsp</span>"
        "<hr>"
    )
    st.markdown(
        body=generate_html(tag="h4", text=methodology_link),
        unsafe_allow_html=True,
    )

    data_source_link = (
        f'<u><a href="https://github.com/CSSEGISandData/COVID-19" target="_blank" style=color:{COLOR_MAP["pink"]};>'
        "2019 Novel Coronavirus COVID-19 (2019-nCoV) Data Repository by Johns Hopkins CSSE</a></u> <span> &nbsp;&nbsp;&nbsp;&nbsp</span>"
        "<hr>"
    )
    st.markdown(
        body=generate_html(tag="h4", text=data_source_link),
        unsafe_allow_html=True,
    )

    print("complete....")
def run_app():
    """Render the Corona Calculator page (SIR-model variant).

    Elements are written in page order: headings and links, reported versus
    estimated true cases, the spread graph for the sidebar's contact rate,
    the mortality note, and the hospital section with its graph and the
    bed-availability summary.
    """
    css.hide_menu()
    css.limit_plot_size()

    # Cached country data; refetched once after a cache clear when stale
    countries = _fetch_country_data()
    if countries.stale:
        st.caching.clear_cache()
        countries = _fetch_country_data()

    # --- headings -----------------------------------------------------------
    title_html = generate_html(text="Corona Calculator", bold=True, tag="h1")
    st.markdown(body=title_html, unsafe_allow_html=True)

    subtitle_html = generate_html(
        tag="h2",
        text="A tool to help you visualize the impact of social distancing <br>",
    )
    st.markdown(body=subtitle_html, unsafe_allow_html=True)

    disclaimer_html = generate_html(
        text=(
            "<strong>Disclaimer:</strong> <em>The creators of this application are not healthcare professionals. "
            "The illustrations provided were estimated using best available data but might not accurately reflect reality.</em>"
        ),
        color="gray",
        font_size="12px",
    )
    st.markdown(body=disclaimer_html, unsafe_allow_html=True)

    links_text = (
        f'<u><a href="{NOTION_MODELLING_DOC}" target="_blank" style=color:{COLOR_MAP["pink"]};>'
        "Methodology</a></u> <span> &nbsp;&nbsp;&nbsp;&nbsp</span>"
        f'<u><a href="{MEDIUM_BLOGPOST}" target="_blank" style=color:{COLOR_MAP["pink"]};>'
        "Blogpost</a> </u>"
        "<hr>"
    )
    st.markdown(
        body=generate_html(tag="h4", text=links_text),
        unsafe_allow_html=True,
    )

    # --- inputs for the selected country -------------------------------------
    sidebar = Sidebar(countries)
    country = sidebar.country
    country_data = countries.country_data[country]
    number_cases_confirmed = country_data["Confirmed"]
    population = country_data["Population"]
    num_hospital_beds = country_data["Num Hospital Beds"]

    # --- models and forecast ---------------------------------------------------
    sir_model = models.SIRModel(
        transmission_rate_per_contact=constants.TransmissionRatePerContact.default,
        contact_rate=sidebar.contact_rate,
        recovery_rate=constants.RecoveryRate.default,
        normal_death_rate=constants.MortalityRate.default,
        critical_death_rate=sidebar.severe_mortality_rate,
        hospitalization_rate=constants.HospitalizationRate.default,
        hospital_capacity=num_hospital_beds,
    )
    true_cases_estimator = models.TrueInfectedCasesModel(
        constants.AscertainmentRate.default)

    df = models.get_predictions(
        true_cases_estimator,
        sir_model,
        number_cases_confirmed,
        population,
        sidebar.num_days_for_prediction,
    )

    estimated_true_cases = true_cases_estimator.predict(number_cases_confirmed)
    reported_vs_true_cases(number_cases_confirmed, estimated_true_cases)

    st.write(
        "The number of reported cases radically underestimates the true cases, because people do not show symptoms for "
        "several days, not everybody gets tested, and the tests take a few days to  return results. "
        "The extent depends upon your country's testing strategy."
        " We estimated the above using numbers from Japan ([source](https://www.ncbi.nlm.nih.gov/pubmed/32033064))."
    )

    # --- spread ---------------------------------------------------------------
    st.subheader("How will the disease spread?")
    st.write(
        "The critical factor for controlling spread is how many others infected people interact with each day. "
        "This has a dramatic effect upon the dynamics of the disease. ")
    st.write(
        "**Play with the slider to the left to see how this changes the dynamics of disease spread**"
    )

    # Largest forecast value across all series, used to scale both graphs
    forecast_peak = df.Forecast.max()

    care_statuses = ["Need Hospitalization", "Need Ventilation"]
    df_base = df[~df.Status.isin(care_statuses)]
    base_graph = graphing.infection_graph(df_base, forecast_peak)
    st.warning(graph_warning)
    st.write(base_graph)

    # Built here, written further down in the healthcare section
    hospital_rows = df[df.Status.isin(["Infected", "Need Hospitalization"])]
    hospital_graph = graphing.hospitalization_graph(
        hospital_rows,
        num_hospital_beds,
        max(num_hospital_beds, forecast_peak),
    )

    st.write(
        "Note that we use a fixed estimate of the mortality rate here, of 1% [(source)](https://institutefordiseasemodeling.github.io/nCoV-public/analyses/first_adjusted_mortality_estimates_and_risk_assessment/2019-nCoV-preliminary_age_and_time_adjusted_mortality_rates_and_pandemic_risk_assessment.html). "
        "In reality, the mortality rate will be highly dependent upon the load upon the healthcare system and "
        "the availability of treatment. Some estimates ([like this one](https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(20)30195-X/fulltext)) are closer to 6%."
    )

    # --- healthcare system ------------------------------------------------------
    st.subheader("How will this affect my healthcare system?")
    st.write(
        "The important variable for hospitals is the peak number of people who require hospitalization"
        " and ventilation at any one time.")

    # Do some rounding to avoid beds sounding too precise!
    approx_num_beds = round(num_hospital_beds / 100) * 100
    st.write(
        f"Your country has around **{approx_num_beds:,}** beds. Bear in mind that most of these "
        "are probably already in use for people sick for other reasons.")
    st.write(
        "It's hard to know how many ventilators are present per country, but there will certainly be a worldwide "
        "shortage. Many countries are scrambling to buy them [(source)](https://www.reuters.com/article/us-health-coronavirus-draegerwerk-ventil/germany-italy-rush-to-buy-life-saving-ventilators-as-manufacturers-warn-of-shortages-idUSKBN210362)."
    )

    st.warning(graph_warning)
    st.write(hospital_graph)

    need_beds = df.loc[df.Status == "Need Hospitalization"]
    peak_occupancy = need_beds["Forecast"].max()
    # Share of peak demand the existing beds could cover, capped at 100
    percent_beds_at_peak = min(100 * num_hospital_beds / peak_occupancy, 100)

    st.markdown(
        f"At peak, **{peak_occupancy:,}** people will need hospital beds. ** {percent_beds_at_peak:.1f} % ** of people "
        f"who need a bed in hospital will have access to one given your country's historical resources. This does "
        f"not take into account any special measures that may have been taken in the last few months."
    )
Пример #9
0
def train(mhc, data, model, model_path, lr, n_epoch, transfer_path):
    '''
    Training protocol

    Train the network named by `model` on the data of one allele. The CSV at
    `data` is loaded, the peptides are brought to a common length and the
    rows for `mhc` are kept. The network is fitted one epoch at a time;
    after every epoch it is scored on the training data and its weights are
    written to `model_path` whenever the F1 score exceeds the best seen so
    far.

    mhc           -- allele name handed to Dataset.get_allele
    data          -- CSV path; read with columns 'mhc', 'peptide', 'IC50(nM)'
    model         -- 'fc', 'gru', 'lstm', 'chunky_cnn' or 'spanny_cnn'
    model_path    -- where the best weights are saved
    lr            -- learning rate given to the Adam optimizer
    n_epoch       -- number of single-epoch fit rounds
    transfer_path -- weights to start from; skipped when falsy

    Raises ValueError when `model` is not one of the names listed above.
    '''

    # print out options
    print(
        'Training\nMHC: %s\nData: %s\nModel: %s\nSave path: %s\nTransfer: %s' %
        (mhc, data, model, model_path, transfer_path))

    # Reject unknown architectures before doing any work. Without this check
    # the name fell through the if/elif chain below and the string itself
    # reached `compile`/`load_weights`, failing with an AttributeError.
    if model not in ('fc', 'gru', 'lstm', 'chunky_cnn', 'spanny_cnn'):
        raise ValueError('Unknown model: %r' % (model,))

    # load training
    train_data = Dataset.from_csv(filename=data,
                                  sep=',',
                                  allele_column_name='mhc',
                                  peptide_column_name='peptide',
                                  affinity_column_name='IC50(nM)')

    # apply cut/pad or mask to same length
    if 'lstm' in model or 'gru' in model:
        train_data.mask_peptides()
    else:
        train_data.cut_pad_peptides()

    # get the allele specific data
    mhc_train = train_data.get_allele(mhc)

    # define model (from here on `model` is the network, not its name)
    if model == 'fc':
        model = models.mhcnuggets_fc()
    elif model == 'gru':
        model = models.mhcnuggets_gru()
    elif model == 'lstm':
        model = models.mhcnuggets_lstm()
    elif model == 'chunky_cnn':
        model = models.mhcnuggets_chunky_cnn()
    elif model == 'spanny_cnn':
        model = models.mhcnuggets_spanny_cnn()

    # check if we need to do transfer learning
    if transfer_path:
        model.load_weights(transfer_path)

    # compile model with the caller's learning rate (it used to be ignored
    # in favour of a hard-coded 0.001)
    model.compile(loss='mse', optimizer=Adam(lr=lr))

    # get tensorized values for training
    train_peptides, train_continuous, train_binary = mhc_train.tensorize_keras(
        embed_type='softhot')

    # convergence criterion
    highest_f1 = -1

    for epoch in range(n_epoch):

        # train
        model.fit(train_peptides, train_continuous, epochs=1, verbose=0)
        # test model on training data
        train_preds_cont, train_preds_bin = get_predictions(
            train_peptides, model)
        train_auc = roc_auc_score(train_binary, train_preds_cont)
        train_f1 = f1_score(train_binary, train_preds_bin)
        train_ktau = kendalltau(train_continuous, train_preds_cont)[0]
        print('epoch %d / %d' % (epoch, n_epoch))
        print('Train AUC: %.4f, F1: %.4f, KTAU: %.4f' %
              (train_auc, train_f1, train_ktau))

        # keep the weights of the best epoch so far
        if train_f1 > highest_f1:
            highest_f1 = train_f1
            model.save_weights(model_path)

    print('Done!')
Пример #10
0
# -*- coding: utf-8 -*-
"""
Main file for the second Machine Learning project:
    ### TODO ###

Authors: Kirill IVANOV, Matthias RAMIREZ, Nicolas TALABOT
"""

import argparse

from models import get_pretrained_models, train_and_get_models, get_predictions

# Command-line options: a single switch that forces retraining
parser = argparse.ArgumentParser()
parser.add_argument(
    "--train_model",
    action="store_true",
    help="train the whole model, do not reuse the pre-trained one",
)
args = parser.parse_args()

data_dir = 'training/'
test_data_dir = 'test_set_images/'

# Train from the training directory only on request; otherwise reuse the
# pre-trained models.
if args.train_model:
    model1, model2, model3 = train_and_get_models(data_dir)
else:
    model1, model2, model3 = get_pretrained_models()

# Run the three models over the test images
get_predictions(model1, model2, model3, test_data_dir)