from pathlib import Path

import pandas as pd
import streamlit as st

import utilities  # project-local plotting helpers (line_plot, bar_chart, scatter_plot)
# CSVDataBase is a project-local data-access class; its import is not shown in the original.


def main_stats_page():
    data_dir = Path.cwd() / 'data'
    db = CSVDataBase(data_dir)

    # load data
    df = db.import_static_data()
    totals = df.sum()
    ts = db.get_csse_time_series_deaths()
    n = 15

    # headline totals
    st.markdown(f":skull: reported deaths: `{totals['deaths']:,.0f}` ")
    st.markdown(f":male_zombie: reported recoveries: `{totals['recov']:,.0f}` \n")
    st.markdown(f":face_with_thermometer: reported cases: `{totals['cases']:,.0f}` ")
    st.markdown(" \n \n \n \n")  # vertical whitespace

    # time series deaths plot
    st.markdown("#### :skull: deaths over time\n")
    st.markdown(" \n \n \n \n \n")
    countries = ['United Kingdom', 'Spain', 'US', 'Italy', 'France']
    # all_countries = ts['country'].unique().tolist()
    # st.sidebar.multiselect('select countries', all_countries, default=countries)
    chart = utilities.line_plot(ts, countries)
    st.altair_chart(chart)

    # bar plots
    chart = utilities.bar_chart(df, 'country', 'deaths', n=n)
    st.altair_chart(chart)
    chart = utilities.bar_chart(df, 'country', 'cases', n=n)
    st.altair_chart(chart)

    # scatter plot
    scatter = utilities.scatter_plot(df, 'cases_per_million', 'pop_density', 'country', n)
    st.altair_chart(scatter)

    # data table
    pd.set_option('display.max_colwidth', None)  # -1 is deprecated; None disables truncation
    st.markdown('### data table\n', unsafe_allow_html=False)
    st.markdown("*click column headers to sort* :arrow_up_small::arrow_down_small:")
    formatted_df = df.style.format({
        "cases": "{:,.0f}",
        "deaths": "{:,.0f}",
        "recov": "{:,.0f}",
    })
    st.write(formatted_df)
    st.markdown(
        "sources  \n"
        "[wikipedia](https://en.wikipedia.org/wiki/Template:2019%E2%80%9320_coronavirus_pandemic_data)  \n"
        "[world bank](http://api.worldbank.org/v2/en/indicator/EN.POP.DNST?downloadformat=csv)  \n"
        "[johns hopkins](https://github.com/CSSEGISandData/COVID-19)  \n"
        "[apple](https://www.apple.com/covid19/mobility)"
    )
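# The `utilities` module is not shown in this file. Below is a minimal sketch of
# what its `line_plot` helper might look like, assuming the time-series frame has
# `date`, `country` and `deaths` columns; the signature and column names are
# assumptions inferred from how the helper is called above, not the project's own code.

import altair as alt


def line_plot(ts: pd.DataFrame, countries: list) -> alt.Chart:
    # Keep only the selected countries, then draw one line per country.
    subset = ts[ts['country'].isin(countries)]
    return (
        alt.Chart(subset)
        .mark_line()
        .encode(
            x=alt.X('date:T', title='date'),
            y=alt.Y('deaths:Q', title='deaths'),
            color='country:N',
        )
    )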
st.markdown(f"#### :skull: deaths over time\n") st.markdown(f" \n \n \n \n \n") countries = ['United Kingdom', 'Spain', 'US', 'Italy', 'France'] all_countries = ts['country'].unique().tolist() # st.sidebar.multiselect('select countries', all_countries, default=countries) chart = utilities.line_plot(ts, countries) st.altair_chart(chart) #bar plots chart = utilities.bar_chart(df, 'country', 'deaths', n=n) st.altair_chart(chart) chart = utilities.bar_chart(df, 'country', 'cases', n=n) st.altair_chart(chart) #scatter plot scatter = utilities.scatter_plot(df, 'cases_per_million', 'pop_density', 'country', n) st.altair_chart(scatter) #data table pd.set_option('display.max_colwidth', -1) st.markdown('### data table\n', unsafe_allow_html=False) st.markdown( "*click column headers to sort* :arrow_up_small::arrow_down_small:") formatted_df = df.style.format({ "cases": "{:,.0f}", "deaths": "{:,.0f}", "recov": "{:,.0f}" }) st.write(formatted_df) st.markdown(
import glob
import os
from os.path import exists, join

import matplotlib.pyplot as plt
import stk


def xtb_conformers(cage, cage_name, etemp, output_dir, conformer_dir,
                   opt=False, opt_level=None, solvent=None):
    if not exists(output_dir):
        os.mkdir(output_dir)

    if solvent is None:
        solvent_str = None
        solvent_grid = 'normal'
    else:
        solvent_str, solvent_grid = solvent

    print('doing XTB conformer sorting by energy')
    conformers = glob.glob(f'{conformer_dir}/conf_*.xyz')
    ids = []
    energies = []
    min_energy = 10E20
    for file in sorted(conformers):
        conf_id = file.replace('.xyz', '').split('_')[-1]  # renamed from `id` to avoid shadowing the builtin
        cage.update_from_file(file)
        if opt:
            print(f'optimising conformer {conf_id}')
            xtb_opt = stk.XTB(
                xtb_path='/home/atarzia/software/xtb-190806/bin/xtb',
                output_dir=f'opt_{cage_name}_{conf_id}',
                gfn_version=2,
                num_cores=6,
                opt_level=opt_level,
                charge=4,
                num_unpaired_electrons=0,
                max_runs=1,
                electronic_temperature=etemp,
                calculate_hessian=False,
                unlimited_memory=True,
                solvent=solvent_str,
                solvent_grid=solvent_grid
            )
            xtb_opt.optimize(mol=cage)
            cage.write(join(output_dir, f'conf_{conf_id}_opt.xyz'))

        print(f'calculating energy of {conf_id}')
        # Extract energy.
        xtb_energy = stk.XTBEnergy(
            xtb_path='/home/atarzia/software/xtb-190806/bin/xtb',
            output_dir=f'ey_{cage_name}_{conf_id}',
            num_cores=6,
            charge=4,
            num_unpaired_electrons=0,
            electronic_temperature=etemp,
            unlimited_memory=True,
            solvent=solvent_str,
            solvent_grid=solvent_grid
        )
        energy = xtb_energy.get_energy(cage)
        if energy < min_energy:
            min_energy_conformer = file
            min_energy = energy
        ids.append(conf_id)
        energies.append(energy)

    print('done', min_energy, min_energy_conformer)
    # Write out the lowest-energy conformer in several formats.
    cage.update_from_file(min_energy_conformer)
    cage.write(f'{cage_name}_optc.mol')
    cage.write(f'{cage_name}_optc.xyz')
    cage.dump(f'{cage_name}_optc.json')

    # Convert to relative energies in kJ/mol (1 Hartree = 2625.5 kJ/mol).
    energies = [(i - min(energies)) * 2625.5 for i in energies]
    # scatter_plot here is a project-local plotting helper that returns (fig, ax).
    fig, ax = scatter_plot(
        X=ids,
        Y=energies,
        xtitle='conformer id',
        ytitle='rel. energy [kJmol$^{-1}$]',
        xlim=(0, 201),
        ylim=(-5, 1000)
    )
    fig.tight_layout()
    fig.savefig(
        join(output_dir, f'{cage_name}_conf_energies.pdf'),
        dpi=720,
        bbox_inches='tight'
    )
    plt.close()
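# A minimal usage sketch of the function above. The directory names, charge state
# and solvent tuple are illustrative assumptions, and `cage` is an stk molecule
# constructed elsewhere in the workflow.
#
# xtb_conformers(
#     cage=cage,
#     cage_name='cage_1',
#     etemp=300,
#     output_dir='xtb_output',
#     conformer_dir='crest_conformers',
#     opt=True,
#     opt_level='normal',
#     solvent=('chcl3', 'verytight'),  # unpacked above as (solvent_str, solvent_grid)
# )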
# **Question**: Play with the parameters (*variance*, *scale* and *period*) and see how they affect the plot.

# In[3]:

sample_size = 300
variance = 10  # Variance of the Gaussian noise
scale = 100  # Range
period = 6  # Simulations are based on the cosine function (see the data_simulation function)

x_train, y_train = data_simulation(int(.7 * sample_size), scale, period, variance)
x_test, y_test = data_simulation(int(.3 * sample_size), scale, period, variance)

plt = scatter_plot(x_train, x_test, y_train, y_test)  # The scatter_plot function is in the utilities script

# ### 2.1.2 Getting visual intuition about models' capacity
#
# As seen in class (slide 38, for example), the higher the model's capacity, the better it will fit the training set (caution, though: fitting the training data well does not necessarily lead to good generalization). Here, we use [polynomial regression](https://en.wikipedia.org/wiki/Polynomial_regression) to fit the training set (don't worry, the purpose of this tutorial is not to understand polynomial regression). Note, however, that the greater the polynomial degree, the higher the model's capacity. A sketch of how these degrees might be used follows the cell below.
#
# **Questions**:
# 1. Observe how the fitted curves behave with respect to their polynomial degree.
# 2. Would you prefer to fit the data points with a polynomial regression of degree 25 or 100?
# 3. Which of these curves should have the best generalization error?

# In[6]:

degree = [0, 1, 3, 5, 10, 20, 150]
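# The original cell is truncated here. Below is a minimal sketch of how the degrees above might be used, assuming `x_train`/`y_train` are array-like and fitting with `numpy.polyfit`; the plotting details are assumptions, not the tutorial's own code.

# In[ ]:

import numpy as np
import matplotlib.pyplot as plt

for d in degree:
    # Least-squares fit of a degree-d polynomial to the training points.
    coeffs = np.polyfit(x_train, y_train, d)
    xs = np.linspace(np.min(x_train), np.max(x_train), 500)
    plt.plot(xs, np.polyval(coeffs, xs), label=f'degree {d}')

plt.scatter(x_train, y_train, s=10, color='black', label='training data')
plt.legend()
plt.show()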
# **Question**: Vary the parameters (*variance*, *scale* and *period*) and see how they change the figure below.

# In[3]:

sample_size = 300
variance = 10  # Variance of the Gaussian noise
scale = 100  # The range
period = 6  # The simulation is based on the cosine function (see the data_simulation function)

x_train, y_train = data_simulation_(int(.7 * sample_size), scale, period, variance)
x_test, y_test = data_simulation_(int(.3 * sample_size), scale, period, variance)

# This function is in the utilities.py file
plt = scatter_plot(x_train, x_test, y_train, y_test)

# ### 2.1.2 Getting a first visual intuition about model capacity
#
# As seen in class (e.g. slide 38), the higher the model's capacity, the better the model will perform on the training set (again, beware: this says nothing about its ability to generalize). For now, we will train a [polynomial regression](https://fr.wikipedia.org/wiki/R%C3%A9gression_polynomiale) model.
# The advantage of this model is that we can easily change its capacity by increasing the degree of the polynomial $m$:
#
# $$\hat{y} = \sum_{i=1}^m w_i x^i$$
#
# but understanding the details of the model is not very important.
#
# **Questions**:
# 1. Observe the effect of the polynomial degree on its ability to predict the data.
# 2. In your opinion, is it better to use a polynomial of degree 20 or 50?
# 3. Which of these polynomials should have the best generalization error?
#