def main():
    """Run the whole pipeline, stage by stage, using one config file.

    The config file name is resolved from ``sys.argv`` and passed unchanged
    to every stage. Each stage module is imported right before it is run,
    so a later stage's import only happens once the earlier stages finished.
    """
    # Get the name of the config file
    config_file = leverage_efficiency.base.get_config_filename(sys.argv)

    def run_stage(stage_module):
        """Invoke the ``main`` entry point of one stage module."""
        stage_module.main(config_file)

    # Extract the data from source data folder into common format
    import extract
    run_stage(extract)

    # Update data with most recent values (optional)
    # import update  # This doesn't connect to the rest of the pipeline yet
    # update.main(config_file)

    # Calculate derived quantities like returns for input into calculations
    import transform
    run_stage(transform)

    # Perform leverage efficiency calculations
    import analysis
    run_stage(analysis)

    # Create figures
    import plots
    run_stage(plots)

    # Create exact figures used in the paper
    import paper_plots
    run_stage(paper_plots)

    # Create figures used in the EE lecture notes
    import lecture_plots
    run_stage(lecture_plots)
def main(f_config, gff_infile, outdir, has_mirna, make_plots):
    """Extract features, run the predictor, label the output, optionally plot.

    Parameters
    ----------
    f_config : path of the configuration file; the ``promi2`` and
        ``configs`` sections are read from it.
    gff_infile : input GFF file.
    outdir : output directory (created through ``ensure_dir``).
    has_mirna : forwarded unchanged to ``extractFeatures_given_gff``.
    make_plots : when truthy, ``plots.main`` is run on the labelled
        predictions and writes into ``<outdir>/plots``.

    Returns
    -------
    Path of the labelled prediction file (prediction path + ``.label``).
    """
    ensure_dir(outdir)

    config = SafeConfigParser()
    config.read(f_config)

    f_params = config.get('promi2', 'params')
    listoffeatures = config.get('promi2', 'features').split(',')
    labelfile = config.get('configs', 'labelfile')

    # Correlation is only taken into account when it is a listed feature.
    is_consider_corr = 'corr' in listoffeatures

    ## Make sure no chrM in infile
    _verify_infile(gff_infile)

    ## Extract features
    gff_allfeatures = extractFeatures_given_gff(
        f_config, gff_infile, outdir, has_mirna, is_consider_corr)

    ## Don't consider TSS which does not have a partner miRNA
    gff_allfeatures = _filter_keepValidPairs(gff_allfeatures)

    ## Run Promirna
    prediction_name = 'Predictions.%s.txt' % os.path.basename(gff_infile)
    fo_predictions = os.path.join(outdir, prediction_name)
    promi2.promi2(f_params, listoffeatures, gff_allfeatures, fo_predictions)

    ## Label predictions
    fo_labelledpredictions = fo_predictions + '.label'
    label.main(fo_predictions, labelfile, fo_labelledpredictions)

    ## Generate plots
    if make_plots:
        import plots
        plots.main(fo_labelledpredictions,
                   os.path.join(outdir, 'plots'),
                   f_config)

    return fo_labelledpredictions
def main(f_config, gff_infile, outdir, has_mirna, make_plots):
    """Run feature extraction, prediction and labelling for one GFF file.

    Settings come from ``f_config`` (sections ``promi2`` and ``configs``).
    ``has_mirna`` is passed straight through to the feature extraction.
    When ``make_plots`` is truthy, plots of the labelled predictions are
    written to ``<outdir>/plots``.

    Returns the path of the labelled prediction file.
    """
    ensure_dir(outdir)

    settings = SafeConfigParser()
    settings.read(f_config)
    f_params = settings.get('promi2', 'params')
    feature_names = settings.get('promi2', 'features').split(',')
    labelfile = settings.get('configs', 'labelfile')
    use_corr = 'corr' in feature_names

    ## Make sure no chrM in infile
    _verify_infile(gff_infile)

    ## Extract features, then drop TSS which do not have a partner miRNA
    extracted = extractFeatures_given_gff(f_config, gff_infile, outdir,
                                          has_mirna, use_corr)
    valid_pairs = _filter_keepValidPairs(extracted)

    ## Run Promirna
    fo_predictions = os.path.join(
        outdir, 'Predictions.%s.txt' % os.path.basename(gff_infile))
    promi2.promi2(f_params, feature_names, valid_pairs, fo_predictions)

    ## Label predictions
    fo_labelled = fo_predictions + '.label'
    label.main(fo_predictions, labelfile, fo_labelled)

    # Nothing more to do unless plots were requested.
    if not make_plots:
        return fo_labelled

    ## Generate plots
    import plots
    plot_dir = os.path.join(outdir, 'plots')
    plots.main(fo_labelled, plot_dir, f_config)
    return fo_labelled
def main():
    """
    Clean the temp folder and run the emcee sampler.

    When complete:
    - Save results in a .csv file
    - Generate a corner plot
    - Send SMS alert (optional)
    """
    start = time.time()
    cleanTempFolder()

    def emceeSampler(params_list):
        """
        Run the emcee sampler and check for convergence every n steps.

        Parameters
        ----------
        params_list: list, float
            Initial parameter guesses.
            NOTE: This is a global variable, imported from init_params.py
            (see the file's import list).

        Returns
        ----------
        None
        """

        def _prepEmcee(params_list):
            """Initialize walkers in a Gaussian ball around initial guess."""
            n_params = len(params_list)
            print("# of parameters emcee is fitting: {}".format(n_params))
            print("Initial parameter guesses:{}".format(params_list))

            centre = np.reshape(params_list, (1, n_params))
            jitter = 1e-4 * np.random.randn(n_walkers, n_params)
            start_pos = centre + jitter

            walker_count, dim_count = start_pos.shape
            return walker_count, dim_count, start_pos

        def _createBackendFile():
            """Generate a .h5 backend file to save and monitor progress."""
            cwd = os.getcwd()
            print(cwd)
            stamp = time.strftime("%Y%m%d-%H%M")
            h5_path = os.path.join(
                cwd, "backend", "backend-file-{}.h5".format(stamp))
            return emcee.backends.HDFBackend(h5_path)

        def _saveResults(backend, samples):
            """Write the samples to .csv and re-stamp the backend file."""
            cwd = os.getcwd()
            stamp = time.strftime("%Y%m%d-%H%M")

            # Save samples in .csv file stamped with
            # date and time the run was completed
            csv_path = os.path.join(
                cwd, "results", 'samples-{}.csv'.format(stamp))
            np.savetxt(csv_path, samples, delimiter=',', fmt='%e')

            # Update backend file name to match
            # the date and time of above .csv file
            h5_path = os.path.join(
                cwd, "backend", "backend-file-{}.h5".format(stamp))
            os.rename(backend.filename, h5_path)

        def _runEmcee(backend, nwalkers, ndim, pos):
            """
            Set up a pool process to run emcee in parallel.

            Run emcee sampler and check for convergence every n steps,
            where n is user-defined.
            """
            backend.reset(nwalkers, ndim)

            checks_done = 0
            mean_tau_history = np.empty(max_iter)
            previous_tau = np.inf

            # Set up parallel processing
            with Pool(processes=n_processes) as workers:
                sampler = emcee.EnsembleSampler(
                    nwalkers,
                    ndim,
                    logProbability,
                    args=(x, y, yerr),
                    backend=backend,
                    moves=[
                        emcee.moves.DEMove(),
                        emcee.moves.DESnookerMove(),
                    ],
                    pool=workers,
                )

                # Run emcee
                for _ in sampler.sample(pos, iterations=max_iter,
                                        progress=True):
                    # print("log_prob = {} ".format(sampler.get_log_prob()))
                    # print("tau = {}".format(sampler.get_autocorr_time()))
                    # print("acceptance fraction = {} ".format(sampler.acceptance_fraction))

                    # Check for convergence every "check_iter" steps
                    if sampler.iteration % check_iter != 0:
                        continue

                    tau = sampler.get_autocorr_time(tol=0)
                    mean_tau_history[checks_done] = np.mean(tau)
                    checks_done += 1

                    # Converged when the chain is longer than 100 * tau and
                    # tau moved by less than 1% since the previous check.
                    long_enough = np.all(tau * 100 < sampler.iteration)
                    settled = np.all(
                        np.abs(previous_tau - tau) / tau < 0.01)
                    if long_enough & settled:
                        break
                    previous_tau = tau

                # Get samples
                samples = sampler.chain[:, :, :].reshape((-1, ndim))
                print(samples.shape, samples)
            return samples

        backend = _createBackendFile()
        nwalkers, ndim, pos = _prepEmcee(params_list)
        samples = _runEmcee(backend, nwalkers, ndim, pos)
        _saveResults(backend, samples)
        ##return samples

    # Run emcee sampler code
    emceeSampler(params_list)

    # Plot samples, save in /results/plots folder
    corner_plot.main()

    # Calculate runtime, send SMS alert (optional)
    end = time.time()
def mainjj():
    """Call ``plots.main``, then ``crawl.main``, then ``main``, in that order."""
    plots.main()
    crawl.main()
    main()
def main(data_path):
    """Build the full Streamlit page from the preprocessed data.

    Per the original note, this runs after a small basic set-up and the
    successful user authentication. It loads the data at ``data_path``,
    renders the sidebar and main-page filters, shows the resulting tables
    and (when one is produced) the standard plot, and offers an Excel
    download of what is displayed.
    """
    raw = helpers.load_preprocessed_data(data_path)
    due_date_options = helpers.get_filter_options_for_due_date(raw, 24)
    max_date = helpers.return_max_date_string(raw)

    # Due date: cut the data off at the selected date.
    filter_due_date = st.sidebar.selectbox("Auswahl Stichdatum:",
                                           options=due_date_options)
    actual_date = helpers.return_actual_date_string(filter_due_date, max_date)
    up_to_actual = helpers.truncate_data_to_actual_date(raw, actual_date)
    n_years = helpers.calculate_max_n_years_available(up_to_actual)

    # Result dimension: its options depend on how many years are available.
    result_dim_options = helpers.get_filter_options_for_result_dim(n_years)
    filter_result_dim = st.sidebar.selectbox("Auswahl Resultatsdimension:",
                                             options=result_dim_options)
    data_truncated = helpers.truncate_data_n_years_back(
        up_to_actual, actual_date, n_years)
    prepared = helpers.prepare_values_according_to_result_dim(
        data_truncated, filter_result_dim, actual_date)

    # The average-values checkbox is only offered for the "Monat" dimension.
    if filter_result_dim != "Monat":
        st.sidebar.text("[Ø-Werte nicht verfügbar]")
        avg_bool = False
    else:
        avg_bool = st.sidebar.checkbox("Ø-Werte pro aktive Konten",
                                       value=False)

    prepared = helpers.replace_monthly_values_with_avg(
        prepared, filter_result_dim, avg_bool)
    with_diff = helpers.calculate_diff_column(prepared)
    data_actual = helpers.create_df_with_actual_period_only(
        with_diff, actual_date)

    mandant_groups = helpers.get_filter_options_for_mandant_groups(data_actual)
    kpi_groups = helpers.get_filter_options_for_kpi_groups()

    # SIDEBAR
    filter_mandant = st.sidebar.selectbox("Auswahl Mandanten-Gruppe:",
                                          options=mandant_groups)
    filter_kpi_groups = st.sidebar.selectbox("Auswahl KPI-Gruppe:",
                                             options=kpi_groups)
    filter_display_mode = st.sidebar.radio(
        "Auswahl Gruppierung für Anzeige:",
        options=["nach Entität", "nach KPI"])

    # TODO: Filter for Product Dim is temporarily (?) disabled
    # (fixed to "Produkt")
    filter_product_dim = "Produkt"
    # filter_product_dim = st.sidebar.radio(
    #     "Auswahl Produktsicht:", options=["Produkt", "Kartenprofil"]
    # )

    st.sidebar.markdown("---")
    st.sidebar.text("")
    st.sidebar.text(f"Datenstand:\n {max_date}")

    # UPPER FILTER OPTIONS MAIN PAGE
    data = helpers.filter_for_sidebar_selections_mandant(data_actual,
                                                         filter_mandant)
    data = helpers.filter_for_sidebar_selections_kpi(data, filter_kpi_groups)

    # GENERATING OPTION FOR MAIN PAGE FILTERS
    entity_options = helpers.get_filter_options_for_entities(data)
    kpi_options = helpers.get_filter_options_for_kpi(data)

    # MAIN PAGE FILTERS
    filter_entity = st.multiselect("Select entities:",
                                   options=entity_options,
                                   default=["[alle]"])
    filter_kpi = st.multiselect("Select KPIs:",
                                options=kpi_options,
                                default=["[alle]"])
    st.write("")

    # FILTERING DATA ACCORDING TO CHOICES
    data = helpers.filter_for_entity_and_kpi(
        data, filter_entity=filter_entity, filter_kpi=filter_kpi)

    # DISPLAY AND STYLING OF DATAFRAMES
    data_display = helpers.prepare_for_display(data, filter_display_mode)
    helpers.display_dataframes(data_display, filter_display_mode,
                               filter_product_dim, filter_mandant,
                               filter_entity, avg_bool)

    # DISPLAY STANDARD PLOT IF CONDITIONS ARE MET
    fig, df_plot = plots.main(data, data_truncated)
    has_plot = fig is not None
    if has_plot:
        st.plotly_chart(fig)

    # EXCEL EXPORT
    if not st.button("Download Excel"):
        return

    # Export the plot data when a plot is shown, the styled table otherwise.
    download_data = (
        df_plot if has_plot
        else downloads.style_for_export_if_no_plot(data, filter_display_mode)
    )
    download_path = downloads.get_download_path()
    b64, href = downloads.export_excel(download_data, download_path)
    st.markdown(href, unsafe_allow_html=True)
def test_plots(tmp_path: str):
    """Smoke test: ``plots.main`` must run on ``tmp_path`` without raising.

    NOTE(review): ``tmp_path`` looks like the pytest fixture of that name,
    which is a ``pathlib.Path`` rather than a ``str`` -- confirm.
    """
    plots.main(tmp_path)
def main(f_config, gff_cage, is_gff, outdir, make_plots):
    """Extract features (unless already given), run promi2, optionally plot.

    Parameters
    ----------
    f_config : path of the configuration file. The ``promi2`` section is
        always read; ``genome``, ``cons``, ``tata`` and ``mirbase`` are read
        when features have to be extracted, and ``correlation`` when
        ``corr`` is one of the configured features.
    gff_cage : input GFF file. When ``is_gff`` is true it is taken to be an
        already extracted features file and is used as is.
    is_gff : true when ``gff_cage`` is already a features file, which skips
        the feature extraction.
    outdir : output directory. ``None`` selects a generated name of the form
        ``promi2_outdir_<input basename>_<6 random characters>``.
    make_plots : when truthy, ``plots.main`` is run on the predictions and
        writes into ``<outdir>/plots``.

    Raises
    ------
    SystemExit
        When ``is_gff`` is true and the first line of ``gff_cage`` does not
        have a ninth tab-separated column containing ``:``.
    """
    cparser = SafeConfigParser()
    cparser.read(f_config)

    in_bname = os.path.basename(gff_cage)

    # Identity test: ``== None`` can be hijacked by a custom ``__eq__``.
    if outdir is None:
        outdir = 'promi2_outdir_' + in_bname + '_' + random_string(6)
    ensure_dir(outdir, False)

    f_param = cparser.get('promi2', 'params')
    listoffeatures = cparser.get('promi2', 'features').split(',')

    is_consider_corr = 'corr' in listoffeatures
    if is_consider_corr:
        corrmethod = cparser.get('correlation', 'corrmethod')

    ## PART1: Feature extraction
    if not is_gff:
        ## feature extraction: cpg, cons, tata (features.py)
        outdir_seqfeatures = os.path.join(outdir, 'seqfeatures')
        ensure_dir(outdir_seqfeatures, False)

        gff_1kbfeatures = os.path.join(outdir_seqfeatures,
                                       'features_1kbseq.gff')

        f_fasta = cparser.get('genome', 'fasta')
        f_chromsizes = cparser.get('genome', 'chromsizes')
        d_phastcons = cparser.get('cons', 'phastcons')
        TRAP = cparser.get('tata', 'trap')
        f_psemmatrix = cparser.get('tata', 'psem')

        features.main(gff_cage, outdir_seqfeatures,
                      f_fasta, f_chromsizes, d_phastcons,
                      TRAP, f_psemmatrix,
                      gff_1kbfeatures)

        ## feature extraction: mirna_proximity (mirna_proximity.py)
        outdir_mprox = os.path.join(outdir, 'mprox')
        ensure_dir(outdir_mprox, False)

        gff_mirnaprox = os.path.join(outdir_mprox, 'features_mirnaprox.gff')
        gff_mirna = cparser.get('mirbase', 'gff2')

        mirna_proximity.main(gff_cage, gff_mirna, gff_mirnaprox)

        ## merge extracted features (gff_unify_features.py)
        gff_features = os.path.join(outdir, 'Features.1kb.mprox.' + in_bname)
        gff_unify_features.main(gff_1kbfeatures, gff_mirnaprox,
                                'mirna_prox', '0', gff_features)

        if is_consider_corr:
            ## merge extracted features (gff_unify_features.py)
            ## after compute correlation
            gff_features_corr = os.path.join(
                outdir,
                'Features.1kb.mprox.%s.%s' % (corrmethod, in_bname))

            outdir_corr = os.path.join(outdir, 'corr')
            m_mirna = cparser.get('correlation', 'srnaseqmatrix')
            m_tss = cparser.get('correlation', 'cageseqmatrix')

            gff_corr = correlation.main(gff_mirna, m_mirna, m_tss,
                                        corrmethod, outdir_corr)
            gff_unify_features.main(gff_features, gff_corr,
                                    'corr', '0', gff_features_corr)

            gff_allfeatures = gff_features_corr
        else:
            gff_allfeatures = gff_features
    else:
        gff_allfeatures = gff_cage

        # Cheap format check on the first record only: the ninth column of a
        # features file must contain ':'. The length guard makes an empty or
        # short first line report the same error instead of an IndexError.
        with open(gff_allfeatures) as f:
            first_fields = f.readline().split('\t')
        if len(first_fields) < 8 or ':' not in first_fields[7]:
            sys.exit('ERROR: this is not a features.gff formatted file')

    ## PART2: extract parameters & run promirna
    f_prediction = os.path.join(outdir, 'Predictions.' + in_bname + '.txt')
    # Parenthesised single-argument form: same output under the Python 2
    # print statement, and valid as the Python 3 print function.
    print('COMPUTING: "%s"...' % f_prediction)
    promi2(f_param, listoffeatures, gff_allfeatures, f_prediction)

    ## PART3: plots
    if make_plots:
        plotdir = os.path.join(outdir, 'plots')
        ensure_dir(plotdir, False)
        plots.main(f_prediction, plotdir, f_config)
# Copyright 2021 Alexander Huntley

# This file is part of Plots.

# Plots is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# Plots is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Plots. If not, see <https://www.gnu.org/licenses/>.

# Launcher: importing the package and calling its entry point is all
# this script does.
import plots

plots.main()