def load_adverse_event_rate(builder: 'Builder'):
    """Sample the LDL-C adverse event rate for the configured location and draw.

    The seed key is built from the ``location`` and ``input_draw_number``
    entries found under ``builder.configuration.input_data`` and hashed with
    ``get_hash``.

    Parameters
    ----------
    builder
        Object exposing ``configuration.input_data.location`` and
        ``configuration.input_data.input_draw_number``.

    Returns
    -------
    Whatever ``sample_truncnorm_distribution`` yields for the hashed seed,
    ``AdverseEffects.rate_mean`` and ``AdverseEffects.rate_sd``.
    """
    input_data = builder.configuration.input_data
    seed_key = (
        f'ldlc_adverse_event_rate_location_{input_data.location}'
        f'_draw_{input_data.input_draw_number}'
    )
    return sample_truncnorm_distribution(
        get_hash(seed_key),
        AdverseEffects.rate_mean,
        AdverseEffects.rate_sd,
    )
def get_iron_hemoglobin_effect(draw: int):
    """Draw one hemoglobin shift resulting from iron fortification.

    The shift is sampled from a normal distribution whose mean is
    ``params.HEMOGLOBIN_SHIFT_MEAN``. The standard deviation is backed out of
    ``params.HEMOGLOBIN_SHIFT_Q_975`` by dividing its distance from the mean
    by the 0.975 quantile of the standard normal.

    Note that this reseeds NumPy's global random state with the hash of the
    draw-specific seed key.
    """
    mean_shift = params.HEMOGLOBIN_SHIFT_MEAN
    z_975 = scipy.stats.norm().ppf(0.975)
    shift_sd = (params.HEMOGLOBIN_SHIFT_Q_975 - mean_shift) / z_975

    seed_key = project_globals.IRON_RANDOM_SEEDS.IF_HEMO_EFFECT.format(draw=draw)
    np.random.seed(get_hash(seed_key))
    return scipy.stats.norm(mean_shift, shift_sd).rvs()
def sample_ldlc_reduction(location: str, draw: int, treatment: str) -> float:
    """Sample the LDL-C reduction for a treatment.

    The mean and standard deviation are read from the row of
    ``paths.LDLC_REDUCTION`` whose ``treatment`` index equals ``treatment``.

    Parameters
    ----------
    location
        Only used as part of the seed key.
    draw
        Draw number, used as part of the seed key.
    treatment
        Row label to look up in the reduction table.

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the looked-up
    parameters.
    """
    # Every treatment whose name contains 'statin' shares a single seed key;
    # the table lookup below still uses the full treatment name.
    if 'statin' in treatment:
        seed_label = 'statin'
    else:
        seed_label = treatment
    seed = get_hash(f'{seed_label}_ldlc_reduction_draw_{draw}_location_{location}')

    reduction_table = pd.read_csv(paths.LDLC_REDUCTION).set_index('treatment')
    row = reduction_table.loc[treatment, :]
    return sample_truncnorm_distribution(seed, row[MEAN_COLUMN], row[SD_COLUMN])
def sample_probability_target_given_rx(location: str, draw: int) -> float:
    """Sample the target-given-rx probability for a location and draw.

    ``location`` is passed through ``sanitize_location`` and the result is
    used both in the seed key and as the row label in the CSV at
    ``paths.PROB_TARGET_GIVEN_RX`` (indexed by ``LOCATION_COLUMN``).

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    clean_location = sanitize_location(location)
    seed_key = f'target_given_rx_probability_draw_{draw}_location_{clean_location}'
    seed = get_hash(seed_key)

    table = pd.read_csv(paths.PROB_TARGET_GIVEN_RX).set_index(LOCATION_COLUMN)
    row = table.loc[clean_location, :]
    mean, sd = row[MEAN_COLUMN], row[SD_COLUMN]
    return sample_truncnorm_distribution(seed, mean, sd)
def sample_raw_rx_change(location: str, draw: int, rx_change: str) -> float:
    """Sample the raw probability for a prescription change.

    Raw result: needs to be adjusted.

    Parameters
    ----------
    location
        Passed through ``sanitize_location``; used only in the seed key.
    draw
        Draw number, used in the seed key.
    rx_change
        Row label looked up in ``paths.PROB_ADDING_DRUGS`` (indexed by
        ``probability_type``); also part of the seed key.

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    # The docstring above previously sat after this statement, where Python
    # treats it as a discarded string expression rather than documentation.
    location = sanitize_location(location)
    seed = get_hash(f'{rx_change}_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.PROB_ADDING_DRUGS).set_index('probability_type')
    params = data.loc[rx_change, :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN], params[SD_COLUMN])
def iron_content_ratio(draw: str, location: str) -> float:
    """Sample the iron content value for a location and draw.

    Used from both the coverage and maternal fortification effect.

    The bounds come from ``params.IRON_VALUES_PER_LOCATION[location]``, which
    is unpacked as a ``(lower, upper)`` pair. When both bounds are equal that
    value is returned directly; otherwise a single value is drawn uniformly
    from ``[lower, upper]``.

    Note that this reseeds NumPy's global random state with the hash of the
    draw/location-specific seed key.

    Parameters
    ----------
    draw
        Draw identifier, formatted into the seed key.
    location
        Key into ``params.IRON_VALUES_PER_LOCATION``; also formatted into the
        seed key.
    """
    seed = get_hash(project_globals.IRON_RANDOM_SEEDS.IF_AMOUNT.format(draw=draw, location=location))
    np.random.seed(seed)
    iron_lower, iron_upper = params.IRON_VALUES_PER_LOCATION[location]
    if iron_lower == iron_upper:
        return iron_upper
    # scipy.stats.uniform is parameterised as (loc, scale) and samples on
    # [loc, loc + scale]; passing the upper bound as ``scale`` would sample
    # on [lower, lower + upper] instead of [lower, upper].
    return scipy.stats.uniform(loc=iron_lower, scale=iron_upper - iron_lower).rvs()
def sample_probability_testing_ldl_c(location: str, draw: int) -> float:
    """Sample the LDL-C testing probability for a location and draw.

    ``location`` is passed through ``sanitize_location``; the result is used
    in the seed key and as the row label in the CSV at
    ``paths.PROB_TESTING_LDL_C_PATH`` (indexed by ``LOCATION_COLUMN``).

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    place = sanitize_location(location)
    seed = get_hash(f'testing_ldl_c_probability_draw_{draw}_location_{place}')

    raw_table = pd.read_csv(paths.PROB_TESTING_LDL_C_PATH)
    by_location = raw_table.set_index(LOCATION_COLUMN)
    row = by_location.loc[place, :]
    return sample_truncnorm_distribution(seed, row[MEAN_COLUMN], row[SD_COLUMN])
def sample_iron_fortification_coverage(location: str, draw: int, coverage_time: str) -> float:
    """Sample iron fortification coverage as a weighted sum of components.

    Each entry of ``IRON_FORTIFICATION_COVERAGE[location]`` contributes its
    ``'weight'`` times the value ``sample_beta_distribution`` returns for the
    entry's ``coverage_time`` parameters. Every component is sampled with the
    same seed, which depends only on ``draw`` and ``location``.
    """
    seed = get_hash(f'iron_fortification_coverage_draw_{draw}_location_{location}')

    coverage = 0
    for component in IRON_FORTIFICATION_COVERAGE[location]:
        weighted_sample = component['weight'] * sample_beta_distribution(
            seed, component[coverage_time])
        coverage = coverage + weighted_sample
    return coverage
def sample_vitamin_a_coverage(location: str, draw: int, coverage_time: str) -> float:
    """Sample vitamin A fortification coverage as a weighted sum of components.

    Each entry of ``VITAMIN_A_COVERAGE[location]`` contributes its
    ``'weight'`` times the value ``sample_beta_distribution`` returns for the
    entry's ``coverage_time`` parameters. Every component is sampled with the
    same seed, which depends only on ``draw`` and ``location``.
    """
    seed = get_hash(f'vitamin_a_fortification_coverage_draw_{draw}_location_{location}')

    coverage = 0
    for component in VITAMIN_A_COVERAGE[location]:
        weighted_sample = component['weight'] * sample_beta_distribution(
            seed, component[coverage_time])
        coverage = coverage + weighted_sample
    return coverage
def sample_folic_acid_coverage(location: str, draw: int, coverage_time: str) -> float:
    """Sample folic acid fortification coverage as a weighted sum of components.

    Each entry of ``FOLIC_ACID_COVERAGE[location]`` contributes its
    ``'weight'`` times the value ``sample_beta_distribution`` returns for the
    entry's ``coverage_time`` parameters. Every component is sampled with the
    same seed, which depends only on ``draw`` and ``location``.
    """
    seed = get_hash(f'folic_acid_fortification_coverage_draw_{draw}_location_{location}')

    coverage = 0
    for component in FOLIC_ACID_COVERAGE[location]:
        weighted_sample = component['weight'] * sample_beta_distribution(
            seed, component[coverage_time])
        coverage = coverage + weighted_sample
    return coverage
def sample_adherence(location: str, draw: int, multi_pill: bool, previous_cve: bool) -> float:
    """Sample the adherence probability for a location and patient profile.

    Parameters are read from ``paths.ADHERENCE_PARAMETERS``, indexed by
    ``LOCATION_COLUMN``, ``'multi_pill'`` and ``'previous_cve'``; the two
    boolean flags are converted to ``int`` for the lookup.

    The seed key depends only on ``draw`` and the sanitized location, not on
    the two flags.

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    place = sanitize_location(location)
    seed = get_hash(f'adherence_probability_draw_{draw}_location_{place}')

    index_columns = [LOCATION_COLUMN, 'multi_pill', 'previous_cve']
    adherence_table = pd.read_csv(paths.ADHERENCE_PARAMETERS).set_index(index_columns)

    row_key = (place, int(multi_pill), int(previous_cve))
    row = adherence_table.loc[row_key, :]
    return sample_truncnorm_distribution(seed, row[MEAN_COLUMN], row[SD_COLUMN])
def sample_raw_drug_prescription(location: str, draw: int, drug: str) -> float:
    """Sample the raw prescription probability for a drug in a location.

    Raw result: needs to be adjusted.

    Parameters
    ----------
    location
        Passed through ``sanitize_location``; used in the seed key and as the
        first level of the table lookup.
    draw
        Draw number, used in the seed key.
    drug
        Drug name. Used verbatim in the seed key; underscores are replaced
        with spaces for the ``'current_prescription'`` lookup in
        ``paths.CURRENT_RX_DATA_PATH``.

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    # The docstring above previously sat after this statement, where Python
    # treats it as a discarded string expression rather than documentation.
    location = sanitize_location(location)
    seed = get_hash(
        f'{drug}_prescription_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.CURRENT_RX_DATA_PATH).set_index(
        [LOCATION_COLUMN, 'current_prescription'])
    params = data.loc[(location, drug.replace('_', ' ')), :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN], params[SD_COLUMN])
def sample_therapy_type(location: str, draw: int, therapy_type: str) -> float:
    """Sample the probability of a therapy type for a location and draw.

    Parameters
    ----------
    location
        Passed through ``sanitize_location``; used in the seed key and as the
        first level of the table lookup.
    draw
        Draw number, used in the seed key.
    therapy_type
        Therapy type label. A value equal to ``FDC`` is upper-cased before it
        is used in the seed key and in the ``'therapy_type'`` lookup in
        ``paths.PROB_THERAPY_TYPE``; any other value is used unchanged.

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    location = sanitize_location(location)
    # Compare by value: an identity check (``is``) only matches when the
    # caller passes the very same string object as the FDC constant, which
    # depends on interning and silently fails for equal-but-distinct strings.
    if therapy_type == FDC:
        therapy_type = therapy_type.upper()
    seed = get_hash(
        f'{therapy_type}_probability_draw_{draw}_location_{location}')
    data = pd.read_csv(paths.PROB_THERAPY_TYPE).set_index(
        [LOCATION_COLUMN, 'therapy_type'])
    params = data.loc[(location, therapy_type), :]
    return sample_truncnorm_distribution(seed, params[MEAN_COLUMN], params[SD_COLUMN])
def make_hazard_ratios(draw: int, pfs: dict, os: dict):
    """Build the PFS and OS hazard ratio tables for one draw.

    Parameters
    ----------
    draw
        Draw number; appended to each key to form the per-key random seed.
    pfs
        Mapping from an index key to the arguments of ``LogNormalHazardRate``
        for progression-free survival.
    os
        Mapping of the same shape for overall survival. Every key of ``pfs``
        is also looked up in ``os``; keys present only in ``os`` contribute
        to the OS table alone.

    Returns
    -------
    tuple
        ``(pfs_hazard_ratio, os_hazard_ratio)`` data frames with the index
        columns, a ``'hazard_ratio'`` column and constant ``'year_start'`` /
        ``'year_end'`` columns.
    """
    index_cols = [
        models.MULTIPLE_MYELOMA_MODEL_NAME,
        'multiple_myeloma_treatment',
        'retreated',
    ]

    def _empty_table():
        # Empty frame indexed by the three lookup columns; rows are added
        # below by label assignment.
        return pd.DataFrame(columns=index_cols + ['hazard_ratio']).set_index(index_cols)

    def _percentile_for(key):
        # One percentile per (key, draw); a key's PFS and OS values are both
        # derived from this same percentile.
        seed_text = '_'.join([str(part) for part in key] + [str(draw)])
        return np.random.RandomState(get_hash(seed_text)).random()

    def _hazard_ratio(distribution_args, percentile):
        return LogNormalHazardRate(*distribution_args).get_random_variable(percentile)

    pfs_table = _empty_table()
    os_table = _empty_table()

    # The untreated susceptible state has a hazard ratio of 1.0 in both tables.
    reference_key = (models.SUSCEPTIBLE_STATE_NAME, models.TREATMENTS.not_treated, False)
    pfs_table.loc[reference_key] = 1.0
    os_table.loc[reference_key] = 1.0

    for key in pfs:
        percentile = _percentile_for(key)
        pfs_table.loc[key] = _hazard_ratio(pfs[key], percentile)
        os_table.loc[key] = _hazard_ratio(os[key], percentile)

    for key in set(os).difference(pfs):
        os_table.loc[key] = _hazard_ratio(os[key], _percentile_for(key))

    pfs_table = pfs_table.reset_index()
    os_table = os_table.reset_index()

    # FIXME: Super-duper hack to make lookup table work. Need at least one continuous parameter.
    for table in (pfs_table, os_table):
        table['year_start'] = 1990
        table['year_end'] = 2100

    return pfs_table, os_table
def sleep_test(job_parameters: JobParameters) -> pd.DataFrame:
    """Sleep for a seeded pseudo-random number of seconds and report it.

    The duration is drawn with ``RandomState.randint(5, 60)``, seeded from
    the hash of ``job_parameters.random_seed``. ``randint`` excludes its
    upper bound, so the sleep lasts between 5 and 59 seconds.

    Returns
    -------
    pd.DataFrame
        A single-row frame with a ``'sleep_time'`` column, indexed by the
        job's random seed under the index name ``'seed'``.
    """
    job_seed = job_parameters.random_seed
    shortest, longest = 5, 60

    generator = np.random.RandomState(seed=get_hash(f"sleep_test_{job_seed}"))
    sleep_time = generator.randint(shortest, longest)

    logger.info(f"Sleeping for {sleep_time}s.")
    time.sleep(sleep_time)
    logger.info("Sleep test successful.")

    seed_index = pd.Index([job_seed], name="seed")
    return pd.DataFrame({"sleep_time": sleep_time}, index=seed_index)
def sample_probability_increasing_dose(scenario: str, location: str, draw: int) -> float:
    """Sample the probability of increasing dose for a scenario and location.

    Any ``scenario`` other than ``'baseline'`` is treated as
    ``'intervention'``, both in the seed key and in the lookup against
    ``paths.PROB_ADDING_DRUGS`` (indexed by ``LOCATION_COLUMN`` and
    ``'scenario'``). ``location`` is passed through ``sanitize_location``.

    Returns
    -------
    The result of ``sample_truncnorm_distribution`` for the row's
    ``MEAN_COLUMN`` and ``SD_COLUMN`` values.
    """
    place = sanitize_location(location)
    if scenario != 'baseline':
        scenario = 'intervention'

    seed_key = f'target_given_rx_probability_scenario_{scenario}_draw_{draw}_location_{place}'
    seed = get_hash(seed_key)

    index_columns = [LOCATION_COLUMN, 'scenario']
    table = pd.read_csv(paths.PROB_ADDING_DRUGS).set_index(index_columns)
    row_key = (place, scenario)
    row = table.loc[row_key, :]
    return sample_truncnorm_distribution(seed, row[MEAN_COLUMN], row[SD_COLUMN])
def sample_pfs_and_os(risk_level: str, draw: int, pfs: Dict, os: Dict):
    """Return the PFS and OS hazard ratios for a risk level and draw.

    A single percentile is drawn from a ``RandomState`` seeded with the hash
    of ``'{risk_level}_{draw}'`` and shared by both outcomes. An entry of
    ``pfs`` / ``os`` that is a tuple is unpacked into ``LogNormalHazardRate``
    and converted with ``get_random_variable``; any other entry is returned
    unchanged.

    Returns
    -------
    tuple
        ``(pfs_hr, os_hr)``.
    """
    generator = np.random.RandomState(get_hash(f'{risk_level}_{draw}'))
    percentile = generator.random()

    def _resolve(entry):
        # Tuples hold distribution arguments; anything else is already a value.
        if not isinstance(entry, tuple):
            return entry
        return LogNormalHazardRate(*entry).get_random_variable(percentile)

    pfs_hr = _resolve(pfs[risk_level])
    os_hr = _resolve(os[risk_level])
    return pfs_hr, os_hr
def sample_vitamin_a_time_to_effect(location: str, draw: int) -> float:
    """Sample the vitamin A fortification time to effect.

    The seed is the hash of a key built from ``draw`` and ``location``; the
    value comes from ``sample_lognormal_distribution`` called with
    ``VITAMIN_A_FORTIFICATION_TIME_TO_EFFECT``.
    """
    seed_key = f'vitamin_a_fortification_time_to_effect_draw_{draw}_location_{location}'
    seed = get_hash(seed_key)
    return sample_lognormal_distribution(seed, VITAMIN_A_FORTIFICATION_TIME_TO_EFFECT)
def sample_vitamin_a_relative_risk(location: str, draw: int) -> float:
    """Sample the vitamin A fortification relative risk.

    The seed is the hash of a key built from ``draw`` and ``location``; the
    value comes from ``sample_lognormal_distribution`` called with
    ``VITAMIN_A_FORTIFICATION_RELATIVE_RISK``.
    """
    seed_key = f'vitamin_a_fortification_relative_risk_draw_{draw}_location_{location}'
    seed = get_hash(seed_key)
    return sample_lognormal_distribution(seed, VITAMIN_A_FORTIFICATION_RELATIVE_RISK)
def sample_folic_acid_relative_risk(location: str, draw: int) -> float:
    """Sample the folic acid fortification relative risk.

    The seed is the hash of a key built from ``draw`` and ``location``; the
    value comes from ``sample_lognormal_distribution`` called with
    ``FOLIC_ACID_FORTIFICATION_RELATIVE_RISK``.
    """
    seed_key = f'folic_acid_fortification_relative_risk_draw_{draw}_location_{location}'
    seed = get_hash(seed_key)
    return sample_lognormal_distribution(seed, FOLIC_ACID_FORTIFICATION_RELATIVE_RISK)
def get_random_variable(draw: int, seed: str, distribution) -> pd.Series:
    """Draw from ``distribution`` after seeding NumPy's global random state.

    Parameters
    ----------
    draw
        Draw number, appended to ``seed`` to form the seed key.
    seed
        Text prefix of the seed key.
    distribution
        Object exposing an ``rvs()`` method.

    Returns
    -------
    Whatever ``distribution.rvs()`` returns. The annotation says
    ``pd.Series``; the actual type depends on the distribution passed in.
    """
    seed_key = f'{seed}_draw_{draw}'
    np.random.seed(get_hash(seed_key))
    sample = distribution.rvs()
    return sample
def copula_sample(correlation_matrix, samples, randomness_key):
    """Draw correlated samples and map them through the standard normal CDF.

    NumPy's global random state is reseeded with the hash of
    ``randomness_key``. ``samples`` draws are then taken from a multivariate
    normal with covariance ``correlation_matrix`` (and SciPy's default mean),
    and each value is passed through the standard normal CDF.

    Parameters
    ----------
    correlation_matrix
        Passed as ``cov`` to ``scipy.stats.multivariate_normal``.
    samples
        Passed as the size argument of ``rvs``.
    randomness_key
        Hashed to produce the seed.
    """
    np.random.seed(get_hash(randomness_key))
    correlated_normals = scipy.stats.multivariate_normal(cov=correlation_matrix).rvs(samples)
    standard_normal = scipy.stats.norm()
    return standard_normal.cdf(correlated_normals)
def get_iron_bw_effect(draw, location):
    """Draw one iron fortification birth-weight shift, scaled by the denominator.

    The shift is sampled from a normal distribution with mean
    ``params.IF_MEAN_BW_SHIFT``. The standard deviation is backed out of
    ``params.IF_Q975_BW_SHIFT`` by dividing its distance from the mean by the
    0.975 quantile of the standard normal. The sample is then divided by
    ``params.IRON_EFFECT_DENOMINATOR``.

    Note that this reseeds NumPy's global random state with the hash of the
    draw/location-specific seed key.
    """
    mean_shift = params.IF_MEAN_BW_SHIFT
    z_975 = scipy.stats.norm().ppf(0.975)
    shift_sd = (params.IF_Q975_BW_SHIFT - mean_shift) / z_975

    seed_key = project_globals.IRON_RANDOM_SEEDS.IF_BW_SHIFT.format(draw=draw, location=location)
    np.random.seed(get_hash(seed_key))
    raw_shift = scipy.stats.norm(mean_shift, shift_sd).rvs()
    return raw_shift / params.IRON_EFFECT_DENOMINATOR