def _get_thresholds(stratifications: List[Tuple], means: pd.DataFrame, sds: pd.DataFrame, weights_df: pd.DataFrame, draw: int) -> pd.Series: col = f'draw_{draw}' thresholds = pd.Series(0, index=means.index, name=col) ts = time.time() print(f'Start: {ts}') for i, stratification in enumerate(stratifications): mu = means.loc[stratification, col] sigma = sds.loc[stratification, col] threshold = 0 if mu and sigma: weights = weights_df.loc[stratification].reset_index() weights = (weights[weights['parameter'] != 'glnorm']. loc[:, ['parameter', 'value']].set_index( 'parameter').to_dict()['value']) weights = {k: [v] for k, v in weights.items()} ens_dist = EnsembleDistribution(weights=weights, mean=mu, sd=sigma) threshold = minimize(lambda x: (ens_dist.ppf(x) - 7)**2, [0.5], bounds=Bounds(0, 1.0), method='Nelder-Mead').x[0] print(f'mu: {mu}, sigma: {sigma}, threshold: {threshold}') thresholds.loc[stratification] = threshold tf = time.time() print(f'End: {tf}') print(f'Duration: {tf - ts}') return thresholds
def calc_hypertensive(location, draw):
    """Return the proportion of exposure above ``HYPERTENSION_THRESHOLD``.

    Loads the high systolic blood pressure exposure mean and standard
    deviation for one draw from the location's artifact, builds an
    ``EnsembleDistribution`` from them, and evaluates one minus its CDF at
    the threshold.

    The result is a series named ``draw_<draw>`` carrying the index of the
    loaded exposure data with the ``parameter`` level dropped; missing
    values are replaced by 0.
    """
    draw_col = f'draw_{draw}'
    artifact_file = HYPERTENSION_DATA_FOLDER / f'{location}/data.hdf'
    artifact = Artifact(str(artifact_file), filter_terms=[f'draw=={draw}'])

    # The indices can be dropped because the means/sds/weights stay aligned:
    # the data is sorted in vivarium_inputs.
    exposure_mean = artifact.load(
        'risk_factor.high_systolic_blood_pressure.exposure')
    # Kept aside: it is re-attached to the proportions further down.
    demographic_index = exposure_mean.index
    exposure_mean = exposure_mean.reset_index(drop=True)

    exposure_sd = artifact.load(
        'risk_factor.high_systolic_blood_pressure.exposure_standard_deviation')
    exposure_sd = exposure_sd.reset_index(drop=True)

    # The weights are the same for all draws.
    ensemble_weights = prep_weights(artifact)

    cutoff = pd.Series(HYPERTENSION_THRESHOLD, index=exposure_mean.index)
    ensemble = EnsembleDistribution(weights=ensemble_weights,
                                    mean=exposure_mean[draw_col],
                                    sd=exposure_sd[draw_col])

    # We want the proportion above the threshold, hence the complement.
    proportion_above = 1 - ensemble.cdf(cutoff)
    proportion_above = proportion_above.fillna(0)

    proportion_above.index = demographic_index
    proportion_above.name = draw_col
    return proportion_above.droplevel('parameter').fillna(0)
def ppf(self, q):
    """Evaluate the ensemble distribution's ppf at the quantiles ``q``.

    Parameters
    ----------
    q
        Series of quantiles. Its index is passed to ``self.weights`` and to
        every callable in ``self.parameters`` to obtain the matching weights
        and parameters.

    Returns
    -------
    pd.Series
        The ppf values with null results replaced by 0. An empty ``q``
        yields an empty ``float64`` series.
    """
    if q.empty:
        # Explicit dtype: a bare ``pd.Series([])`` warns on older pandas and
        # yields an ``object`` series on newer pandas.
        return pd.Series([], dtype='float64')

    # NOTE(review): `clip` is defined outside this block; presumably it
    # keeps the quantiles away from 0 and 1 - confirm.
    q = clip(q)
    weights = self.weights(q.index)
    parameters = {
        name: parameter(q.index)
        for name, parameter in self.parameters.items()
    }
    x = EnsembleDistribution(weights, parameters).ppf(q)
    x[x.isnull()] = 0
    return x
def find_rr(weights, mean, standard_dev, attributable_fraction,
            sample_size=10000):
    """Fit a relative-risk base so the sampled mean RR matches a target.

    The target mean relative risk is ``1 / (1 - attributable_fraction)``.
    ``sample_size`` quantiles are drawn uniformly from ``[0.01, 0.99)`` and
    mapped through the ppf of an ``EnsembleDistribution`` built from
    ``weights``, ``mean`` and ``standard_dev``. The optimiser then searches,
    starting from 2, for the ``guess`` minimising
    ``(mean(guess ** y) - target) ** 2`` with
    ``y = max(x - tmrel, 0) / scale``.

    ``tmrel`` and ``scale`` are not parameters; they are resolved from the
    enclosing scope.

    Returns
    -------
    The result object of ``optimize.minimize``.

    Raises
    ------
    ZeroDivisionError
        If ``attributable_fraction`` equals 1.
    """
    target = 1 / (1 - attributable_fraction)
    dist = EnsembleDistribution(weights, mean=mean, sd=standard_dev)
    q = .98 * np.random.random(sample_size) + 0.01
    x_ = dist.ppf(q)

    # The exponent does not depend on the optimiser's guess, so compute it
    # once instead of on every loss evaluation.
    y = np.maximum(x_ - tmrel, 0) / scale

    def loss(guess):
        mean_rr = 1 / sample_size * np.sum(guess**y)
        return (mean_rr - target)**2

    return optimize.minimize(loss, 2)
def _get_parameters(self, weights, mean, sd):
    """Compute ensemble weights and parameters from flat input tables.

    ``weights``, ``mean`` and ``sd`` are indexed by the demographic columns
    (``sex``, ``age_start``, ``age_end``, ``year_start``, ``year_end``); for
    ``mean`` and ``sd`` only the ``value`` column is used. The indexed data
    is passed to ``EnsembleDistribution.get_parameters``.

    Returns a tuple ``(weights, parameters)`` where ``weights`` and every
    entry of the ``parameters`` dict have had their index reset back into
    columns.
    """
    demographic_keys = [
        'sex', 'age_start', 'age_end', 'year_start', 'year_end'
    ]
    indexed_weights = weights.set_index(demographic_keys)
    mean_values = mean.set_index(demographic_keys)['value']
    sd_values = sd.set_index(demographic_keys)['value']

    fitted_weights, fitted_params = EnsembleDistribution.get_parameters(
        indexed_weights, mean=mean_values, sd=sd_values)

    flat_weights = fitted_weights.reset_index()
    flat_params = {}
    for dist_name, frame in fitted_params.items():
        flat_params[dist_name] = frame.reset_index()
    return flat_weights, flat_params
def get_dist(dist_params, stratification, draw):
    """Build the distribution for one stratification and draw.

    When ``dist_params`` is a ``pd.DataFrame`` the selected cell is wrapped
    in an ``IKFDist``. Otherwise ``dist_params`` must expose ``mean``,
    ``sd`` and ``weights`` attributes; an ``EnsembleDistribution`` is built
    from the stratification's weights (with the ``glnorm`` entry removed),
    mean and standard deviation.

    Returns ``None`` when the mean or the standard deviation is zero.
    """
    if isinstance(dist_params, pd.DataFrame):
        return IKFDist(dist_params.loc[stratification, draw])

    mu = dist_params.mean.loc[stratification, draw]
    sigma = dist_params.sd.loc[stratification, draw]
    if not (mu and sigma):
        return None

    weight_rows = dist_params.weights.loc[stratification].reset_index()
    kept_rows = weight_rows[weight_rows['parameter'] != 'glnorm']
    value_by_name = (kept_rows.loc[:, ['parameter', 'value']]
                     .set_index('parameter')
                     .to_dict()['value'])
    # Each weight is wrapped in a one-element list before being handed to
    # EnsembleDistribution.
    wrapped = {}
    for dist_name, weight in value_by_name.items():
        wrapped[dist_name] = [weight]
    return EnsembleDistribution(weights=wrapped, mean=mu, sd=sigma)