def get_subtargets(subtarget, sim):
    '''
    A small helper function to see if subtargeting is a list of indices to use,
    or a function that needs to be called. If a function, it must take a single
    argument, a sim object, and return a list of indices. Also validates the values.
    Currently designed for use with testing interventions, but could be generalized
    to other interventions.

    Args:
        subtarget (dict): dict with keys 'inds' and 'vals'; see test_num() for examples of a valid subtarget dictionary
        sim (Sim): the simulation object

    Returns:
        A tuple (subtarget_inds, subtarget_vals): the resolved indices and the values as supplied

    Raises:
        ValueError: if either required key is missing, or if the values are
            iterable and their length does not match the number of indices
    '''

    # Validation: both keys are required, so check both up front rather than
    # letting a missing 'vals' surface later as a bare KeyError
    if any(key not in subtarget for key in ('inds', 'vals')):
        errormsg = f'The subtarget dict must have keys "inds" and "vals", but you supplied {subtarget}'
        raise ValueError(errormsg)

    # Handle the two options of type
    if callable(subtarget['inds']):  # A function has been provided
        subtarget_inds = subtarget['inds'](sim)  # Call the function to get the indices
    else:
        subtarget_inds = subtarget['inds']  # The indices are supplied directly

    # Validate the values: a scalar applies to every index, an iterable must match one-to-one
    subtarget_vals = subtarget['vals']
    if sc.isiterable(subtarget_vals):
        if len(subtarget_vals) != len(subtarget_inds):
            errormsg = f'Length of subtargeting indices ({len(subtarget_inds)}) does not match length of values ({len(subtarget_vals)})'
            raise ValueError(errormsg)

    return subtarget_inds, subtarget_vals
def initialize(self, sim):
    '''
    Normalize the days and changes, and record the original beta value(s).

    A single day (including a string) is wrapped in a list; list entries are
    converted via sim.day(); days and changes are then promoted to arrays and
    must have equal length. The original beta is stored under 'overall' for a
    layer of None, otherwise the per-layer beta is stored under the layer key.

    Args:
        sim (Sim): the simulation object

    Raises:
        ValueError: if the number of days and the number of changes differ
    '''
    # A lone day (strings are iterable, so test for them explicitly) becomes a one-element list
    single_day = sc.isstring(self.days) or not sc.isiterable(self.days)
    if single_day:
        self.days = sc.promotetolist(self.days)

    # Only lists are converted entry-by-entry; other iterables are passed through as supplied
    if isinstance(self.days, list):
        for pos, raw_day in enumerate(self.days):
            self.days[pos] = sim.day(raw_day)

    self.days = sc.promotetoarray(self.days)
    self.changes = sc.promotetoarray(self.changes)
    n_days = len(self.days)
    n_changes = len(self.changes)
    if n_days != n_changes:
        raise ValueError(f'Number of days supplied ({n_days}) does not match number of changes in beta ({n_changes})')

    # Remember the starting beta for every requested layer
    self.orig_betas = {}
    self.layers = sc.promotetolist(self.layers, keepnone=True)
    for layer in self.layers:
        if layer is not None:
            self.orig_betas[layer] = sim['beta_layer'][layer]
        else:
            self.orig_betas['overall'] = sim['beta']

    self.initialized = True
    return
def get_quar_inds(quar_policy, sim):
    '''
    Helper function to return the appropriate indices for people in quarantine
    based on the current quarantine testing "policy". Used by test_num and test_prob.
    Not for use by the user.

    If quar_policy is a number or a list of numbers, each number q selects people
    whose date_quarantined equals t-1-q. It can also be a function that takes the
    sim and returns the indices.

    Args:
        quar_policy (str, int, list, func): 'start', people entering quarantine; 'end', people leaving; 'both', entering and leaving; 'daily', every day in quarantine
        sim (Sim): the simulation object

    Returns:
        The indices selected by the policy (an empty array if quar_policy is None)

    Raises:
        ValueError: if the policy is none of the recognized forms
    '''
    t = sim.t

    def entering():
        # Actually do the day after since testing usually happens before contact tracing
        return cvu.true(sim.people.date_quarantined == t - 1)

    def leaving():
        # +1 since they are released on date_end_quarantine, so do the day before
        return cvu.true(sim.people.date_end_quarantine == t + 1)

    if quar_policy is None:
        return np.array([])
    if quar_policy == 'start':
        return entering()
    if quar_policy == 'end':
        return leaving()
    if quar_policy == 'both':
        return np.concatenate([entering(), leaving()])
    if quar_policy == 'daily':
        return cvu.true(sim.people.quarantined)

    # Numeric offsets: a single number or any non-string iterable of numbers
    if sc.isnumber(quar_policy) or (sc.isiterable(quar_policy) and not sc.isstring(quar_policy)):
        offsets = sc.promotetoarray(quar_policy)
        matches = [cvu.true(sim.people.date_quarantined == t - 1 - q) for q in offsets]
        return np.unique(np.concatenate(matches))

    if callable(quar_policy):
        return quar_policy(sim)

    errormsg = f'Quarantine policy "{quar_policy}" not recognized: must be a string (start, end, both, daily), int, list, array, set, tuple, or function'
    raise ValueError(errormsg)
def split(self, inds=None, chunks=None):
    '''
    Convenience method for splitting one MultiSim into several. You can specify
    either individual indices of simulations to extract, via inds, or consecutive
    chunks of indices, via chunks. If this function is called on a merged MultiSim,
    the chunks can be retrieved automatically and no arguments are necessary.

    Args:
        inds (list): a list of lists of indices, with each list turned into a MultiSim
        chunks (int or list): if an int, split the MultiSim into that many equally sized chunks (the number of sims must divide evenly); if a list, return consecutive chunks of that many sims

    Returns:
        A list of MultiSim objects

    Raises:
        ValueError: if neither inds nor chunks is supplied and the MultiSim has
            no ``chunks`` attribute; also raised by np.split if an integer
            ``chunks`` does not divide the number of sims evenly

    **Examples**::

        m1 = cv.MultiSim(cv.Sim(label='sim1'), initialize=True)
        m2 = cv.MultiSim(cv.Sim(label='sim2'), initialize=True)
        m3 = cv.MultiSim.merge(m1, m2)
        m3.run()
        m1b, m2b = m3.split()

        msim = cv.MultiSim(cv.Sim(), n_runs=6)
        msim.run()
        m1, m2 = msim.split(inds=[[0,2,4], [1,3,5]])
        mlist1 = msim.split(chunks=[2,4]) # Equivalent to inds=[[0,1], [2,3,4,5]]
        mlist2 = msim.split(chunks=2) # Equivalent to inds=[[0,1,2], [3,4,5]]
    '''

    # Process indices and chunks
    if inds is None:  # Indices not supplied
        if chunks is None:  # Chunks not supplied
            if hasattr(self, 'chunks'):  # Created from a merged MultiSim
                inds = self.chunks
            else:  # No indices or chunks and not created from a merge
                errormsg = 'If a MultiSim has not been created via merge(), you must supply either inds or chunks to split it'
                raise ValueError(errormsg)
        else:  # Chunks supplied, but not inds
            sim_inds = np.arange(len(self))  # Indices for the simulations
            if sc.isiterable(chunks):  # e.g. chunks = [2,4]
                chunk_inds = np.cumsum(chunks)[:-1]  # Chunk sizes -> split positions
                inds = np.split(sim_inds, chunk_inds)
            else:  # e.g. chunks = 2: an integer is the NUMBER of equal sections, not their length
                inds = np.split(sim_inds, chunks)  # This will fail if the length is wrong

    # Do the conversion
    mlist = []
    for indlist in inds:
        sims = sc.dcp([self.sims[i] for i in indlist])  # Copy so the new MultiSims do not share sim objects
        msim = MultiSim(sims=sims)
        mlist.append(msim)

    return mlist
def _make_resdict(self, for_json: bool = True) -> dict:
    '''
    Flatten the results structure into a plain dict for output.

    Result objects are replaced by their ``values``. When for_json is True every
    entry is kept and the time series keys are stored under 'timeseries_keys';
    otherwise only iterable entries whose length equals self.npts are kept.

    Args:
        for_json (bool): whether to keep all entries and include 'timeseries_keys'

    Returns:
        A dict mapping result keys to their values
    '''
    resdict = {'timeseries_keys': self.reskeys} if for_json else {}
    for key, entry in self.results.items():
        values = entry.values if isinstance(entry, Result) else entry
        is_timeseries = for_json or (sc.isiterable(values) and len(values) == self.npts)
        if is_timeseries:
            resdict[key] = values
    return resdict
def process_days(sim, days):
    '''
    Ensure lists of days are in consistent format. Used by change_beta, clip_edges,
    and some analyzers. If day is 'end' or -1, use the final day of the simulation.

    The input is never modified: the converted days are collected in a new list,
    so a list supplied by the caller is left untouched and immutable sequences
    such as tuples are accepted.

    Args:
        sim (Sim): the simulation object; provides sim.day() and sim['end_day']
        days (int, str, or iterable): the day or days to convert

    Returns:
        An array of the converted days, in the order supplied
    '''
    # A lone day (strings are iterable, so test for them explicitly) becomes a one-element list
    if sc.isstring(days) or not sc.isiterable(days):
        days = sc.promotetolist(days)

    converted = []
    for day in days:
        if day in ['end', -1]:
            day = sim['end_day']
        converted.append(sim.day(day))  # Ensure it's an integer and not a string or something

    return sc.promotetoarray(converted)
def __init__(self, pars):
    '''
    Validate and store a dict of parameters.

    Every entry of pars must itself contain the subkeys 'days' and 'vals';
    non-iterable subvalues are promoted to arrays (in place), and the two
    must end up with the same length.

    Args:
        pars (dict): maps each parameter key to a dict with 'days' and 'vals'

    Raises:
        KeyError: if a parameter is missing 'days' or 'vals'
        ValueError: if the lengths of 'days' and 'vals' differ
    '''
    super().__init__()
    required = ['days', 'vals']
    for parkey in pars.keys():
        spec = pars[parkey]
        for subkey in required:
            if subkey not in spec.keys():
                raise KeyError(f'Parameter {parkey} is missing subkey {subkey}')
            if not sc.isiterable(spec[subkey]):  # Scalars become one-element arrays
                spec[subkey] = sc.promotetoarray(spec[subkey])
        n_days = len(spec['days'])
        n_vals = len(spec['vals'])
        if n_days != n_vals:
            raise ValueError(f'Length of days ({n_days}) does not match length of values ({n_vals}) for parameter {parkey}')
    self.pars = pars
    return
def process_days(sim, days, return_dates=False):
    '''
    Ensure lists of days are in consistent format. Used by change_beta, clip_edges,
    and some analyzers. If day is 'end' or -1, use the final day of the simulation.
    Optionally return dates as well as days.

    The input is never modified: the converted days are collected in a new list,
    so a list supplied by the caller is left untouched and immutable sequences
    such as tuples are accepted.

    Args:
        sim (Sim): the simulation object; provides sim.day(), sim.date() and sim['end_day']
        days (int, str, or iterable): the day or days to convert
        return_dates (bool): whether to also return the result of sim.date() for each day

    Returns:
        The sorted array of days; if return_dates is True, a tuple (days, dates)
    '''
    # A lone day (strings are iterable, so test for them explicitly) becomes a one-element list
    if sc.isstring(days) or not sc.isiterable(days):
        days = sc.promotetolist(days)

    converted = []
    for day in days:
        if day in ['end', -1]:
            day = sim['end_day']
        converted.append(sim.day(day))  # Ensure it's an integer and not a string or something

    days = np.sort(sc.promotetoarray(converted))  # Ensure they're an array and in order
    if return_dates:
        dates = [sim.date(day) for day in days]  # One date per (sorted) day
        return days, dates
    return days
def process_days_changes(sim, days, changes):
    '''
    Ensure lists of days and lists of changes are in consistent format. Used by
    change_beta and clip_edges.

    Lists and tuples of days are converted entry-by-entry via sim.day() into a
    new list, so the caller's sequence is never modified. Other iterables (e.g.
    arrays) are passed through to the array conversion as supplied.

    Args:
        sim (Sim): the simulation object; provides sim.day()
        days (int, str, or iterable): the day or days on which changes occur
        changes (number or iterable): the change(s), one per day

    Returns:
        A tuple (days, changes), both as arrays

    Raises:
        ValueError: if the number of days and the number of changes differ
    '''
    # A lone day (strings are iterable, so test for them explicitly) becomes a one-element list
    if sc.isstring(days) or not sc.isiterable(days):
        days = sc.promotetolist(days)

    if isinstance(days, (list, tuple)):
        days = [sim.day(day) for day in days]  # Ensure it's an integer and not a string or something

    days = sc.promotetoarray(days)
    changes = sc.promotetoarray(changes)
    if len(days) != len(changes):
        errormsg = f'Number of days supplied ({len(days)}) does not match number of changes in beta ({len(changes)})'
        raise ValueError(errormsg)
    return days, changes
def compute_losses(self):
    '''
    Compute the weighted goodness-of-fit.

    For every key in self.gofs, the loss is the goodness-of-fit multiplied by
    its weight. Keys without an entry in self.weights get a weight of 1.0. An
    iterable weight must either already match the length of the goodness-of-fit
    values, or match self.sim_npts, in which case it is indexed with
    self.inds.sim[key].

    Raises:
        ValueError: if an iterable weight matches neither length
    '''
    for key, gof in self.gofs.items():
        if key not in self.weights:
            weight = 1.0
        else:
            weight = self.weights[key]
            if sc.isiterable(weight):  # It's an array
                n_weight = len(weight)
                n_sim = self.sim_npts
                n_match = len(gof)
                if n_weight != n_match:  # Already the right length otherwise: nothing to do
                    if n_weight != n_sim:
                        raise ValueError(f'Could not map weight array of length {n_weight} onto simulation of length {n_sim} or data-model matches of length {n_match}')
                    # Most typical case: it's the length of the simulation, must trim
                    weight = weight[self.inds.sim[key]]
        self.losses[key] = gof * weight
    return
def check_dist(actual, expected, std=None, dist='norm', check='dist', label=None, alpha=0.05, size=10000, verbose=True, die=False, stats=False):
    """
    Check whether counts match the expected distribution. The distribution can be
    any listed in scipy.stats. The parameters for the distribution should be supplied
    via the "expected" argument. The standard deviation for a normal distribution is
    a special case; it can be supplied separately or calculated from the (actual) data.

    Args:
        actual (int, float, or array) : the observed value, or distribution of values
        expected (int, float, tuple)  : the expected value; or, a tuple of arguments
        std (float)                   : for normal distributions, the standard deviation of the expected value (taken from data if not supplied)
        dist (str)                    : the type of distribution to use; 'norm', 'normal' and 'gaussian' (any case) all select the normal distribution
        check (str)                   : what to check: 'dist' = entire distribution (default), 'mean' (equivalent to supplying np.mean(actual)), or 'median'
        label (str)                   : the name of the variable being tested
        alpha (float)                 : the significance level at which to reject the null hypothesis
        size (int)                    : the size of the sample from the expected distribution to compare with if distribution is discrete
        verbose (bool)                : print a warning if the null hypothesis is rejected
        die (bool)                    : raise an exception if the null hypothesis is rejected
        stats (bool)                  : whether to return statistics

    Returns:
        If stats is False, a boolean that is True when the null hypothesis is NOT
        rejected (i.e. the check passed). If stats is True, an object with fields
        null, pvalue, n_samples, expected_quintiles, observed_quintiles, and
        observed_quantile.

    Raises:
        NotImplementedError: if the distribution cannot be constructed from scipy.stats
            with the supplied arguments, or is neither continuous nor discrete
        ValueError: if the null hypothesis is rejected and die is True

    **Examples**::

        sp.check_dist(actual=[3,4,4,2,3], expected=3, dist='poisson')
        sp.check_dist(actual=[0.14, -3.37, 0.59, -0.07], expected=0, std=1.0, dist='norm')
        sp.check_dist(actual=5.5, expected=(1, 5), dist='lognorm')
    """
    # Handle inputs
    label = f' "{label}"' if label else ''
    is_dist = sc.isiterable(actual)

    # Set distribution
    if dist.lower() in ['norm', 'normal', 'gaussian']:
        if std is None:
            if is_dist:
                std = np.std(actual)  # Get standard deviation from the data
            else:  # pragma: no cover
                std = 1.0
        args = (expected, std)
        scipydist = scipy.stats.norm
        truedist = scipy.stats.norm(expected, std)
    else:
        try:
            if sc.isnumber(expected):
                args = (expected, )
            else:
                args = tuple(expected)
            scipydist = getattr(scipy.stats, dist)
            truedist = scipydist(*args)
        except Exception as E:
            errormsg = f'Distribution "{dist}" not supported with the expected values supplied; valid distributions are those in scipy.stats'
            raise NotImplementedError(errormsg) from E

    # Calculate stats
    if is_dist and check == 'dist':
        quantile = truedist.cdf(np.median(actual))

        if isinstance(scipydist, scipy.stats.rv_continuous):
            # Continuous: K-S test against the frozen distribution's CDF. Passing the
            # callable (rather than the user-supplied name) keeps aliases such as
            # 'normal' or 'gaussian' working, since those are not names in scipy.stats.
            teststat, pvalue = scipy.stats.kstest(rvs=actual, cdf=truedist.cdf)
        elif isinstance(scipydist, scipy.stats.rv_discrete):
            # Discrete: two-sample K-S test against a large sample from the theoretical distribution
            expected_r = truedist.rvs(size=size)
            teststat, pvalue = scipy.stats.ks_2samp(actual, expected_r)
        else:  # pragma: no cover
            errormsg = 'Distribution is neither continuous or discrete and so not supported at this time.'
            raise NotImplementedError(errormsg)
        null = pvalue > alpha

    else:
        if check == 'mean':
            value = np.mean(actual)
        elif check == 'median':
            value = np.median(actual)
        else:
            value = actual
        quantile = truedist.cdf(value)  # If it's a single value, see where it lands on the expected CDF
        pvalue = 1.0-2*abs(quantile-0.5)  # E.g., 0.975 maps on to p=0.05
        minquant = alpha/2  # e.g., 0.025 for alpha=0.05
        maxquant = 1-alpha/2  # e.g., 0.975 for alpha=0.05
        minval = truedist.ppf(minquant)
        maxval = truedist.ppf(maxquant)
        quant_check = (minquant <= quantile <= maxquant)  # True if above minimum and below maximum
        val_check = (minval <= value <= maxval)  # Check values
        null = quant_check or val_check  # Consider it to pass if either passes

    # Additional stats
    n_samples = len(actual) if is_dist else 1
    eps = 1.0/n_samples if n_samples > 4 else 1e-2  # For small number of samples, use default limits
    quintiles = [eps, 0.25, 0.5, 0.75, 1-eps]
    obvs_quin = np.quantile(actual, quintiles) if is_dist else actual
    expect_quin = truedist.ppf(quintiles)

    # If null hypothesis is rejected, print a warning or error
    if not null:
        msg = (
            f'\nVariable{label} with n={n_samples} samples is out of range using the distribution:\n'
            f'    {dist}({args}) →\n'
            f'    p={pvalue} < α={alpha}\n'
            f'Expected quintiles are: {expect_quin}\n'
            f'Observed quintiles are: {obvs_quin}\n'
            f'Observed median is in quantile: {quantile}'
        )
        if die:
            raise ValueError(msg)
        elif verbose:
            warnings.warn(msg)

    # If null hypothesis is not rejected, under verbose, print a confirmation
    if null and verbose:
        print(f'Check passed. Null hypothesis with expected distribution: {dist}{args} not rejected.')
        if is_dist and check == 'dist':
            print(f'Test statistic: {teststat}, pvalue: {pvalue}')

    if not stats:
        return null

    s = sc.objdict()
    s.null = null
    s.pvalue = pvalue
    s.n_samples = n_samples
    s.expected_quintiles = expect_quin
    s.observed_quintiles = obvs_quin
    s.observed_quantile = quantile
    return s