def retractOnWorld(cogstate, worldname, proposition):
    """
    INPUT: a triple (cognitive state, worldname, proposition)
    OUTPUT: array of worlds
    WHAT IT DOES: Definition 4 (i)
    """
    result = []
    world = getWorldByName(worldname, cogstate)
    for situation in sitgen(world):  # s
        if Forceable(situation, proposition, cogstate):
            continue  # s may not force P
        adding = False
        for basis in getAllBases(world, cogstate):  # s'
            if not subset(situation, basis):
                continue  # s has to be a subset of s'
            Maximal = True
            for t in subsitgen(basis):
                if Forceable(t, proposition, cogstate):
                    continue  # t may not force P
                if subset(situation, t) and situation != t:
                    Maximal = False
            if not Maximal:
                continue  # s should be a maximal subset of s'
            adding = True
        if adding:
            result.append(situation)
    return result
def Forceable(situation, proposition, cogstate):
    """
    INPUT: a triple (situation, proposition, cognitive state)
    OUTPUT: a Boolean
    WHAT IT DOES: Decides whether the given situation forces the
    proposition in the cognitive state.
    """
    for world in cogstate:
        if world["meta"]["US"]:
            if subset(situation, world):
                if world["meta"]["name"] not in proposition:
                    return False
    return True
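# Hypothetical usage sketch (the exact data layout is an assumption,
# inferred from retractOnState below): a cognitive state is a list of
# world dicts, each carrying a "values" dict and a "meta" dict with
# "name", "US" and "FS" entries, and a proposition is a collection of
# world names. A call could then look like:
#
#     cogstate = [
#         {"values": {"p": True},  "meta": {"name": "w1", "US": True,  "FS": True}},
#         {"values": {"p": False}, "meta": {"name": "w2", "US": True,  "FS": False}},
#     ]
#     Forceable(situation, ["w1"], cogstate)
#
# This returns True exactly when every US-world that the situation is a
# subset of is named in the proposition ["w1"].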
def retractOnState(cogstate, proposition):
    """
    INPUT: a pair (cognitive state, proposition)
    OUTPUT: cognitive state
    WHAT IT DOES: Definition 4 (ii)
    """
    result = []
    for world in cogstate:
        newworld = {}  # copy the world so we do not shoot ourselves in the foot
        newworld["values"] = dict(world["values"])
        newworld["meta"] = dict(world["meta"])
        addingToFS = False
        if world["meta"]["US"]:
            for biworld in cogstate:
                if biworld["meta"]["FS"]:
                    biretract = retractOnWorld(cogstate, biworld["meta"]["name"],
                                               proposition)
                    for s in biretract:
                        if subset(s, world):
                            addingToFS = True
        newworld["meta"]["FS"] = addingToFS
        result.append(dict(newworld))
    return result
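# A hedged end-to-end sketch (assuming the helpers getWorldByName, sitgen,
# subsitgen, getAllBases and subset are defined elsewhere in this module):
#
#     retracted = retractOnState(cogstate, ["w1", "w3"])
#
# Note that retractOnState leaves every world's "values" untouched and only
# recomputes the "FS" flag, so retraction never adds or removes worlds from
# the state; it only changes which worlds are marked FS.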
import copy
import random

import numpy as np
import numpy.lib.recfunctions

import utilities


def knock_out_leaving_surveys(data, y, se, key, year, survey_span,
                              num_surveys, prop, survey_date):
    """
    Knock out values of y and se in data so that the remaining data
    simulates a set of surveys. More specifically, in each level of key
    multiple surveys are generated by randomly selecting a year in which
    to conduct each survey and then marking that year and a number of
    previous years, determined by survey_span, to leave in the dataset.

    Parameters
    ----------
    data : ndarray
        A structured NumPy array. It should not have any missing values,
        or else strange behavior will ensue.
    y : string
        The label for the response variable in data.
    se : string
        The label for the standard error variable in data.
    key : string or list of strings
        The labels for the variables in data that define the separate
        levels of the knock out scheme.
    year : string
        The label for the year in data.
    survey_span : int
        The number of years that each survey covers.
    num_surveys : int
        The number of surveys in each country.
    prop : float
        The proportion of countries to which the knock out design is
        applied; 0 <= prop <= 1.
    survey_date : string
        The name of a variable to be added to data containing the year in
        which each survey was conducted, so that surveys can be
        distinguished within a given level of the key. This comes in handy
        if you want to specify correlated noise among surveys in your
        noiser.

    Returns
    -------
    ko_data : ndarray
        The same as data, except with values of y and se knocked out.

    Notes
    -----
    In this framework, multiple observations of the same data points
    cannot be generated.
    """
    ko_data = copy.copy(data)
    ko_data = utilities.add_unique_id(ko_data, key, 'unique_id_for_ko')
    num_levels = len(np.unique(ko_data['unique_id_for_ko']))
    # Mark roughly prop of the levels for the design, then shuffle so the
    # marked levels are chosen at random.
    r = np.where(np.arange(1., num_levels + 1.) <= num_levels * prop,
                 True, False)
    if isinstance(r.tolist(), bool):  # guard for the degenerate 0-d case
        r = [r]
    else:
        random.shuffle(r)
    should_be_kept = {}
    survey_date_dict = {}
    for i, id in enumerate(np.unique(ko_data['unique_id_for_ko'])):
        ko_data_i = utilities.subset(ko_data, 'unique_id_for_ko', id)
        should_be_kept[id] = []
        survey_date_dict[id] = []
        if r[i]:
            # Simulate num_surveys surveys: pick a survey year at random
            # and keep the survey_span years leading up to it.
            for s in range(num_surveys):
                survey_year_index = random.choice(range(len(ko_data_i[year])))
                for j in range(survey_year_index - survey_span,
                               survey_year_index):
                    if 0 <= j < len(ko_data_i[year]):
                        should_be_kept[id].append(ko_data_i[year][j])
                        survey_date_dict[id].append(
                            ko_data_i[year][survey_year_index])
        else:
            # Levels outside the design keep all of their years, with no
            # survey date attached.
            for j in range(len(ko_data_i[year])):
                should_be_kept[id].append(ko_data_i[year][j])
                survey_date_dict[id].append(np.nan)
    survey_date_list = [np.nan] * len(ko_data[y])
    for i in range(len(ko_data[y])):
        id = ko_data['unique_id_for_ko'][i]
        yr = ko_data[year][i]
        # A row is knocked out only when its year was not kept for its
        # level; a NaN survey date alone (a level outside the design)
        # does not knock the row out.
        matched = False
        for j, kept_yr in enumerate(should_be_kept[id]):
            if kept_yr == yr:
                survey_date_list[i] = survey_date_dict[id][j]
                matched = True
                break
        if not matched:
            ko_data[y][i] = np.nan
            ko_data[se][i] = np.nan
    ko_data = numpy.lib.recfunctions.append_fields(
        ko_data, survey_date, np.array(survey_date_list))
    ko_data = numpy.lib.recfunctions.drop_fields(ko_data, 'unique_id_for_ko')
    return ko_data
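# Illustrative example (not from the original source; assumes the companion
# utilities module used above is importable): simulate two 3-year surveys
# in every country of a small structured array.
#
#     dtype = [('iso3', '|S4'), ('year', '<i4'), ('y', '<f4'), ('se', '<f4')]
#     data = np.array([('USA', 1990 + t, float(t), .1) for t in range(10)],
#                     dtype=dtype)
#     ko = knock_out_leaving_surveys(data, 'y', 'se', 'iso3', 'year',
#                                    survey_span=3, num_surveys=2, prop=1.,
#                                    survey_date='sdate')
#
# Rows outside every simulated survey window get NaN for y and se, and the
# new 'sdate' field records the year of the survey that kept each row.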
def knock_out_cluster_unit(data, y, se, cluster='iso3', unit='year',
                           prop=.2, design='random'):
    """
    Within levels defined by the cluster variable, knock out a proportion
    of units in data by replacing values of the variable y.

    Parameters
    ----------
    data : ndarray
        A structured array.
    y : string
        The label of the variable in data that corresponds to the response
        variable to be knocked out.
    se : string
        The label of the variable in data that corresponds to the standard
        error variable to be knocked out.
    cluster : string or list of strings
        A field or list of fields in data (e.g. 'iso3' or ['iso3','age']).
        The knock out scheme is applied separately to the levels defined
        by cluster.
    unit : string
        A field in data; the unit of the data to knock out. Unit should
        not have multiple values within levels of cluster.
    prop : float
        The proportion of data to knock out.
    design : string
        If 'random', a proportion of data is knocked out at random. If
        'first', the first proportion of data is knocked out, and
        analogously for 'last'.

    Examples
    --------
    >>> dtype = [('iso3','|S4'),('year','<i4'),('y','<f4'),('se','<f4')]
    >>> data = np.array([('USA',1990,1,.1),('USA',1991,2,.2),('CAN',1990,3,.3),('CAN',1991,4,.4)], dtype=dtype)
    >>> ko_data = knock_out_cluster_unit(data,'y','se','iso3','year',.5,'first')
    >>> utilities.is_nan(ko_data['y'][0])
    True
    >>> utilities.is_nan(ko_data['y'][1])
    False
    >>> utilities.is_nan(ko_data['y'][2])
    True
    >>> utilities.is_nan(ko_data['y'][3])
    False
    >>> utilities.is_nan(ko_data['se'][0])
    True
    >>> utilities.is_nan(ko_data['se'][1])
    False
    >>> utilities.is_nan(ko_data['se'][2])
    True
    >>> utilities.is_nan(ko_data['se'][3])
    False

    Check that the original data has not been changed:

    >>> utilities.is_nan(data['y'][0])
    False
    """
    data = copy.copy(data)
    data_cluster = {}
    if cluster == '':
        data_cluster[''] = data
    else:
        # If cluster is a list of fields, collapse it into a single
        # temporary id field. (A multi-character first element means
        # cluster is a list of field names rather than a single string.)
        if len(cluster[0]) >= 2:
            data = utilities.add_unique_id(data, cluster,
                                           'knockerouters_unique_cluster_id')
            cluster = 'knockerouters_unique_cluster_id'
        for level in np.unique(data[cluster]):
            data_cluster[level] = utilities.subset(data, cluster, level)
    for key in data_cluster.keys():
        # Indices of observations that are actually observed (not NaN).
        candidates = []
        for i, val in enumerate(data_cluster[key][y]):
            if not utilities.is_nan(val):
                candidates.append(i)
        should_be_knocked_out = {}
        r = np.where(np.arange(1., len(candidates) + 1.) <=
                     len(candidates) * prop, True, False)
        if isinstance(r.tolist(), bool):  # guard for the degenerate 0-d case
            r = [r]
        else:
            random.shuffle(r)
        for index, i in enumerate(candidates):
            level = data_cluster[key][unit][i]
            if design == 'random':
                should_be_knocked_out[level] = r[index]
            elif design == 'first':
                should_be_knocked_out[level] = \
                    (float(i + 1) / len(candidates)) <= prop
            elif design == 'last':
                should_be_knocked_out[level] = \
                    (float(i + 1) / len(candidates)) >= (1 - prop)
        for i, level in enumerate(data[unit]):
            if level in should_be_knocked_out:
                if cluster == '':
                    if should_be_knocked_out[level]:
                        data[y][i] = np.nan
                        data[se][i] = np.nan
                else:
                    if should_be_knocked_out[level] and data[cluster][i] == key:
                        data[y][i] = np.nan
                        data[se][i] = np.nan
    if cluster == 'knockerouters_unique_cluster_id':
        data = numpy.lib.recfunctions.drop_fields(
            data, 'knockerouters_unique_cluster_id')
    return data
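# Usage note (an addition, not from the original source): with
# design='random' the knocked out units are picked with random.shuffle, so
# seed the standard library's random module for reproducible simulations:
#
#     random.seed(0)
#     ko = knock_out_cluster_unit(data, 'y', 'se', cluster='iso3',
#                                 unit='year', prop=.5, design='random')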