def fake_amr_ds(fields=("Density", ), geometry="cartesian", particles=0):
    from yt.frontends.stream.api import load_amr_grids

    prng = RandomState(0x4d3d3d3)
    LE, RE = _geom_transforms[geometry]
    LE = np.array(LE)
    RE = np.array(RE)
    data = []
    for gspec in _amr_grid_index:
        level, left_edge, right_edge, dims = gspec
        left_edge = left_edge * (RE - LE) + LE
        right_edge = right_edge * (RE - LE) + LE
        gdata = dict(level=level, left_edge=left_edge,
                     right_edge=right_edge, dimensions=dims)
        for f in fields:
            gdata[f] = prng.random_sample(dims)
        if particles:
            for i, f in enumerate('particle_position_%s' % ax for ax in 'xyz'):
                pdata = prng.random_sample(particles)
                pdata /= (right_edge[i] - left_edge[i])
                pdata += left_edge[i]
                gdata['io', f] = (pdata, 'code_length')
            for f in ('particle_velocity_%s' % ax for ax in 'xyz'):
                gdata['io', f] = (prng.random_sample(particles) - 0.5, 'cm/s')
            gdata['io', 'particle_mass'] = (prng.random_sample(particles), 'g')
        data.append(gdata)
    bbox = np.array([LE, RE]).T
    return load_amr_grids(data, [32, 32, 32], geometry=geometry, bbox=bbox)
class TestExtMathUtils(unittest.TestCase):
    """Test utils.extmath"""

    def setUp(self):
        self.rand = RandomState(0)

    def test_row_normalize_exp(self):
        arr = self.rand.random_sample((400, 200))
        arr2 = arr.copy()
        # in-place update
        row_log_normalize_exp(arr)
        arr2 -= logsumexp(arr2, axis=1)[:, np.newaxis]
        assert_almost_equal(arr, arr2)

    def test_mean_change_2d(self):
        arr1 = self.rand.random_sample((1000, 200))
        arr2 = self.rand.random_sample((1000, 200))
        ret1 = mean_change_2d(arr1, arr2)
        ret2 = np.abs(arr1 - arr2).mean()
        assert_almost_equal(ret1, ret2)

    def test_beta_param_update(self):
        alpha = self.rand.rand() * 10
        n_cols = self.rand.randint(100, 200)
        row_stats = self.rand.random_sample(n_cols)
        # in-place update
        arr1 = np.empty((2, n_cols - 1))
        beta_param_update(alpha, row_stats, arr1)
        # expected output
        arr2 = np.empty((2, n_cols - 1))
        arr2[0] = 1.0 + row_stats[:n_cols - 1]
        arr2[1] = alpha + np.flipud(np.cumsum(np.flipud(row_stats[1:])))
        assert_almost_equal(arr1, arr2)
def fake_amr_ds(
    fields=("Density", ), geometry="cartesian", particles=0, length_unit=None
):
    from yt.loaders import load_amr_grids

    prng = RandomState(0x4D3D3D3)
    LE, RE = _geom_transforms[geometry]
    LE = np.array(LE)
    RE = np.array(RE)
    data = []
    for gspec in _amr_grid_index:
        level, left_edge, right_edge, dims = gspec
        left_edge = left_edge * (RE - LE) + LE
        right_edge = right_edge * (RE - LE) + LE
        gdata = dict(
            level=level, left_edge=left_edge, right_edge=right_edge, dimensions=dims
        )
        for f in fields:
            gdata[f] = prng.random_sample(dims)
        if particles:
            for i, f in enumerate(f"particle_position_{ax}" for ax in "xyz"):
                pdata = prng.random_sample(particles)
                pdata /= right_edge[i] - left_edge[i]
                pdata += left_edge[i]
                gdata["io", f] = (pdata, "code_length")
            for f in (f"particle_velocity_{ax}" for ax in "xyz"):
                gdata["io", f] = (prng.random_sample(particles) - 0.5, "cm/s")
            gdata["io", "particle_mass"] = (prng.random_sample(particles), "g")
        data.append(gdata)
    bbox = np.array([LE, RE]).T
    return load_amr_grids(
        data, [32, 32, 32], geometry=geometry, bbox=bbox, length_unit=length_unit
    )
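# A hedged usage sketch (not part of the snippet above): the fixture returns a
# small in-memory yt dataset, so tests can query fields immediately. The
# ("stream", "Density") field tuple is an assumption about how load_amr_grids
# registers the field.
ds = fake_amr_ds(fields=("Density",), geometry="cartesian", particles=100)
ad = ds.all_data()
print(ad["stream", "Density"].min())  # drawn with random_sample, so within [0, 1)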
def _read(self, file_path):
    rs = RandomState(seed=1000)
    with open(cached_path(file_path), "r") as data_file:
        for _, line in enumerate(data_file.readlines()):
            items = json.loads(line)
            metadata = items["metadata"]
            tokens = metadata["tokens"]
            query = None if metadata["query"] == "None" else metadata["query"]
            label = metadata["label"]
            rationale = [x["span"] for x in items["rationale"]["spans"]]
            document = metadata["document"]

            if "annotation_id" in items:
                annotation_id = items["annotation_id"]
            else:
                annotation_id = hashlib.sha1(
                    document.encode("utf-8")
                    + (query.encode("utf-8") if query is not None else b"")
                ).hexdigest()

            if rs.random_sample() < self._keep_prob:
                instance = self.text_to_instance(
                    annotation_id=annotation_id,
                    document=document,
                    query=query,
                    label=label,
                    rationale=rationale,
                    tokens_existing=tokens,
                )
                if instance is not None:
                    yield instance
def _read(self, file_path):
    rs = RandomState(seed=1000)
    with open(cached_path(file_path), "r") as data_file:
        for _, line in enumerate(data_file.readlines()):
            items = json.loads(line)
            document = items["original_document"]
            annotation_id = items["annotation_id"]
            query = items.get("query", None)
            label = items.get("label", None)

            if rs.random_sample() < self._human_prob:
                rationale = items.get("human_rationale")
            else:
                rationale = items.get("predicted_rationale")["spans"]
                rationale = [span["span"] for span in rationale]

            if label is not None:
                label = str(label).replace(" ", "_")

            instance = self.text_to_instance(
                annotation_id=annotation_id,
                document=document,
                query=query,
                label=label,
                rationale=rationale,
            )
            yield instance
def sample_transformed(
    *,
    rng: RandomState,
    lo: float,
    hi: float,
) -> float:
    assert 0.0 <= lo <= hi <= 1.0, \
        f'bounds [{lo},{hi}] must be within [0,1]'
    size = hi - lo
    return rng.random_sample() * size + lo
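# Hedged usage sketch: random_sample() is uniform on [0.0, 1.0), so scaling by
# (hi - lo) and shifting by lo maps the draw into [lo, hi). The seed is arbitrary.
from numpy.random import RandomState

rng = RandomState(42)
x = sample_transformed(rng=rng, lo=0.25, hi=0.75)
assert 0.25 <= x < 0.75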
def fake_particle_ds(
        fields=("particle_position_x", "particle_position_y",
                "particle_position_z", "particle_mass",
                "particle_velocity_x", "particle_velocity_y",
                "particle_velocity_z"),
        units=('cm', 'cm', 'cm', 'g', 'cm/s', 'cm/s', 'cm/s'),
        negative=(False, False, False, False, True, True, True),
        npart=16**3, length_unit=1.0, data=None):
    from yt.frontends.stream.api import load_particles

    prng = RandomState(0x4d3d3d3)
    if not iterable(negative):
        negative = [negative for f in fields]
    assert len(fields) == len(negative)
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    # keep caller-supplied field data instead of discarding it
    data = data if data else {}
    for field, offset, u in zip(fields, offsets, units):
        if field in data:
            v = data[field]
            continue
        if "position" in field:
            v = prng.normal(loc=0.5, scale=0.25, size=npart)
            np.clip(v, 0.0, 1.0, v)
        v = (prng.random_sample(npart) - offset)
        data[field] = (v, u)
    bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
    ds = load_particles(data, 1.0, bbox=bbox)
    return ds
def sample_list_item(x: List[Any],
                     probs: Optional[np.ndarray],
                     random_state: RandomState) -> Any:
    """
    Sample a list item according to the items' probabilities.

    :param x: Items to sample.
    :param probs: Probabilities (must have same length as `x` and sum to 1),
        or None for a uniform distribution.
    :param random_state: Random state.
    :return: Sampled list item.
    """
    if probs is None:
        probs = np.repeat(1 / len(x), len(x))

    cdf_y_rand = random_state.random_sample()
    cum_probs = probs.cumsum()
    final_cum_prob = cum_probs[-1]
    if abs(1.0 - final_cum_prob) > 0.00001:
        raise ValueError(
            f'Expected cumulative probabilities to sum to 1, but got {final_cum_prob} instead.'
        )
    x_i = next(i for i, cum_prob in enumerate(cum_probs) if cdf_y_rand < cum_prob)
    return x[x_i]
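# Hedged usage sketch of the inverse-CDF sampling above: one uniform draw is
# compared against the running cumulative sum, so items with larger
# probabilities claim proportionally wider slices of [0, 1). Counts below are
# only approximate expectations.
import numpy as np
from numpy.random import RandomState

items = ['a', 'b', 'c']
counts = {item: 0 for item in items}
rs = RandomState(0)
for _ in range(10000):
    counts[sample_list_item(items, np.array([0.1, 0.3, 0.6]), rs)] += 1
print(counts)  # roughly {'a': 1000, 'b': 3000, 'c': 6000}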
class ArbitraryDriver(object):
    def __init__(self, seed, item_range_max, permutation_seed=100,
                 name=None, zipf_param=1.0001, d_second=-1):
        self.rand = RandomState(seed)
        self.max_item = item_range_max
        self.permute_seed = permutation_seed
        if name is None:
            self.name = self.__class__.__name__
        else:
            self.name = name

    # stubs meant to be overridden by concrete drivers
    def get_item(self, r_float):
        pass

    def get_cost(self, r_float, item_num):
        pass

    def permute_float(self, r_float):
        return r_float

    def sample_item_w_cost(self):
        r_float = self.rand.random_sample()
        cost_float = self.permute_float(r_float)
        item = self.get_item(r_float)
        cost = self.get_cost(cost_float, item)
        return (item, cost)
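# Hedged sketch of a concrete driver (a hypothetical subclass, not from the
# source): a single uniform draw both selects the item and, via permute_float,
# prices it, so popularity and cost can be decoupled by overriding permute_float.
class UniformDriver(ArbitraryDriver):
    def get_item(self, r_float):
        # map the uniform draw onto the integer item range
        return int(r_float * self.max_item)

    def get_cost(self, r_float, item_num):
        # flat unit cost for every item
        return 1.0

driver = UniformDriver(seed=7, item_range_max=100)
print(driver.sample_item_w_cost())  # e.g. (47, 1.0)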
def test_precision():
    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
        (rng_reg.random_sample((5, 2)), rng_clf.random_sample((1000, 4))),
        (rng_reg.random_sample((5, )), rng_clf.randint(2, size=(1000, ))),
        (
            DecisionTreeRegressor(
                criterion="friedman_mse", random_state=0, max_depth=1),
            DecisionTreeClassifier(max_depth=1, random_state=0),
        ),
    ):
        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(
                clf, out_file=None, precision=precision, proportion=True)

            # With the current random state, the impurity and the threshold
            # will have the number of precision set in the export_graphviz
            # function. We will check the number of precision with a strict
            # equality. The value reported will have only 2 precision and
            # therefore, only a less equal comparison will be done.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert len(search(r"\.\d+", finding.group()).group()) <= precision + 1
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"
            for finding in finditer(pattern, dot_data):
                assert len(search(r"\.\d+", finding.group()).group()) == precision + 1
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert len(search(r"\.\d+", finding.group()).group()) == precision + 1
def test_precision():
    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)), rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )), rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):
        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # will have the number of precision set in the export_graphviz
            # function. We will check the number of precision with a strict
            # equality. The value reported will have only 2 precision and
            # therefore, only a less equal comparison will be done.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert_less_equal(
                    len(search(r"\.\d+", finding.group()).group()),
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"
            for finding in finditer(pattern, dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
class RandomIntVal:
    seed = 1012810
    nState = RandomState(seed)
    cState = random.StrongRandom()

    def __init__(self, seed=1012810):
        self.nState = RandomState(seed)
        self.cState = random.StrongRandom()
        # Sampler warmup
        print "Starting Sampler Warm-up"
        junk = self.nState.random_sample(10000)
        print "Warm-up Complete"

    def getValue(self):
        maxsize = sys.maxint - 1
        rn = float(self.cState.randint(0, maxsize)) / maxsize
        return rn

    def getValueTwister(self):
        return self.nState.random_sample()
def get_cost(self, r_float, item):
    cost = (item ** self.cost_power) / self.inner_max_cost
    if self.perturbate > 0:
        if len(self.perturbed) == 0:
            r = RandomState(self.permute_seed)
            self.perturbed = (
                1.0 - (r.random_sample(self.max_item) * self.perturbate))
        # item = float(self.perturbed[item] * item)
        cost *= float(self.perturbed[item])
    return cost
def fake_random_ds(
        ndims, peak_value=1.0,
        fields=("density", "velocity_x", "velocity_y", "velocity_z"),
        units=('g/cm**3', 'cm/s', 'cm/s', 'cm/s'),
        particle_fields=None, particle_field_units=None,
        negative=False, nprocs=1, particles=0, length_unit=1.0,
        unit_system="cgs", bbox=None):
    from yt.frontends.stream.api import load_uniform_grid

    prng = RandomState(0x4d3d3d3)
    if not iterable(ndims):
        ndims = [ndims, ndims, ndims]
    else:
        assert len(ndims) == 3
    if not iterable(negative):
        negative = [negative for f in fields]
    assert len(fields) == len(negative)
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = {}
    for field, offset, u in zip(fields, offsets, units):
        v = (prng.random_sample(ndims) - offset) * peak_value
        if field[0] == "all":
            data['number_of_particles'] = v.size
            v = v.ravel()
        data[field] = (v, u)
    if particles:
        if particle_fields is not None:
            for field, unit in zip(particle_fields, particle_field_units):
                if field in ('particle_position', 'particle_velocity'):
                    data['io', field] = (prng.random_sample((particles, 3)), unit)
                else:
                    data['io', field] = (prng.random_sample(size=particles), unit)
        else:
            for f in ('particle_position_%s' % ax for ax in 'xyz'):
                data['io', f] = (prng.random_sample(size=particles), 'code_length')
            for f in ('particle_velocity_%s' % ax for ax in 'xyz'):
                data['io', f] = (prng.random_sample(size=particles) - 0.5, 'cm/s')
            data['io', 'particle_mass'] = (prng.random_sample(particles), 'g')
        data['number_of_particles'] = particles
    ug = load_uniform_grid(data, ndims, length_unit=length_unit,
                           nprocs=nprocs, unit_system=unit_system, bbox=bbox)
    return ug
def fake_random_ds(
    ndims,
    peak_value=1.0,
    fields=("density", "velocity_x", "velocity_y", "velocity_z"),
    units=("g/cm**3", "cm/s", "cm/s", "cm/s"),
    particle_fields=None,
    particle_field_units=None,
    negative=False,
    nprocs=1,
    particles=0,
    length_unit=1.0,
    unit_system="cgs",
    bbox=None,
):
    from yt.loaders import load_uniform_grid

    prng = RandomState(0x4D3D3D3)
    if not is_sequence(ndims):
        ndims = [ndims, ndims, ndims]
    else:
        assert len(ndims) == 3
    if not is_sequence(negative):
        negative = [negative for f in fields]
    assert len(fields) == len(negative)
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = {}
    for field, offset, u in zip(fields, offsets, units):
        v = (prng.random_sample(ndims) - offset) * peak_value
        if field[0] == "all":
            v = v.ravel()
        data[field] = (v, u)
    if particles:
        if particle_fields is not None:
            for field, unit in zip(particle_fields, particle_field_units):
                if field in ("particle_position", "particle_velocity"):
                    data["io", field] = (prng.random_sample((int(particles), 3)), unit)
                else:
                    data["io", field] = (prng.random_sample(size=int(particles)), unit)
        else:
            for f in (f"particle_position_{ax}" for ax in "xyz"):
                data["io", f] = (prng.random_sample(size=particles), "code_length")
            for f in (f"particle_velocity_{ax}" for ax in "xyz"):
                data["io", f] = (prng.random_sample(size=particles) - 0.5, "cm/s")
            data["io", "particle_mass"] = (prng.random_sample(particles), "g")
    ug = load_uniform_grid(
        data,
        ndims,
        length_unit=length_unit,
        nprocs=nprocs,
        unit_system=unit_system,
        bbox=bbox,
    )
    return ug
def samples_generator(fn, shape, rng, seed):
    u'''
    Generate random samples for the model:
    @fn - function applied to the input features to get the output
    @shape - shape of the features matrix (num_samples, num_features)
    @rng - range (a, b) within which the input features are generated
    Outputs a tuple of input and output feature matrices
    '''
    prng = RandomState(int(seed))
    x = (rng[1] - rng[0]) * prng.random_sample(shape) + rng[0]
    y = np.apply_along_axis(fn, 1, x).reshape((shape[0], -1))
    z = np.zeros((shape[0], shape[1] - y.shape[1]))
    y = np.concatenate((y, z), axis=1)
    return x, y
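# Hedged usage sketch: inputs are drawn uniformly from [a, b) via
# (b - a) * random_sample(shape) + a, and outputs are zero-padded so x and y
# share a second dimension. The choice of np.sum as the model is illustrative.
import numpy as np

x, y = samples_generator(np.sum, shape=(4, 3), rng=(-1.0, 1.0), seed=0)
print(x.shape, y.shape)  # (4, 3) (4, 3); the last two columns of y are padding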
def fake_particle_ds(
    fields=None,
    units=None,
    negative=None,
    npart=16 ** 3,
    length_unit=1.0,
    data=None,
):
    from yt.loaders import load_particles

    prng = RandomState(0x4D3D3D3)
    if negative is not None and not is_sequence(negative):
        negative = [negative for f in fields]

    fields, units, negative = _check_field_unit_args_helper(
        {
            "fields": fields,
            "units": units,
            "negative": negative,
        },
        {
            "fields": _fake_particle_ds_default_fields,
            "units": _fake_particle_ds_default_units,
            "negative": _fake_particle_ds_default_negative,
        },
    )

    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = data if data else {}
    for field, offset, u in zip(fields, offsets, units):
        if field in data:
            v = data[field]
            continue
        if "position" in field:
            v = prng.normal(loc=0.5, scale=0.25, size=npart)
            np.clip(v, 0.0, 1.0, v)
        v = prng.random_sample(npart) - offset
        data[field] = (v, u)
    bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
    ds = load_particles(data, 1.0, bbox=bbox)
    return ds
def fake_particle_ds(
    fields=(
        "particle_position_x",
        "particle_position_y",
        "particle_position_z",
        "particle_mass",
        "particle_velocity_x",
        "particle_velocity_y",
        "particle_velocity_z",
    ),
    units=("cm", "cm", "cm", "g", "cm/s", "cm/s", "cm/s"),
    negative=(False, False, False, False, True, True, True),
    npart=16 ** 3,
    length_unit=1.0,
    data=None,
):
    from yt.loaders import load_particles

    prng = RandomState(0x4D3D3D3)
    if not is_sequence(negative):
        negative = [negative for f in fields]
    assert len(fields) == len(negative)
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = data if data else {}
    for field, offset, u in zip(fields, offsets, units):
        if field in data:
            v = data[field]
            continue
        if "position" in field:
            v = prng.normal(loc=0.5, scale=0.25, size=npart)
            np.clip(v, 0.0, 1.0, v)
        v = prng.random_sample(npart) - offset
        data[field] = (v, u)
    bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
    ds = load_particles(data, 1.0, bbox=bbox)
    return ds
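# Hedged usage sketch: the hard-coded RandomState(0x4D3D3D3) seed makes the
# fake particle dataset reproducible across calls, which is what test suites
# rely on. Field access via the ("io", ...) tuple is yt's stream convention.
ds1 = fake_particle_ds(npart=64)
ds2 = fake_particle_ds(npart=64)
ad1, ad2 = ds1.all_data(), ds2.all_data()
assert (ad1["io", "particle_mass"] == ad2["io", "particle_mass"]).all()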
def test(cores=None):
    """ """
    # Test data
    w = ps.lat2W(10, 10)
    random_int = RandomState(123456789)
    attribute = random_int.random_sample((w.n, 2))

    # mp boilerplate
    if cores is None:
        cores = mp.cpu_count()
    numifs = 20

    # Locking solution space
    solution_lock = mp.Lock()
    csoln_space = mp.Array(ctypes.c_int32, numifs * (w.n + 1), lock=solution_lock)
    soln_space = np.frombuffer(csoln_space.get_obj(), dtype=np.int32)
    soln_space[:] = 0
    soln_space.shape = (-1, w.n + 1)
    initshared_soln(csoln_space)

    jobs = []
    for i in xrange(cores):
        p = IFS(attribute, w, lock=solution_lock, pid=i)
        jobs.append(p)
        p.start()
    for j in jobs:
        j.join()
    for i in range(numifs):
        checkcontiguity(soln_space[i], w)
    """
    for i in range(numifs):
        print soln_space[i][1:].reshape(-1,10)
        print
    """
    print "Generated solution space with {} regions per solution".format(
        soln_space[:, 0])
def _read(self, file_path):
    gold_path, predicted_path = file_path.split(";")
    rs = RandomState(seed=1000)
    with open(cached_path(gold_path), "r") as gold_file, \
            open(cached_path(predicted_path), "r") as predicted_file:
        for _, (gold_line, predicted_line) in enumerate(
            zip_longest(gold_file.readlines(), predicted_file.readlines())
        ):
            gold_items = json.loads(gold_line)
            predicted_items = json.loads(predicted_line)

            assert gold_items["document"] == predicted_items["metadata"]["document"]
            assert gold_items["annotation_id"] == predicted_items["metadata"]["annotation_id"]

            metadata = predicted_items["metadata"]
            tokens = metadata["tokens"]

            predicted_rationale = [x["span"] for x in predicted_items["rationale"]["spans"]]
            predicted_token_rationale = [0] * len(metadata["tokens"])
            for s, e in predicted_rationale:
                for i in range(s, e):
                    predicted_token_rationale[i] = 1

            gold_token_rationale = self.map_rationale_to_gold_document(gold_items, tokens)

            if rs.random_sample() < self._human_prob:
                rationale = gold_token_rationale
            else:
                rationale = predicted_token_rationale

            instance = self.text_to_instance(
                annotation_id=gold_items["annotation_id"],
                document=gold_items["document"],
                query=gold_items.get("query", None),
                label=gold_items["label"],
                rationale=rationale,
                tokens_existing=tokens,
            )
            if instance is not None:
                yield instance
class Randomizer(object):
    def __init__(self, size, seed=None):
        self.size = size
        self.local_index = 0
        self.total_count = 0
        self.Seed = seed
        self.Rstate = RandomState(seed)
        self.np_random = self.Rstate.random_sample(size)

    def next_element(self, array, index=0):
        """
        Get the next random element, and its index, from the given array,
        starting from index to the end
        """
        i = self.next_random(index, len(array))
        return array[i], i

    def sample(self, population, k):
        # An n-length list is smaller than a k-length set
        n = len(population)
        result = [None] * k
        pool = list(population)
        for i in range(k):
            # invariant: non-selected at [0, n-i)
            j = self.next_random(0, n - i)
            result[i] = pool[j]
            pool[j] = pool[n - i - 1]  # move non-selected item into vacancy
        return result

    def random(self):
        self.local_index += 1
        self.total_count += 1
        if self.local_index >= self.size:
            # print("Ran out of random numbers, resetting")
            self.local_index = 0
        return self.np_random[self.local_index]

    def next_random(self, low, high):
        return int(self.random() * (high - low) + low)
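# Hedged usage sketch: the class pre-draws `size` uniform variates once and
# then recycles them, trading statistical quality for speed; sample() is a
# Fisher-Yates-style selection without replacement on top of that stream.
r = Randomizer(size=1000, seed=3)
hand = r.sample(range(52), 5)
assert len(set(hand)) == 5  # five distinct "cards"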
def _read(self, file_path):
    rs = RandomState(seed=1000)
    with open(cached_path(file_path), "r") as data_file:
        for _, line in enumerate(data_file.readlines()):
            items = json.loads(line)
            document = items["document"]
            annotation_id = items["annotation_id"]
            query = items.get("query", None)
            label = items.get("label", None)
            rationale = items.get("rationale", [])

            if label is not None:
                label = str(label).replace(" ", "_")

            if rs.random_sample() < self._keep_prob:
                instance = self.text_to_instance(
                    annotation_id=annotation_id,
                    document=document,
                    query=query,
                    label=label,
                    rationale=rationale)
                if instance is not None:
                    yield instance
class TestAnalyzer:

    def setUp(self):
        self.prng = RandomState(133)
        self.df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                                         'f1': self.prng.normal(0, 1, 10),
                                         'f2': self.prng.normal(1, 0.1, 10),
                                         'f3': self.prng.normal(2, 0.1, 10),
                                         'group': ['group1'] * 10},
                                        index=range(0, 10))
        self.df_features_same_score = self.df_features.copy()
        self.df_features_same_score[['sc1']] = [3] * 10

        self.df_features_with_groups = self.df_features.copy()
        self.df_features_with_groups['group'] = ['group1'] * 5 + ['group2'] * 5

        self.df_features_with_groups_and_length = self.df_features_with_groups.copy()
        self.df_features_with_groups_and_length['length'] = self.prng.normal(50, 250, 10)

        self.human_scores = pd.Series(self.prng.randint(1, 5, size=10))
        self.system_scores = pd.Series(self.prng.random_sample(10) * 5)
        self.same_human_scores = pd.Series([3] * 10)

        # get the directory containing the tests
        self.test_dir = dirname(__file__)

    def test_correlation_helper(self):
        # test that there are no nans for a data frame with 10 values
        retval = Analyzer.correlation_helper(self.df_features, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 0)

    def test_correlation_helper_for_data_with_one_row(self):
        # this should return two data frames with nans
        retval = Analyzer.correlation_helper(self.df_features[:1], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_two_rows(self):
        # this should return 1/-1 for marginal correlations and nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:2], 'sc1', 'group')
        assert_equal(abs(retval[0].values).sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_three_rows(self):
        # this should compute marginal correlations but return nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:3], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_four_rows(self):
        # this should compute marginal correlations and return a unity
        # matrix for partial correlations;
        # it should also raise a UserWarning
        with warnings.catch_warnings(record=True) as warning_list:
            retval = Analyzer.correlation_helper(self.df_features[:4], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_almost_equal(np.abs(retval[1].values).sum(), 0.9244288637889855)
        assert issubclass(warning_list[-1].category, UserWarning)

    def test_correlation_helper_for_data_with_groups(self):
        retval = Analyzer.correlation_helper(self.df_features_with_groups,
                                             'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)

    def test_correlation_helper_for_one_group_with_one_row(self):
        # this should return a data frame with nans for the group with one row
        retval = Analyzer.correlation_helper(self.df_features_with_groups[:6],
                                             'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)
        assert_equal(retval[0].isnull().values.sum(), 3)

    def test_correlation_helper_for_groups_and_length(self):
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length,
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)

    def test_correlation_helper_for_group_with_one_row_and_length(self):
        # this should return a data frame with nans for the group with one row
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length[:6],
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)

    def test_that_correlation_helper_works_for_data_with_the_same_human_score(self):
        # this test should raise a UserWarning because the determinant is very
        # close to zero. It also raises a RuntimeWarning because the variance
        # of the human scores is 0.
        with warnings.catch_warnings(record=True) as warning_list:
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features_same_score,
                                                 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)
        assert issubclass(warning_list[-1].category, UserWarning)

    def test_that_metrics_helper_works_for_data_with_one_row(self):
        # There should be NaNs for SMD, correlations and both sds.
        # Note that we will get a value for QWK since we are
        # dividing by N and not N-1.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.human_scores[0:1],
                                            self.system_scores[0:1])
        assert_equal(evals.isnull().values.sum(), 5)

    def test_that_metrics_helper_works_for_data_with_the_same_label(self):
        # There should be NaNs for correlation and SMD.
        # Note that for a dataset with a single response
        # kappas will be 0 or 1.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.same_human_scores,
                                            self.system_scores)
        assert_equal(evals.isnull().values.sum(), 2)

    def test_metrics_helper_population_sds(self):
        df_new_features = pd.read_csv(join(self.test_dir, 'data', 'files', 'train.csv'))
        # compute the metrics when not specifying the population SDs
        computed_metrics1 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'])
        expected_metrics1 = pd.Series({'N': 500.0,
                                       'R2': 0.65340566606389394,
                                       'RMSE': 0.47958315233127197,
                                       'SMD': 0.03679030063229779,
                                       'adj_agr': 100.0,
                                       'corr': 0.82789026370069529,
                                       'exact_agr': 77.0,
                                       'h_max': 6.0,
                                       'h_mean': 3.4199999999999999,
                                       'h_min': 1.0,
                                       'h_sd': 0.81543231461565147,
                                       'kappa': 0.6273493195074531,
                                       'sys_max': 6.0,
                                       'sys_mean': 3.4500000000000002,
                                       'sys_min': 1.0,
                                       'sys_sd': 0.81782496620652367,
                                       'wtkappa': 0.8273273273273274})
        # and now compute them specifying the population SDs
        computed_metrics2 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'],
                                                    population_human_score_sd=0.5,
                                                    population_system_score_sd=0.4,
                                                    smd_method='williamson')
        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2['SMD'] = 0.066259
        assert_series_equal(computed_metrics1.sort_index(), expected_metrics1.sort_index())
        assert_series_equal(computed_metrics2.sort_index(), expected_metrics2.sort_index())

    def test_metrics_helper_zero_system_sd(self):
        human_scores = [1, 3, 4, 2, 3, 1, 3, 4, 2, 1]
        system_score = [2.54] * 10
        computed_metrics1 = Analyzer.metrics_helper(human_scores, system_score)
        expected_metrics1 = pd.Series({'N': 10,
                                       'R2': -0.015806451612903283,
                                       'RMSE': 1.122319027727856,
                                       'SMD': 0.11927198519188371,
                                       'adj_agr': 50.0,
                                       'corr': None,
                                       'exact_agr': 0,
                                       'h_max': 4,
                                       'h_mean': 2.4,
                                       'h_min': 1.0,
                                       'h_sd': 1.1737877907772674,
                                       'kappa': 0,
                                       'sys_max': 2.54,
                                       'sys_mean': 2.54,
                                       'sys_min': 2.54,
                                       'sys_sd': 0,
                                       'wtkappa': 0})
        # now compute DSM
        computed_metrics2 = Analyzer.metrics_helper(human_scores, system_score,
                                                    use_diff_std_means=True)
        # the only numbers that should change are SMD (dropped) and DSM (added)
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2.drop("SMD", inplace=True)
        expected_metrics2['DSM'] = None
        assert_series_equal(computed_metrics1.sort_index(),
                            expected_metrics1.sort_index(), check_dtype=False)
        assert_series_equal(computed_metrics2.sort_index(),
                            expected_metrics2.sort_index(), check_dtype=False)

    def test_compute_pca_less_samples_than_features(self):
        # test PCA when we have fewer samples than features;
        # in this case the number of components equals the number of samples
        df = pd.DataFrame({'a': range(50)})
        for i in range(100):
            df[i] = df['a'] * i
        (components, variance) = Analyzer.compute_pca(df, df.columns)
        assert_equal(len(components.columns), 50)
        assert_equal(len(variance.columns), 50)

    def test_compute_disattenuated_correlations_single_human(self):
        hm_corr = pd.Series([0.9, 0.8, 0.6],
                            index=['raw', 'raw_trim', 'raw_trim_round'])
        hh_corr = pd.Series([0.81], index=[''])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_equal(df_dis_corr.loc['raw', 'corr_disattenuated'], 1.0)

    def test_compute_disattenuated_correlations_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64, 0.36],
                            index=['All data', 'GROUP1', 'GROUP2'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, 0.5, 1.0])

    def test_compute_disattenuated_correlations_single_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81], index=['All data'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_mismatched_indices(self):
        hm_corr = pd.Series([0.9, 0.6], index=['All data', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64], index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_negative_human(self):
        hm_corr = pd.Series([0.9, 0.8], index=['All data', 'GROUP1'])
        hh_corr = pd.Series([-0.03, 0.64], index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 2)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [np.nan, 1.0])
prng = RandomState(133)

df_features = pd.DataFrame(
    {
        'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
        'f1': prng.normal(0, 1, 10),
        'f2': prng.normal(1, 0.1, 10),
        'f3': prng.normal(2, 0.1, 10),
        'group': ['group1'] * 10
    },
    index=range(0, 10))

df_features_same_score = df_features.copy()
df_features_same_score[['sc1']] = [3] * 10

human_scores = pd.Series(prng.randint(1, 5, size=10))
system_scores = pd.Series(prng.random_sample(10) * 5)
same_human_scores = pd.Series([3] * 10)


def test_correlation_helper():
    # test that there are no nans for a data frame with 10 values
    retval = correlation_helper(df_features, 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 0)
    assert_equal(retval[1].isnull().values.sum(), 0)


def test_that_correlation_helper_works_for_data_with_one_row():
    # this should return two data frames with nans
    retval = correlation_helper(df_features[:1], 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 3)
    assert_equal(retval[1].isnull().values.sum(), 3)
α_EXACT = 2.4
β_EXACT = 0.6

def my_exp_func(x):
    return α_EXACT * np.exp(β_EXACT * x)

xmin = 0
xmax = 5.0
Nsamples = 20

#x_sample = np.linspace(xmin, xmax, Nsamples)
# OR: (randomly spaced data)
x_rand = (xmax - xmin) * rs.random_sample(Nsamples) + xmin
x_sample = np.sort(x_rand)

y_exact = my_exp_func(x_sample)

NOISE_AMPLITUDE = 1.0
y_noisy = y_exact + rs.randn(Nsamples) * NOISE_AMPLITUDE

#plt.clf()
#plt.plot(x_sample, y_exact, label="exact", marker="o")
#plt.plot(x_sample, y_noisy, label="noisy", marker="o", linewidth=0)
#plt.legend()
#plt.savefig("IMG_DATA_v2.png", dpi=150)

# Do linear regression here ...
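# Hedged sketch of the missing regression step (hypothetical continuation):
# taking logs turns y = alpha*exp(beta*x) into ln y = ln alpha + beta*x, so an
# ordinary least-squares line fit recovers beta as the slope and alpha as
# exp(intercept). Noisy values <= 0 are masked out before the log.
mask = y_noisy > 0
beta_fit, log_alpha_fit = np.polyfit(x_sample[mask], np.log(y_noisy[mask]), 1)
alpha_fit = np.exp(log_alpha_fit)
print("alpha ~ %.3f (exact %.1f), beta ~ %.3f (exact %.1f)"
      % (alpha_fit, α_EXACT, beta_fit, β_EXACT))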
        replace = np.argmax(sharedSoln[0])
        sharedSoln[:, replace] = sharedSoln[:, current_best]
        replace_list.append(replace)
    return replace_list

def tabulength(numP):
    '''Taillard (1990)'''
    smin = (numP - 1) * 0.9
    smax = (numP - 1) * 1.1
    tabu_length = 6 + (randint(0, int(smax - smin)))
    return int(tabu_length)

'''Test Data Generation a la PySAL tests.'''
#Setup the test data:
w = pysal.lat2W(10, 10)
random_init = RandomState(123456789)
z = random_init.random_sample((w.n, 2))
#print z.max(), z.min(), z.std()  #Comment out to verify that the 'random' seed is identical over tests
p = np.ones((w.n, 1), float)
floor_variable = p
floor = 3

'''START TIMING HERE - AFTER TEST DATA GENERATED'''
time0 = time.time()

#Multiprocessing setup
cores = mp.cpu_count()
cores = cores * 2
numP = len(p) + 1

#Shared memory solution space
lockSoln = mp.Lock()
cSoln = Array(ctypes.c_double, numP * cores, lock=lockSoln)
import os
import struct
import timeit

import numpy as np
import pandas as pd
from numpy.random import RandomState

rs = RandomState()

SETUP = '''
import numpy as np
import {mod}.{rng}

rs = {mod}.{rng}.RandomState()
rs.random_sample()
'''

scale_32 = scale_64 = 1
if struct.calcsize('P') == 8 and os.name != 'nt':
    # 64 bit
    scale_32 = 0.5
else:
    scale_64 = 2

# RNGS = ['mlfg_1279_861', 'mrg32k3a', 'pcg64', 'pcg32', 'mt19937',
#         'xorshift128', 'xorshift1024', 'xoroshiro128plus', 'dsfmt', 'random']
RNGS = ['mt19937']


def timer(code, setup):
    return 1000 * min(timeit.Timer(code, setup=setup).repeat(10, 10)) / 10.0
def get_cost(self, r_float, item_num):
    if len(self.costs) == 0:
        # lazily build the per-item cost table from a fixed seed so that
        # repeated runs see identical costs
        r = RandomState(self.permute_seed)
        self.costs = r.random_sample(self.max_item)
    return float(self.costs[item_num])
class SampleConsensus(metaclass=abc.ABCMeta):
    '''
    SampleConsensus represents the base class. All sample consensus methods
    must inherit from this class.
    '''

    def __init__(self, model, random=False, probability=.99,
                 threshold=float('inf'), max_iterations=1000):
        self._sac_model = model
        self.probability = probability
        self.distance_threshold = threshold
        self.max_iterations = max_iterations
        self._model = []
        self._inliers = []
        self._model_coefficients = None
        if random:
            self._rng = RandomState()
        else:
            self._rng = RandomState(12345)

    @abc.abstractmethod
    def compute_model(self):
        '''
        Compute the actual model. Pure virtual.
        '''
        pass

    @property
    def model(self):
        '''
        Return indices of the points that build the best model found so far.
        '''
        return self._model

    @property
    def inliers(self):
        '''
        Return the best set of inliers found so far for this model.
        '''
        return self._inliers

    @property
    def model_coefficients(self):
        '''
        Return the model coefficients of the best model found so far.
        '''
        return self._model_coefficients

    @property
    def sample_consensus_model(self):
        '''
        Get the Sample Consensus model used
        '''
        return self._sac_model

    @sample_consensus_model.setter
    def sample_consensus_model(self, value):
        '''
        Set the Sample Consensus model to use
        '''
        self._sac_model = value

    def refine_model(self, sigma=3., max_iterations=1000):
        '''
        Refine the model found.
        This loops over the model coefficients and optimizes them together
        with the set of inliers, until the change in the set of inliers is
        minimal.

        # Parameters
        sigma : float
            standard deviation multiplier for considering a sample as inlier
            (Mahalanobis distance)
        max_iterations : int
            the maximum number of iterations to try to refine in case the
            inliers keep on changing
        '''
        if self._sac_model is None:
            raise ValueError('null model!')

        logger = logging.getLogger('pcl.sac.SampleConsensus.refine_model')
        inlier_distance_threshold_sqr = self.distance_threshold * self.distance_threshold
        error_threshold = self.distance_threshold
        sigma_sqr = sigma * sigma
        refine_iterations = 0
        inlier_changed, oscillating = False, False
        inliers_sizes = []
        new_inliers = prev_inliers = self._inliers
        new_model_coefficients = self._model_coefficients
        while True:
            # Optimize the model coefficients
            new_model_coefficients = self._sac_model \
                .optimize_model_coefficients(prev_inliers, new_model_coefficients)
            inliers_sizes.append(len(prev_inliers))

            # Select the new inliers based on the optimized coefficients and new threshold
            new_inliers = self._sac_model.select_within_distance(
                new_model_coefficients, error_threshold)
            logger.debug('Number of inliers found (before/after): %lu/%lu, '
                         'with an error threshold of %g.',
                         len(prev_inliers), len(new_inliers), error_threshold)

            if len(new_inliers) == 0:
                refine_iterations += 1
                if refine_iterations >= max_iterations:
                    break
                continue

            # Estimate the variance and the new threshold
            variance = self._sac_model.compute_variance()
            error_threshold = math.sqrt(
                min(inlier_distance_threshold_sqr, sigma_sqr * variance))
            logger.debug('New estimated error threshold: %g on iteration %d out of %d.',
                         error_threshold, refine_iterations, max_iterations)

            inlier_changed = False
            prev_inliers, new_inliers = new_inliers, prev_inliers
            # If the number of inliers changed, then we are still optimizing
            if len(new_inliers) != len(prev_inliers):
                # Check if the number of inliers is oscillating in between two values
                if len(inliers_sizes) >= 4:
                    if inliers_sizes[-1] == inliers_sizes[-3] and \
                            inliers_sizes[-2] == inliers_sizes[-4]:
                        oscillating = True
                        break
                inlier_changed = True
                continue

            # Check the values of the inlier set
            for idx, val in enumerate(prev_inliers):
                # If the value of the inliers changed, then we are still optimizing
                if val != new_inliers[idx]:
                    inlier_changed = True
                    break

            # keep refining only while the inliers keep changing (do-while style)
            refine_iterations += 1
            if not (inlier_changed and refine_iterations < max_iterations):
                break

        # If the new set of inliers is empty, we didn't do a good job refining
        if len(new_inliers) == 0:
            logger.error('Refinement failed: got an empty set of inliers!')
            return False

        if oscillating:
            logger.debug('Detected oscillations in the model refinement.')
            return True

        # If no inliers have been changed anymore, then the refinement was successful
        if not inlier_changed:
            self._inliers, new_inliers = new_inliers, self._inliers
            self._model_coefficients = new_model_coefficients
            return True
        return False

    def get_random_samples(self, indices, nr_samples):
        '''
        Get a set of randomly selected indices.

        # Parameters
        indices : list or array
            The input indices vector
        nr_samples : int
            The desired number of point indices to randomly select
        '''
        sample = self._rng.random_sample(nr_samples) * len(indices)
        return np.array(indices, copy=False)[sample.astype(int)]
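# Hedged standalone sketch of the index-selection trick in get_random_samples:
# scaling uniform draws from [0, 1) by len(indices) and truncating gives integer
# positions in [0, len(indices)); note this samples WITH replacement, so
# duplicate picks are possible.
import numpy as np
from numpy.random import RandomState

rng = RandomState(12345)
indices = np.arange(50, 60)
sample = rng.random_sample(4) * len(indices)
print(indices[sample.astype(int)])  # four entries drawn from 50..59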
from rsmtool.analysis import (correlation_helper,
                              metrics_helper)

prng = RandomState(133)

df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                            'f1': prng.normal(0, 1, 10),
                            'f2': prng.normal(1, 0.1, 10),
                            'f3': prng.normal(2, 0.1, 10),
                            'group': ['group1'] * 10},
                           index=range(0, 10))

df_features_same_score = df_features.copy()
df_features_same_score[['sc1']] = [3] * 10

human_scores = pd.Series(prng.randint(1, 5, size=10))
system_scores = pd.Series(prng.random_sample(10) * 5)
same_human_scores = pd.Series([3] * 10)


def test_correlation_helper():
    # test that there are no nans for a data frame with 10 values
    retval = correlation_helper(df_features, 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 0)
    assert_equal(retval[1].isnull().values.sum(), 0)


def test_that_correlation_helper_works_for_data_with_one_row():
    # this should return two data frames with nans
    retval = correlation_helper(df_features[:1], 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 3)
    assert_equal(retval[1].isnull().values.sum(), 3)
def anytime_explain(self, instance, callback=None,
                    update_func=None, update_prediction=None):
    data_rows, no_atr = self.data.X.shape
    class_value = self.model(instance)[0]
    prng = RandomState(self.seed)

    self.init_arrays(no_atr)
    attr_values = self.get_atr_column(instance)

    batch_mx_size = self.batch_size * no_atr
    z_sq = abs(st.norm.ppf(self.p_val / 2)) ** 2

    tiled_inst = self.tile_instance(instance)
    inst1 = copy.deepcopy(tiled_inst)
    inst2 = copy.deepcopy(tiled_inst)

    worst_case = self.max_iter * no_atr
    time_point = time.time()
    update_table = False

    domain = Domain([ContinuousVariable("Score"),
                     ContinuousVariable("Error")],
                    metas=[StringVariable(name="Feature"),
                           StringVariable(name="Value")])

    if update_prediction is not None:
        update_prediction(class_value)

    def create_res_table():
        nonzero = self.steps != 0
        expl_scaled = (self.expl[nonzero] / self.steps[nonzero]).reshape(1, -1)
        """creating return array"""
        ips = np.hstack((expl_scaled.T, np.sqrt(
            z_sq * self.var[nonzero] / self.steps[nonzero]).reshape(-1, 1)))
        table = Table.from_numpy(
            domain, ips,
            metas=np.hstack((np.asarray(self.atr_names)[nonzero[0]].reshape(-1, 1),
                             attr_values[nonzero[0]].reshape(-1, 1))))
        return table

    while not all(self.iterations_reached[0, :] > self.max_iter):
        prog = 1 - np.sum(self.max_iter - self.iterations_reached) / worst_case
        if callback(int(prog * 100)):
            break
        if not any(self.iterations_reached[0, :] > self.max_iter):
            a = np.argmax(prng.multinomial(
                1, pvals=(self.var[0, :] / (np.sum(self.var[0, :])))))
        else:
            a = np.argmin(self.iterations_reached[0, :])

        perm = (prng.random_sample(batch_mx_size).reshape(
            self.batch_size, no_atr)) > 0.5
        rand_data = self.data.X[prng.randint(0, data_rows,
                                             size=self.batch_size), :]
        inst1.X = np.copy(tiled_inst.X)
        inst1.X[perm] = rand_data[perm]
        inst2.X = np.copy(inst1.X)

        inst1.X[:, a] = tiled_inst.X[:, a]
        inst2.X[:, a] = rand_data[:, a]

        f1 = self._get_predictions(inst1, class_value)
        f2 = self._get_predictions(inst2, class_value)

        diff = np.sum(f1 - f2)
        self.expl[0, a] += diff

        """update variance"""
        self.steps[0, a] += self.batch_size
        self.iterations_reached[0, a] += self.batch_size
        d = diff - self.mu[0, a]
        self.mu[0, a] += d / self.steps[0, a]
        self.M2[0, a] += d * (diff - self.mu[0, a])
        self.var[0, a] = self.M2[0, a] / (self.steps[0, a] - 1)

        if time.time() - time_point > 1:
            update_table = True
            time_point = time.time()

        if update_table:
            update_table = False
            update_func(create_res_table())

        # exclude from sampling if necessary
        needed_iter = z_sq * self.var[0, a] / (self.error ** 2)
        if (needed_iter <= self.steps[0, a]) and \
                (self.steps[0, a] >= self.min_iter) or \
                (self.steps[0, a] > self.max_iter):
            self.iterations_reached[0, a] = self.max_iter + 1

    return class_value, create_res_table()
import os
import struct
import timeit

import pandas as pd
import numpy as np
from numpy.random import RandomState

rs = RandomState()

SETUP = '''
import numpy as np
import {mod}.{rng}

rs = {mod}.{rng}.RandomState()
rs.random_sample()
'''

scale_32 = scale_64 = 1
if struct.calcsize('P') == 8 and os.name != 'nt':
    # 64 bit
    scale_32 = 0.5
else:
    scale_64 = 2

RNGS = ['mlfg_1279_861', 'mrg32k3a', 'pcg64', 'pcg32', 'mt19937',
        'xorshift128', 'xorshift1024', 'xoroshiro128plus', 'dsfmt', 'random']


def timer(code, setup):
    return 1000 * min(timeit.Timer(code, setup=setup).repeat(10, 10)) / 10.0
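# Hedged usage sketch of the benchmark helper: the SETUP template is filled in
# with a module and generator name, then the draw itself is timed. With
# mod='numpy' and rng='random' this exercises the stock MT19937 RandomState.
print(timer('rs.random_sample(1000000)',
            SETUP.format(mod='numpy', rng='random')))  # ms per draw of 1e6 doubles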
host = MPI.Get_processor_name()
info = MPI.INFO_NULL
nlocalcores = mp.cpu_count()  #One core is manager

if rank == 0:
    """
    The rank 0 process is the master manager. This process:
    1. Reads the data from the shapefile or DB
    2. Generates the W Object
    3. Sends the W object and attribute vector to all children
    """
    w = ps.lat2W(8, 8)
    random_int = RandomState(123456789)
    attribute = random_int.random_sample((w.n, 1))
    numifs = 8
    data = {'w': w, 'numifs': numifs}
    print "I have {} cores in a shared memory space".format(nlocalcores)
else:
    data = None

#Broadcast 2 sets of data, a list of Python objects and an array of attribute information
data = comm.bcast(data, root=0)  #Inefficient Python object, better to get full, pass and reform?
if rank != 0:
    w = data['w']
    numifs = data['numifs']
    attribute = np.empty((w.n, 1), dtype=np.float)
def anytime_explain(self, instance, callback=None,
                    update_func=None, update_prediction=None):
    data_rows, no_atr = self.data.X.shape
    class_value = self.model(instance)[0]
    prng = RandomState(self.seed)

    self.init_arrays(no_atr)
    attr_values = self.get_atr_column(instance)

    batch_mx_size = self.batch_size * no_atr
    z_sq = abs(st.norm.ppf(self.p_val / 2)) ** 2

    tiled_inst = self.tile_instance(instance)
    inst1 = copy.deepcopy(tiled_inst)
    inst2 = copy.deepcopy(tiled_inst)

    worst_case = self.max_iter * no_atr
    time_point = time.time()
    update_table = False

    domain = Domain([ContinuousVariable("Score"),
                     ContinuousVariable("Error")],
                    metas=[StringVariable(name="Feature"),
                           StringVariable(name="Value")])

    if update_prediction is not None:
        update_prediction(class_value)

    def create_res_table():
        nonzero = self.steps != 0
        expl_scaled = (self.expl[nonzero] / self.steps[nonzero]).reshape(1, -1)
        # creating return array
        ips = np.hstack((expl_scaled.T, np.sqrt(
            z_sq * self.var[nonzero] / self.steps[nonzero]).reshape(-1, 1)))
        table = Table.from_numpy(
            domain, ips,
            metas=np.hstack((np.asarray(self.atr_names)[nonzero[0]].reshape(-1, 1),
                             attr_values[nonzero[0]].reshape(-1, 1))))
        return table

    while not all(self.iterations_reached[0, :] > self.max_iter):
        prog = 1 - np.sum(self.max_iter - self.iterations_reached) / worst_case
        if callback(int(prog * 100)):
            break
        if not any(self.iterations_reached[0, :] > self.max_iter):
            a = np.argmax(prng.multinomial(
                1, pvals=(self.var[0, :] / (np.sum(self.var[0, :])))))
        else:
            a = np.argmin(self.iterations_reached[0, :])

        perm = (prng.random_sample(batch_mx_size).reshape(
            self.batch_size, no_atr)) > 0.5
        rand_data = self.data.X[prng.randint(0, data_rows,
                                             size=self.batch_size), :]
        inst1.X = np.copy(tiled_inst.X)
        inst1.X[perm] = rand_data[perm]
        inst2.X = np.copy(inst1.X)

        inst1.X[:, a] = tiled_inst.X[:, a]
        inst2.X[:, a] = rand_data[:, a]

        f1 = self._get_predictions(inst1, class_value)
        f2 = self._get_predictions(inst2, class_value)

        diff = np.sum(f1 - f2)
        self.expl[0, a] += diff

        # update variance
        self.steps[0, a] += self.batch_size
        self.iterations_reached[0, a] += self.batch_size
        d = diff - self.mu[0, a]
        self.mu[0, a] += d / self.steps[0, a]
        self.M2[0, a] += d * (diff - self.mu[0, a])
        self.var[0, a] = self.M2[0, a] / (self.steps[0, a] - 1)

        if time.time() - time_point > 1:
            update_table = True
            time_point = time.time()

        if update_table:
            update_table = False
            update_func(create_res_table())

        # exclude from sampling if necessary
        needed_iter = z_sq * self.var[0, a] / (self.error ** 2)
        if (needed_iter <= self.steps[0, a]) and \
                (self.steps[0, a] >= self.min_iter) or \
                (self.steps[0, a] > self.max_iter):
            self.iterations_reached[0, a] = self.max_iter + 1

    return class_value, create_res_table()
class TestAnalyzer:

    def setUp(self):
        self.prng = RandomState(133)
        self.df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                                         'f1': self.prng.normal(0, 1, 10),
                                         'f2': self.prng.normal(1, 0.1, 10),
                                         'f3': self.prng.normal(2, 0.1, 10),
                                         'group': ['group1'] * 10},
                                        index=range(0, 10))
        self.df_features_same_score = self.df_features.copy()
        self.df_features_same_score[['sc1']] = [3] * 10

        self.human_scores = pd.Series(self.prng.randint(1, 5, size=10))
        self.system_scores = pd.Series(self.prng.random_sample(10) * 5)
        self.same_human_scores = pd.Series([3] * 10)

        # get the directory containing the tests
        self.test_dir = dirname(__file__)

    def test_correlation_helper(self):
        # test that there are no nans for a data frame with 10 values
        retval = Analyzer.correlation_helper(self.df_features, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 0)

    def test_that_correlation_helper_works_for_data_with_one_row(self):
        # this should return two data frames with nans;
        # we expect a RuntimeWarning here so let's suppress it
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features[:1], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_correlation_helper_works_for_data_with_two_rows(self):
        # this should return 1/-1 for marginal correlations and nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:2], 'sc1', 'group')
        assert_equal(abs(retval[0].values).sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_correlation_helper_works_for_data_with_three_rows(self):
        # this should compute marginal correlations but return nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:3], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_correlation_helper_works_for_data_with_four_rows(self):
        # this should compute marginal correlations and return a unity
        # matrix for partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:4], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_almost_equal(abs(retval[1].values).sum(), 3)

    def test_that_correlation_helper_works_for_data_with_the_same_label(self):
        # this should return two data frames with nans
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features_same_score,
                                                 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_metrics_helper_works_for_data_with_one_row(self):
        # There should be NaNs for SMD, correlations and both sds
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.human_scores[0:1],
                                            self.system_scores[0:1])
        assert_equal(evals.isnull().values.sum(), 4)

    def test_that_metrics_helper_works_for_data_with_the_same_label(self):
        # There should be NaNs for correlation.
        # Note that for a dataset with a single response
        # kappas will be 0 or 1.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.same_human_scores,
                                            self.system_scores)
        assert_equal(evals.isnull().values.sum(), 1)

    def test_metrics_helper_population_sds(self):
        df_new_features = pd.read_csv(join(self.test_dir, 'data', 'files', 'train.csv'))
        # compute the metrics when not specifying the population SDs
        computed_metrics1 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'])
        expected_metrics1 = pd.Series({'N': 500.0,
                                       'R2': 0.65340566606389394,
                                       'RMSE': 0.47958315233127197,
                                       'SMD': 0.036736365006090885,
                                       'adj_agr': 100.0,
                                       'corr': 0.82789026370069529,
                                       'exact_agr': 77.0,
                                       'h_max': 6.0,
                                       'h_mean': 3.4199999999999999,
                                       'h_min': 1.0,
                                       'h_sd': 0.81543231461565147,
                                       'kappa': 0.6273493195074531,
                                       'sys_max': 6.0,
                                       'sys_mean': 3.4500000000000002,
                                       'sys_min': 1.0,
                                       'sys_sd': 0.81782496620652367,
                                       'wtkappa': 0.82732732732732728})
        # and now compute them specifying the population SDs
        computed_metrics2 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'],
                                                    population_human_score_sd=0.5,
                                                    population_system_score_sd=0.4)
        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2['SMD'] = 0.066259
        assert_series_equal(computed_metrics1.sort_index(), expected_metrics1.sort_index())
        assert_series_equal(computed_metrics2.sort_index(), expected_metrics2.sort_index())

    def test_compute_pca_less_components_than_features(self):
        # test PCA when we have fewer components than features
        df = pd.DataFrame({'a': range(100)})
        for i in range(100):
            df[i] = df['a'] * i
        (components, variance) = Analyzer.compute_pca(df, df.columns)
        assert_equal(len(components.columns), 100)
        assert_equal(len(variance.columns), 100)

    def test_compute_disattenuated_correlations_single_human(self):
        hm_corr = pd.Series([0.9, 0.8, 0.6],
                            index=['raw', 'raw_trim', 'raw_trim_round'])
        hh_corr = pd.Series([0.81], index=[''])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_equal(df_dis_corr.loc['raw', 'corr_disattenuated'], 1.0)

    def test_compute_disattenuated_correlations_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64, 0.36],
                            index=['All data', 'GROUP1', 'GROUP2'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, 0.5, 1.0])

    def test_compute_disattenuated_correlations_single_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81], index=['All data'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_mismatched_indices(self):
        hm_corr = pd.Series([0.9, 0.6], index=['All data', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64], index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_negative_human(self):
        hm_corr = pd.Series([0.9, 0.8], index=['All data', 'GROUP1'])
        hh_corr = pd.Series([-0.03, 0.64], index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr, hh_corr)
        assert_equal(len(df_dis_corr), 2)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [np.nan, 1.0])
class MobilityGenerator(object): """ Generates intermodal mobility for SUMO starting from a synthetic population. """ _conf = None _profiling = None _random_generator = None _sumo_network = None _sumo_parkings = collections.defaultdict(list) _parking_cache = dict() _parking_position = dict() _taz_weights = dict() _buildings_by_taz = dict() _edges_by_taz = dict() _blacklisted_edges = set() _all_trips = collections.defaultdict(dict) def __init__(self, conf, profiling=False): """ Initialize the synthetic population. :param conf: distionary with the configurations :param profiling=False: enable cProfile """ self._conf = conf self._profiling = profiling self._random_generator = RandomState(seed=self._conf['seed']) logging.info('Starting TraCI with file %s.', conf['sumocfg']) sumocfg = '{}/{}'.format(BASE_DIR, conf['sumocfg']) traci.start(['sumo', '-c', sumocfg]) logging.info('Loading SUMO net file %s%s', BASE_DIR, conf['SUMOnetFile']) self._sumo_network = sumolib.net.readNet('{}/{}'.format( BASE_DIR, conf['SUMOnetFile'])) logging.info('Loading SUMO parking lots from file %s%s', BASE_DIR, conf['SUMOadditionals']['parkings']) self._load_parkings('{}/{}'.format( BASE_DIR, conf['SUMOadditionals']['parkings'])) logging.info('Loading TAZ weights from %s%s', BASE_DIR, conf['population']['tazWeights']) self._load_weights_from_csv('{}/{}'.format( BASE_DIR, conf['population']['tazWeights'])) logging.info('Loading buildings weights from %s%s', BASE_DIR, conf['population']['buildingsWeight']) self._load_buildings_weight_from_csv_dir('{}/{}'.format( BASE_DIR, conf['population']['buildingsWeight'])) logging.info('Loading edges in each TAZ from %s%s', BASE_DIR, conf['population']['tazDefinition']) self._load_edges_from_taz('{}/{}'.format( BASE_DIR, conf['population']['tazDefinition'])) logging.info('Computing the number of entities for each vType..') self._compute_vehicles_per_type() def mobility_generation(self): """ Generate the mobility for the synthetic population. """ logging.info('Generating trips for each vType..') self._compute_trips_per_type() def save_mobility(self): """ Save the generated trips to files. """ logging.info('Saving trips files..') self._saving_trips_to_files() @staticmethod def close_traci(): """ Artefact to close TraCI properly. """ logging.info('Closing TraCI.') traci.close() ## ---------------------------------------------------------------------------------------- ## ## Loaders ## ## ---------------------------------------------------------------------------------------- ## def _load_parkings(self, filename): """ Load parkings ids from XML file. """ xml_tree = xml.etree.ElementTree.parse(filename).getroot() for child in xml_tree: if (child.tag == 'parkingArea' and child.attrib['id'] in self._conf['intermodalOptions']['parkingAreaWhitelist']): edge = child.attrib['lane'].split('_')[0] position = float(child.attrib['startPos']) + 2.5 self._sumo_parkings[edge].append(child.attrib['id']) self._parking_position[child.attrib['id']] = position def _load_weights_from_csv(self, filename): """ Load the TAZ weight from a CSV file. """ with open(filename, 'r') as csvfile: weightreader = csv.reader(csvfile) header = None for row in weightreader: if not header: header = row else: self._taz_weights[int(row[0])] = { header[0]: int(row[0]), header[1]: row[1], header[2]: int(row[2]), header[3]: float(row[3]), 'weight': (int(row[2]) / float(row[3])), } def _load_buildings_weight_from_csv_dir(self, directory): """ Load the buildings weight from multiple CSV files. 
""" allfiles = [ os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) ] for filename in sorted(allfiles): logging.debug('Loding %s', filename) with open(filename, 'r') as csvfile: weightreader = csv.reader(csvfile) header = None taz = None buildings = [] for row in weightreader: if not header: header = row else: taz = row[0] buildings.append(( float(row[3]), # weight row[4], # generic edge row[5])) # pedestrian edge if len(buildings) < 10: logging.debug('Dropping %s, only %d buildings found.', filename, len(buildings)) continue weighted_buildings = [] cum_sum = 0.0 for weight, g_edge, p_edge in sorted(buildings): cum_sum += weight weighted_buildings.append( (cum_sum, g_edge, p_edge, weight)) self._buildings_by_taz[taz] = weighted_buildings def _load_edges_from_taz(self, filename): """ Load edges from the TAZ file. """ xml_tree = xml.etree.ElementTree.parse(filename).getroot() for child in xml_tree: if child.tag == 'taz': self._edges_by_taz[ child.attrib['id']] = child.attrib['edges'].split(' ') ## ---------------------------------------------------------------------------------------- ## ## Mobility Generation ## ## ---------------------------------------------------------------------------------------- ## def _compute_vehicles_per_type(self): """ Compute the absolute number of trip that are going to be created for each vechile type, given a population. """ logging.info('Population: %d', self._conf['population']['entities']) for v_type in self._conf['distribution'].keys(): self._conf['distribution'][v_type]['tot'] = int( self._conf['population']['entities'] * self._conf['distribution'][v_type]['perc']) logging.info('\t %s: %d', v_type, self._conf['distribution'][v_type]['tot']) def _normal_departure_time(self): """ Return the departure time, comuted using a normal distribution. """ departure = int( numpy.random.normal(loc=self._conf['peak']['mean'], scale=self._conf['peak']['std'], size=1)) while (departure < self._conf['interval']['begin'] or departure > self._conf['interval']['end']): departure = int( numpy.random.normal(loc=self._conf['peak']['mean'], scale=self._conf['peak']['std'], size=1)) return departure def _compute_trips_per_type(self): """ Compute the trips for the synthetic population for each vType. """ for v_type in self._conf['distribution'].keys(): total = 0 for key, area in self._conf['distribution'][v_type][ 'composition'].items(): vehicles = int(self._conf['distribution'][v_type]['tot'] * area['perc']) logging.info('[%s] Computing %d trips from %s to %s ... ', v_type, vehicles, area['from'], area['to']) if self._profiling: _pr = cProfile.Profile() _pr.enable() for veh_id in tqdm(range(vehicles)): ## Generating departure time _depart = self._normal_departure_time() if _depart not in self._all_trips[v_type].keys(): self._all_trips[v_type][_depart] = [] ## Trip generation # Parking lot at the end of the trip. with_parking = 'withParking' in area.keys( ) and area['withParking'] # Modes for intermodal trips. modes = None if 'modes' in area.keys() and area['modes']: modes = area['modes'] _from = None _to = None # (Intermodal) trip _from, _to, _mode, _stages = self._find_allowed_pair_traci( v_type, modes, _depart, self._conf['taz'][area['from']], self._conf['taz'][area['to']], with_parking) modes = _mode # Fixing the parking lots stops from the configuration. 
                    parking_id = None
                    if with_parking:
                        parking_id = self._has_parking_lot(_to)
                        if not parking_id:
                            with_parking = False

                    # Trip creation
                    complete_trip = {
                        'id': '{}_{}_{}'.format(v_type, key, veh_id),
                        'depart': _depart,
                        'from': _from,
                        'to': _to,
                        'type': v_type,
                        'mode': modes,
                        'withParking': with_parking,
                        'PLid': parking_id,
                        'stages': _stages,
                    }
                    complete_trip['sumoTrip'] = self._generate_sumo_trip(
                        complete_trip)
                    self._all_trips[v_type][_depart].append(complete_trip)
                    total += 1
                if self._profiling:
                    _pr.disable()
                    _s = io.StringIO()
                    _ps = pstats.Stats(_pr, stream=_s).sort_stats('cumulative')
                    _ps.print_stats(10)
                    print(_s.getvalue())
                    input("Press any key to continue..")
            logging.info('Generated %d trips for %s.', total, v_type)

    ## ---- PARKING AREAS: location and selection ---- ##

    def _get_parking_id(self, edge):
        """ Randomly select one of the parkings. """
        if not self._sumo_parkings[edge]:
            return None
        pos = self._random_generator.randint(0,
                                             len(self._sumo_parkings[edge]))
        return self._sumo_parkings[edge][pos]

    def _has_parking_lot(self, edge):
        """ Retrieve the parking area ID. """
        parking_id = None
        if edge in self._sumo_parkings.keys():
            parking_id = self._get_parking_id(edge)
        return parking_id

    def _check_parkings_cache(self, edge):
        """ Check among the previously computed results of _find_closest_parking. """
        if edge in self._parking_cache.keys():
            return self._parking_cache[edge]
        return None

    def _find_closest_parking(self, edge):
        """ Given an edge, find the closest parking area. """
        distance = sys.float_info.max

        ret = self._check_parkings_cache(edge)
        if ret:
            return ret

        for p_edge, parkings in self._sumo_parkings.items():
            _is_allowed = False
            for parking in parkings:
                if parking in self._conf['intermodalOptions'][
                        'parkingAreaWhitelist']:
                    _is_allowed = True
                    break
            if not _is_allowed:
                continue

            try:
                route = traci.simulation.findIntermodalRoute(
                    p_edge, edge, walkFactor=.9, pType="pedestrian")
            except traci.exceptions.TraCIException:
                logging.error(
                    '_find_closest_parking: findIntermodalRoute %s -> %s failed.',
                    p_edge, edge)
                route = None

            if route:
                cost = self._cost_from_route(route)
                if distance > cost:
                    distance = cost
                    ret = p_edge, route

        if ret:
            self._parking_cache[edge] = ret
            return ret

        logging.fatal('Edge %s is not reachable from any parking lot.', edge)
        self._blacklisted_edges.add(edge)
        return None, None

    ## ---- Functions for _compute_trips_per_type: _find_allowed_pair_traci ---- ##

    def _find_allowed_pair_traci(self, v_type, modes, departure, from_area,
                                 to_area, with_parking):
        """ Return an origin and an allowed destination, with mode and route stages.
            findRoute(self, fromEdge, toEdge, vType="", depart=-1., routingMode=0)

            findIntermodalRoute(
                self, fromEdge, toEdge, modes="", depart=-1., routingMode=0,
                speed=-1., walkFactor=-1., departPos=-1., arrivalPos=-1.,
                departPosLat=-1., pType="", vType="", destStop="")
        """
        counter = 0
        _is_intermodal = False
        selected_mode = None
        selected_route = None

        if modes:
            _is_intermodal = True

        if _is_intermodal:
            od_found = False
            while not od_found:
                ## Origin and Destination Selection
                from_edge, to_edge = self._select_pair(from_area, to_area,
                                                       True)

                ## Evaluate all the possible (intermodal) routes
                solutions = self._find_intermodal_route(
                    from_edge, to_edge, modes, departure, with_parking)
                if solutions:
                    winner = sorted(solutions)[0]  # let the winner win
                    selected_mode = winner[1]
                    selected_route = winner[2]
                    od_found = True

                counter += 1
                if counter % 10 == 0:
                    logging.debug(
                        '%d pairs done, still looking for the good one..',
                        counter)
        else:
            route = None
            while not self._is_valid_route(None, route):
                ## Origin and Destination Selection
                from_edge, to_edge = self._select_pair(from_area, to_area)
                try:
                    route = traci.simulation.findRoute(from_edge, to_edge,
                                                       vType=v_type)
                except traci.exceptions.TraCIException:
                    logging.debug(
                        '_find_allowed_pair_traci: findRoute FAILED.')
                    route = None

                counter += 1
                if counter % 10 == 0:
                    logging.debug(
                        '%d pairs done, still looking for the good one..',
                        counter)
            selected_mode = v_type
            selected_route = route

        if counter >= 10:
            logging.debug('It required %d iterations to find a valid pair.',
                          counter)
        return from_edge, to_edge, selected_mode, selected_route

    def _find_intermodal_route(self, from_edge, to_edge, modes, departure,
                               with_parking):
        """ Evaluate all the possible (intermodal) routes. """
        solutions = list()
        for mode, weight in modes:
            _last_mile = None
            _modes, _ptype, _vtype = self._get_mode_parameters(mode)
            if with_parking and _vtype in self._conf['intermodalOptions'][
                    'vehicleAllowedParking']:
                ## Find the closest parking area
                p_edge, _last_mile = self._find_closest_parking(to_edge)
                if _last_mile:
                    try:
                        route = traci.simulation.findIntermodalRoute(
                            from_edge, p_edge, depart=departure,
                            walkFactor=.9,  # speed=1.0
                            modes=_modes, pType=_ptype, vType=_vtype)
                    except traci.exceptions.TraCIException:
                        logging.error(
                            '_find_intermodal_route: findIntermodalRoute '
                            'with parking FAILED.')
                        route = None
                    if (self._is_valid_route(_modes, route)
                            and route[-1].type == tc.STAGE_DRIVING):
                        route[-1].destStop = self._get_parking_id(p_edge)
                        route.extend(_last_mile)
                        solutions.append((self._cost_from_route(route) *
                                          weight, mode, route))
            else:
                try:
                    route = traci.simulation.findIntermodalRoute(
                        from_edge, to_edge, depart=departure,
                        walkFactor=.9,  # speed=1.0
                        modes=_modes, pType=_ptype, vType=_vtype)
                except traci.exceptions.TraCIException:
                    logging.error(
                        '_find_intermodal_route: findIntermodalRoute '
                        'without parking FAILED.')
                    route = None
                if self._is_valid_route(_modes, route):
                    solutions.append((self._cost_from_route(route) * weight,
                                      mode, route))

        return solutions

    ## ---- PAIR SELECTION: origin - destination - mode ---- ##

    def _select_pair(self, from_area, to_area, pedestrian=False):
        """ Randomly select one pair, choosing between buildings and TAZ. """
""" from_taz = str(self._select_taz_from_weighted_area(from_area)) to_taz = str(self._select_taz_from_weighted_area(to_area)) if from_taz in self._buildings_by_taz.keys( ) and to_taz in self._buildings_by_taz.keys(): return self._select_pair_from_taz_wbuildings( self._buildings_by_taz[from_taz][:], self._buildings_by_taz[to_taz][:], pedestrian) return self._select_pair_from_taz(self._edges_by_taz[from_taz][:], self._edges_by_taz[to_taz][:]) def _select_taz_from_weighted_area(self, area): """ Select a TAZ from an area using its weight. """ selection = self._random_generator.uniform(0, 1) total_weight = sum([self._taz_weights[taz]['weight'] for taz in area]) cumulative = 0.0 for taz in area: cumulative += self._taz_weights[taz]['weight'] / total_weight if selection <= cumulative: return taz return None # this is matematically impossible, # if this happens, there is a mistake in the weights. def _valid_pair(self, from_edge, to_edge): """ This is just to avoid a HUGE while condition. sumolib.net.edge.is_fringe() """ from_edge_sumo = self._sumo_network.getEdge(from_edge) to_edge_sumo = self._sumo_network.getEdge(to_edge) if from_edge_sumo.is_fringe(from_edge_sumo.getOutgoing()): return False if to_edge_sumo.is_fringe(to_edge_sumo.getIncoming()): return False if from_edge == to_edge: return False if to_edge in self._blacklisted_edges: return False if not to_edge_sumo.allows('pedestrian'): return False return True def _select_pair_from_taz(self, from_taz, to_taz): """ Randomly select one pair from a TAZ. Important: from_taz and to_taz MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. """ from_edge = from_taz.pop( self._random_generator.randint(0, len(from_taz))) to_edge = to_taz.pop(self._random_generator.randint(0, len(to_taz))) _to = False while not self._valid_pair(from_edge, to_edge) and from_taz and to_taz: if not self._sumo_network.getEdge(to_edge).allows( 'pedestrian') or _to: to_edge = to_taz.pop( self._random_generator.randint(0, len(to_taz))) _to = False else: from_edge = from_taz.pop( self._random_generator.randint(0, len(from_taz))) _to = True return from_edge, to_edge def _select_pair_from_taz_wbuildings(self, from_buildings, to_buildings, pedestrian): """ Randomly select one pair from a TAZ. Important: from_buildings and to_buildings MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. """ from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = True while not self._valid_pair( from_edge, to_edge) and from_buildings and to_buildings: if not self._sumo_network.getEdge(to_edge).allows( 'pedestrian') or _to: to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = False else: from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] _to = True return from_edge, to_edge @staticmethod def _get_weighted_edge(edges, double, pedestrian): """ Return an edge and its position using the cumulative sum of the weigths in the area. 
""" pos = -1 ret = None for cum_sum, g_edge, p_edge, _ in edges: if ret and cum_sum > double: return ret, pos if pedestrian and p_edge: ret = p_edge elif not pedestrian and g_edge: ret = g_edge elif g_edge: ret = g_edge else: ret = p_edge pos += 1 return edges[-1][1], len(edges) - 1 ## ---- INTERMODAL: modes and route validity ---- ## @staticmethod def _get_mode_parameters(mode): """ Return the correst TraCI parameters for the requested mode. Parameters: _modes, _ptype, _vtype """ if mode == 'public': return 'public', '', '' elif mode == 'bicycle': return 'bicycle', '', 'bicycle' elif mode == 'walk': return '', 'pedestrian', '' return 'car', '', mode @staticmethod def _is_valid_route(mode, route): """ Handle simultaneously findRoute and findIntermodalRoute results. """ if route is None: # traci failed return False elif mode is None: # only for findRoute if route.edges: return True elif mode == 'public': for stage in route: if stage.line: return True elif mode == 'car': for stage in route: if stage.type == tc.STAGE_DRIVING and stage.edges: return True else: for stage in route: if stage.edges: return True return False @staticmethod def _cost_from_route(route): """ Compute the route cost. """ cost = 0.0 for stage in route: cost += stage.cost return cost ## ---------------------------------------------------------------------------------------- ## ## Saving trips to files ## ## ---------------------------------------------------------------------------------------- ## ROUTES_TPL = """<?xml version="1.0" encoding="UTF-8"?> <!-- Monaco SUMO Traffic (MoST) Scenario Copyright (C) 2019 Lara CODECA This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. --> <routes xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://sumo.dlr.de/xsd/routes_file.xsd"> {trips} </routes>""" VEHICLE = """ <vehicle id="{id}" type="{v_type}" depart="{depart}" departLane="best" arrivalPos="{arrival}">{route}{stop} </vehicle>""" ROUTE = """ <route edges="{edges}"/>""" STOP_PARKING = """ <stop parkingArea="{id}" until="{until}"/>""" PERSON = """ <person id="{id}" type="pedestrian" depart="{depart}">{stages} </person>""" WALK = """ <walk edges="{edges}"/>""" WALK_BUS = """ <walk edges="{edges}" busStop="{busStop}"/>""" RIDE_BUS = """ <ride busStop="{busStop}" lines="{lines}" intended="{intended}" depart="{depart}"/>""" RIDE_TRIGGERED = """ <ride from="{from_edge}" to="{to_edge}" lines="{vehicle_id}"/>""" VEHICLE_TRIGGERED = """ <vehicle id="{id}" type="{v_type}" depart="triggered" departLane="best" arrivalPos="{arrival}">{route}{stop} </vehicle>""" def _generate_sumo_trip(self, vehicle): """ Generate the SUMO tag version of the trip. 
""" all_trips = '' _begin = self._conf['stopUntil']['begin'] _end = self._conf['stopUntil']['end'] if vehicle['type'] == 'pedestrian': triggered = '' stages = '' for stage in vehicle['stages']: if stage.type == tc.STAGE_WALKING: if stage.destStop: stages += self.WALK_BUS.format(edges=' '.join( stage.edges), busStop=stage.destStop) else: stages += self.WALK.format(edges=' '.join(stage.edges)) elif stage.type == tc.STAGE_DRIVING: if stage.line != stage.intended: # intended is the transport id, so it must be different stages += self.RIDE_BUS.format(busStop=stage.destStop, lines=stage.line, intended=stage.intended, depart=stage.depart) else: # triggered vehicle (line = intended) ask why to SUMO. _tr_id = '{}_tr'.format(vehicle['id']) _route = self.ROUTE.format(edges=' '.join(stage.edges)) _stop = '' if stage.destStop: _stop = self.STOP_PARKING.format( id=stage.destStop, until=self._random_generator.randint( _begin, _end)) _arrival = 'random' if _stop: _arrival = self._parking_position[stage.destStop] triggered += self.VEHICLE_TRIGGERED.format( id=_tr_id, v_type=vehicle['mode'], route=_route, stop=_stop, arrival=_arrival) stages += self.RIDE_TRIGGERED.format( from_edge=stage.edges[0], to_edge=stage.edges[-1], vehicle_id=_tr_id) all_trips += triggered all_trips += self.PERSON.format(id=vehicle['id'], depart=vehicle['depart'], stages=stages) else: _route = self.ROUTE.format(edges=' '.join(vehicle['stages'].edges)) _stop = '' if vehicle['withParking']: _stop = self.STOP_PARKING.format( id=vehicle['PLid'], until=self._random_generator.randint(_begin, _end)) _arrival = 'random' if _stop: _arrival = self._parking_position[vehicle['PLid']] all_trips += self.VEHICLE.format(id=vehicle['id'], v_type=vehicle['type'], depart=vehicle['depart'], route=_route, stop=_stop, arrival=_arrival) return all_trips def _saving_trips_to_files(self): """ Saving all te trips to files divided by vType. """ _begin = self._conf['stopUntil']['begin'] _end = self._conf['stopUntil']['end'] for v_type, dict_trips in self._all_trips.items(): filename = '{}/{}{}.rou.xml'.format(BASE_DIR, self._conf['outputPrefix'], v_type) with open(filename, 'w') as tripfile: all_trips = '' for time in sorted(dict_trips.keys()): for vehicle in dict_trips[time]: all_trips += vehicle['sumoTrip'] tripfile.write(self.ROUTES_TPL.format(trips=all_trips)) logging.info('Saved %s', filename)
class Environment():
    """ Loads, stores, and interacts with the SAGA environment required for
        the mobility generation. """

    def __init__(self, conf, sumo, logger, profiling=False):
        """
        Initialize the synthetic population.
        :param conf: dictionary with the configurations
        :param sumo: already initialized SUMO simulation (TraCI or LibSUMO)
        :param profiling=False: enable cProfile
        """
        self._conf = conf
        self._sumo = sumo
        self.logger = logger

        self._max_retry_number = 1000
        if 'maxNumTry' in conf:
            self._max_retry_number = conf['maxNumTry']

        self._profiling = profiling
        self._random_generator = RandomState(seed=self._conf['seed'])

        self.logger.info('Loading SUMO net file %s', conf['SUMOnetFile'])
        self.sumo_network = sumolib.net.readNet(conf['SUMOnetFile'])

        self.logger.info('Loading SUMO parking lots from file %s',
                         conf['SUMOadditionals']['parkings'])
        self._blacklisted_edges = set()
        self._sumo_parkings = collections.defaultdict(list)
        self._parking_cache = dict()
        self._parking_position = dict()
        self._load_parkings(conf['SUMOadditionals']['parkings'])

        self.logger.info('Loading SUMO taxi stands from file %s',
                         conf['intermodalOptions']['taxiStands'])
        self._sumo_taxi_stands = collections.defaultdict(list)
        self._taxi_stand_cache = dict()
        self._taxi_stand_position = dict()
        self._load_taxi_stands(conf['intermodalOptions']['taxiStands'])

        self.logger.info('Loading TAZ weights from %s',
                         conf['population']['tazWeights'])
        self._taz_weights = dict()
        self._load_weights_from_csv(conf['population']['tazWeights'])

        self.logger.info('Loading buildings weights from %s',
                         conf['population']['buildingsWeight'])
        self._buildings_by_taz = dict()
        self._load_buildings_weight_from_csv_dir(
            conf['population']['buildingsWeight'])

        self.logger.info('Loading edges in each TAZ from %s',
                         conf['population']['tazDefinition'])
        self._edges_by_taz = dict()
        self._load_edges_from_taz(conf['population']['tazDefinition'])

    # LOADERS

    def _load_parkings(self, filename):
        """ Load parking ids from the XML file. """
        if not os.path.isfile(filename):
            return
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if child.tag != 'parkingArea':
                continue
            if child.attrib['id'] not in self._conf['intermodalOptions'][
                    'parkingAreaBlacklist']:
                edge = child.attrib['lane'].split('_')[0]
                position = float(child.attrib['startPos']) + 2.5
                self._sumo_parkings[edge].append(child.attrib['id'])
                self._parking_position[child.attrib['id']] = position

    def _load_taxi_stands(self, filename):
        """ Load taxi stand ids from the XML file. """
        if not os.path.isfile(filename):
            return
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if child.tag != 'parkingArea':
                continue
            if child.attrib['id'] not in self._conf['intermodalOptions'][
                    'taxiStandsBlacklist']:
                edge = child.attrib['lane'].split('_')[0]
                position = float(child.attrib['startPos']) + 2.5
                self._sumo_taxi_stands[edge].append(child.attrib['id'])
                self._taxi_stand_position[child.attrib['id']] = position

    def _load_weights_from_csv(self, filename):
        """ Load the TAZ weights from a CSV file. """
        with open(filename, 'r') as csvfile:
            weightreader = csv.reader(csvfile)
            header = []
            for row in weightreader:
                if not row:
                    continue  # ignoring empty lines
                if not header:
                    header = row
                else:
                    self._taz_weights[row[0]] = {
                        header[0]: row[0],
                        header[1]: row[1],
                        header[2]: int(row[2]),
                        header[3]: float(row[3]),
                        'weight': (int(row[2]) / float(row[3])),
                    }

    def _load_buildings_weight_from_csv_dir(self, directory):
        """ Load the building weights from multiple CSV files. """
""" allfiles = [ os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f)) ] for filename in sorted(allfiles): self.logger.debug('Loding %s', filename) with open(filename, 'r') as csvfile: weightreader = csv.reader(csvfile) header = None taz = None buildings = [] for row in weightreader: if not row: continue # empty line if header is None: header = row else: taz = row[0] buildings.append(( float(row[3]), # weight row[4], # generic edge row[5])) # pedestrian edge if len(buildings) < 10: self.logger.debug('Dropping %s, only %d buildings found.', filename, len(buildings)) continue weighted_buildings = [] cum_sum = 0.0 for weight, g_edge, p_edge in sorted(buildings): cum_sum += weight weighted_buildings.append( (cum_sum, g_edge, p_edge, weight)) self._buildings_by_taz[taz] = weighted_buildings def _load_edges_from_taz(self, filename): """ Load edges from the TAZ file. """ xml_tree = xml.etree.ElementTree.parse(filename).getroot() for child in xml_tree: if child.tag == 'taz': self._edges_by_taz[ child.attrib['id']] = child.attrib['edges'].split(' ') # LANES & EDGES def get_random_lane_from_tazs(self): """ Retrieve a random edge usable by a taxi based on the option "intermodalOptions":"taxiFleetInitialTAZs": ['taz', ...] """ _locations = self._conf['intermodalOptions']['taxiFleetInitialTAZs'] _lane = None _retry_counter = 0 while not _lane and _retry_counter < self._max_retry_number * 100: try: if _locations: _taz = self._random_generator.choice(_locations) _edges = self._edges_by_taz[_taz] _edge = self._random_generator.choice(_edges) else: _edge = self._random_generator.choice( self.sumo_network.getEdges()).getID() _lane = self.get_stopping_lane(_edge, ['taxi', 'passenger']) except sagaexceptions.TripGenerationGenericError: _retry_counter += 1 _lane = None if _lane is None: self.logger.critical( '_get_random_lane_from_TAZs with "%s" generated %d errors, ' 'taxi generation aborted..', pformat(_locations), _retry_counter) return _lane def get_all_neigh_edges(self, origin, distance): """ Returns all the edges reachable from the origin within the given radius. """ _edge_shape = self.sumo_network.getEdge(origin).getShape() x_coord = _edge_shape[-1][0] y_coord = _edge_shape[-1][1] edges = self.sumo_network.getNeighboringEdges(x_coord, y_coord, r=distance) edges = [edge.getID() for edge, _ in edges] return edges def get_arrival_pos_from_edge(self, edge, position): """ If the position is too close to the end, it may genrate error with findIntermodalRoute. """ length = self.sumo_network.getEdge(edge).getLength() if length < self._conf['minEdgeAllowed']: return None if position > length - 1.0: return length - 1.0 if position < 1.0: return 1.0 return position def get_random_pos_from_edge(self, edge): """ Return a random position in the given edge. """ length = self.sumo_network.getEdge(edge).getLength() position = None if length < self._conf['stopBufferDistance']: position = length / 2.0 # avoid the proximity of the intersection begin = self._conf['stopBufferDistance'] / 2.0 end = length - begin position = (end - begin) * self._random_generator.random_sample() + begin self.logger.debug('get_random_pos_from_edge: [%s] %f (%f)', edge, position, length) return position ## ---- PAIR SELECTION: origin - destination - mode ---- ## def _select_pair(self, from_area, to_area, pedestrian=False): """ Randomly select one pair, chosing between buildings and TAZ. 
""" from_taz = str(self._select_taz_from_weighted_area(from_area)) to_taz = str(self._select_taz_from_weighted_area(to_area)) if from_taz in self._buildings_by_taz.keys( ) and to_taz in self._buildings_by_taz.keys(): return self._select_pair_from_taz_wbuildings( self._buildings_by_taz[from_taz][:], self._buildings_by_taz[to_taz][:], pedestrian) return self._select_pair_from_taz(self._edges_by_taz[from_taz][:], self._edges_by_taz[to_taz][:]) def _select_taz_from_weighted_area(self, area): """ Select a TAZ from an area using its weight. """ selection = self._random_generator.uniform(0, 1) total_weight = sum([self._taz_weights[taz]['weight'] for taz in area]) if total_weight <= 0: error_msg = 'Error with area {}, total sum of weights is {}. '.format( area, total_weight) error_msg += 'It must be strictly positive.' raise Exception(error_msg, [(taz, self._taz_weights[taz]['weight']) for taz in area]) cumulative = 0.0 for taz in area: cumulative += self._taz_weights[taz]['weight'] / total_weight if selection <= cumulative: return taz return None # this is matematically impossible, # if this happens, there is a mistake in the weights. def _valid_pair(self, from_edge, to_edge): """ This is just to avoid a HUGE while condition. sumolib.net.edge.is_fringe() """ from_edge_sumo = self.sumo_network.getEdge(from_edge) to_edge_sumo = self.sumo_network.getEdge(to_edge) if from_edge_sumo.is_fringe(from_edge_sumo.getOutgoing()): return False if to_edge_sumo.is_fringe(to_edge_sumo.getIncoming()): return False if from_edge == to_edge: return False if to_edge in self._blacklisted_edges: return False if not to_edge_sumo.allows('pedestrian'): return False return True def _select_pair_from_taz(self, from_taz, to_taz): """ Randomly select one pair from a TAZ. Important: from_taz and to_taz MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. """ from_edge = from_taz.pop( self._random_generator.randint(0, len(from_taz))) to_edge = to_taz.pop(self._random_generator.randint(0, len(to_taz))) _to = False while not self._valid_pair(from_edge, to_edge) and from_taz and to_taz: if not self.sumo_network.getEdge(to_edge).allows( 'pedestrian') or _to: to_edge = to_taz.pop( self._random_generator.randint(0, len(to_taz))) _to = False else: from_edge = from_taz.pop( self._random_generator.randint(0, len(from_taz))) _to = True return from_edge, to_edge def _select_pair_from_taz_wbuildings(self, from_buildings, to_buildings, pedestrian): """ Randomly select one pair from a TAZ. Important: from_buildings and to_buildings MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. 
""" from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = True while not self._valid_pair( from_edge, to_edge) and from_buildings and to_buildings: if not self.sumo_network.getEdge(to_edge).allows( 'pedestrian') or _to: to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = False else: from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] _to = True return from_edge, to_edge @staticmethod def _get_weighted_edge(edges, double, pedestrian): """ Return an edge and its position using the cumulative sum of the weigths in the area. """ pos = -1 ret = None for cum_sum, g_edge, p_edge, _ in edges: if ret and cum_sum > double: return ret, pos if pedestrian and p_edge: ret = p_edge elif not pedestrian and g_edge: ret = g_edge elif g_edge: ret = g_edge else: ret = p_edge pos += 1 return edges[-1][1], len(edges) - 1 def get_stopping_lane(self, edge, vtypes=['passenger']): """ Returns the vehicle-friendly stopping lane closer to the sidewalk that respects the configuration parameter 'minEdgeAllowed'. """ for lane in self.sumo_network.getEdge(edge).getLanes(): if lane.getLength() >= self._conf['minEdgeAllowed']: for vtype in vtypes: if lane.allows(vtype): return lane.getID() raise sagaexceptions.TripGenerationGenericError( '"{}" cannot stop on edge {}'.format(vtypes, edge)) ## PARKING AREAS: location and selection def get_parking_position(self, parking_id): """ Returns the position for a given parking. """ return self._parking_position[parking_id] def find_closest_parking(self, edge): """ Given and edge, find the closest parking area. """ distance = sys.float_info.max ret = self._check_parkings_cache(edge) if ret: return ret p_id = None for p_edge, parkings in self._sumo_parkings.items(): for parking in parkings: if parking not in self._conf['intermodalOptions'][ 'parkingAreaBlacklist']: p_id = parking break if p_id: try: route = self._sumo.simulation.findIntermodalRoute( p_edge, edge, pType="pedestrian") except TraCIException: route = None if route and not isinstance(route, list): # list in until SUMO 1.4.0 included, tuple onward route = list(route) if route: cost = sumoutils.cost_from_route(route) if distance > cost: distance = cost ret = p_id, p_edge, route if ret: self._parking_cache[edge] = ret return ret self.logger.fatal('Edge %s is not reachable from any parking lot.', edge) self._blacklisted_edges.add(edge) return None, None, None def _check_parkings_cache(self, edge): """ Check among the previously computed results of _find_closest_parking """ if edge in self._parking_cache.keys(): return self._parking_cache[edge] return None ## ---- PAIR SELECTION: origin - destination - mode ---- ## def select_pair(self, from_area, to_area, pedestrian=False): """ Randomly select one pair, chosing between buildings and TAZ. 
""" from_taz = str(self._select_taz_from_weighted_area(from_area)) to_taz = str(self._select_taz_from_weighted_area(to_area)) if from_taz in self._buildings_by_taz.keys( ) and to_taz in self._buildings_by_taz.keys(): return self._select_pair_from_taz_wbuildings( self._buildings_by_taz[from_taz][:], self._buildings_by_taz[to_taz][:], pedestrian) return self._select_pair_from_taz(self._edges_by_taz[from_taz][:], self._edges_by_taz[to_taz][:]) def valid_pair(self, from_edge, to_edge): """ This is just to avoid a HUGE while condition. sumolib.net.edge.is_fringe() """ from_edge_sumo = self.sumo_network.getEdge(from_edge) to_edge_sumo = self.sumo_network.getEdge(to_edge) if from_edge_sumo.is_fringe(from_edge_sumo.getOutgoing()): return False if to_edge_sumo.is_fringe(to_edge_sumo.getIncoming()): return False if from_edge == to_edge: return False if to_edge in self._blacklisted_edges: return False if not to_edge_sumo.allows('pedestrian'): return False return True def _select_taz_from_weighted_area(self, area): """ Select a TAZ from an area using its weight. """ selection = self._random_generator.uniform(0, 1) total_weight = sum([self._taz_weights[taz]['weight'] for taz in area]) if total_weight <= 0: error_msg = 'Error with area {}, total sum of weights is {}. '.format( area, total_weight) error_msg += 'It must be strictly positive.' raise Exception(error_msg, [(taz, self._taz_weights[taz]['weight']) for taz in area]) cumulative = 0.0 for taz in area: cumulative += self._taz_weights[taz]['weight'] / total_weight if selection <= cumulative: return taz return None # this is matematically impossible, # if this happens, there is a mistake in the weights. def _select_pair_from_taz(self, from_taz, to_taz): """ Randomly select one pair from a TAZ. Important: from_taz and to_taz MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. """ from_edge = from_taz.pop( self._random_generator.randint(0, len(from_taz))) to_edge = to_taz.pop(self._random_generator.randint(0, len(to_taz))) _to = False while not self._valid_pair(from_edge, to_edge) and from_taz and to_taz: if not self.sumo_network.getEdge(to_edge).allows( 'pedestrian') or _to: to_edge = to_taz.pop( self._random_generator.randint(0, len(to_taz))) _to = False else: from_edge = from_taz.pop( self._random_generator.randint(0, len(from_taz))) _to = True return from_edge, to_edge def _select_pair_from_taz_wbuildings(self, from_buildings, to_buildings, pedestrian): """ Randomly select one pair from a TAZ. Important: from_buildings and to_buildings MUST be passed by copy. Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions. 
""" from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = True while not self._valid_pair( from_edge, to_edge) and from_buildings and to_buildings: if not self.sumo_network.getEdge(to_edge).allows( 'pedestrian') or _to: to_edge, _index = self._get_weighted_edge( to_buildings, self._random_generator.random_sample(), pedestrian) del to_buildings[_index] _to = False else: from_edge, _index = self._get_weighted_edge( from_buildings, self._random_generator.random_sample(), False) del from_buildings[_index] _to = True return from_edge, to_edge @staticmethod def _get_weighted_edge(edges, double, pedestrian): """ Return an edge and its position using the cumulative sum of the weigths in the area. """ pos = -1 ret = None for cum_sum, g_edge, p_edge, _ in edges: if ret and cum_sum > double: return ret, pos if pedestrian and p_edge: ret = p_edge elif not pedestrian and g_edge: ret = g_edge elif g_edge: ret = g_edge else: ret = p_edge pos += 1 return edges[-1][1], len(edges) - 1
class Generator():

    seed = None
    random = None

    def __init__(self, seed=1):
        super(Generator, self).__init__()
        self.random = RandomState(seed)
        self.seed = seed

    def reseed(self):
        self.random = RandomState(self.seed)

    def randSyllable(self):
        # Chance that a regular consonant will start the syllable
        c1_dice = self.random.random_sample() < 0.91
        # Chance that a special conjunction consonant is used
        s1_dice = self.random.random_sample() < 0.05
        # Chance that a regular vowel will be used
        v1_dice = self.random.random_sample() < 0.85
        # Chance that the syllable has an ending consonant
        c2_add_dice = self.random.random_sample() < 0.28
        # Chance that a regular consonant will end the syllable
        c2_dice = self.random.random_sample() < 0.91
        # Chance that the ending has an addon consonant
        s2_dice = self.random.random_sample() < 0.03

        c1 = (self.random.choice(REGULAR_CONSONANTS) if c1_dice
              else self.random.choice(COMPOSITE_CONSONANTS))
        s1 = self.random.choice(SPECIAL_CONSONANTS) if s1_dice else ''
        v1 = (self.random.choice(REGULAR_VOWELS) if v1_dice
              else self.random.choice(COMPOSITE_VOWELS))
        c2 = ((self.random.choice(REGULAR_CONSONANTS) if c2_dice
               else self.random.choice(ENDING_CONSONANTS))
              if c2_add_dice else '')
        s2 = self.random.choice(ADDON_ENDING_CONSONANTS) if s2_dice else ''

        syllable = c1 + s1 + v1 + c2 + s2
        return syllable

    def randWord(self, s=2):
        """ s: number of syllables (int) """
        word = ''
        for _ in range(s):
            word += self.randSyllable()
        return word

    def randSentence(self, meter=[2, 2, 1, 2, 3, 2, 1, 2, 2]):
        sentence = []
        for syllables in meter:
            sentence.append(self.randWord(syllables))
        return ' '.join(sentence)

    def randParagraph(self):
        paragraph = []
        rand_wordcount = [
            self.random.randint(3, 6)
            for _ in range(self.random.randint(4, 5))
        ]
        for words in rand_wordcount:
            rand_meter = [self.random.randint(1, 4) for _ in range(words)]
            sentence = self.randSentence(rand_meter)
            paragraph.append(sentence)
        return '. '.join(paragraph)

    def randDictionary(self, word_list=['apple', 'banana', 'cake', 'dog',
                                        'elephant', 'fruit', 'guava', 'human',
                                        'island', 'joke', 'king', 'love',
                                        'mother', 'nature', 'ocean', 'pie',
                                        'queen', 'random', 'start', 'tree',
                                        'up', 'vine', 'wisdom', 'yellow',
                                        'zoo']):
        rand_dict_e2r = {word: self.randWord() for word in word_list}
        rand_dict_r2e = {v: k for k, v in rand_dict_e2r.items()}

        ordered_e2r = OrderedDict()
        print("English to Random Language")
        for key in sorted(rand_dict_e2r.keys()):
            print(key + ' : ' + rand_dict_e2r[key])
            ordered_e2r[key] = rand_dict_e2r[key]

        ordered_r2e = OrderedDict()
        print("\n\nRandom Language to English")
        for key in sorted(rand_dict_r2e.keys()):
            print(key + ' : ' + rand_dict_r2e[key])
            ordered_r2e[key] = rand_dict_r2e[key]

        return (ordered_e2r, ordered_r2e)

    def convertWord(self, word):
        word = word.lower()
        saved_state = self.random.get_state()
        # Word mapping method: md5.
        # To make it more natural, this mapping should be updated
        # to reflect natural language patterns.
        md5 = hashlib.md5(bytes(word, encoding='utf-8'))
        wordseed = (self.seed +
                    int.from_bytes(md5.digest(), 'little')) % (2**31)
        self.random.seed(wordseed)
        randword = self.randWord(math.ceil(abs(self.random.normal(2, 1))))
        self.random.set_state(saved_state)
        return randword

    def convertSentence(self, sentence):
        words = sentence.split()
        converted = [self.convertWord(word) for word in words]
        return ' '.join(converted)
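# --- Hedged usage sketch for Generator, not from the source ---
# Assumes the module-level alphabets the class relies on (REGULAR_CONSONANTS,
# COMPOSITE_CONSONANTS, SPECIAL_CONSONANTS, REGULAR_VOWELS, COMPOSITE_VOWELS,
# ENDING_CONSONANTS, ADDON_ENDING_CONSONANTS) are defined elsewhere in the
# module; the exact output therefore depends on those lists.
if __name__ == '__main__':
    gen = Generator(seed=42)
    print(gen.randWord(3))                     # one 3-syllable word
    print(gen.randSentence())                  # sentence with the default meter
    print(gen.randParagraph())                 # a few random sentences
    # convertWord() seeds the RNG from an md5 of the word, so the mapping
    # below is stable for a fixed Generator seed.
    print(gen.convertSentence('hello world'))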