def add_quotes(the_circus):
    """
    Create a "quotes" relationship on the "person" population of the circus
    and fill it with Faker-generated sentences.

    Four batches are added; each batch generates `person.size` sentences and
    relates them to `person.ids`.

    :param the_circus: circus exposing a `seeder` iterator and a "person"
        entry in `populations`
    """
    sentence_gen = FakerGenerator(
        method="sentence",
        nb_words=6,
        variable_nb_words=True,
        seed=next(the_circus.seeder),
    )

    people = the_circus.populations["person"]
    quotes = people.create_relationship("quotes")

    n_batches = 4
    for _ in range(n_batches):
        quotes.add_relations(
            from_ids=people.ids,
            to_ids=sentence_gen.generate(size=people.size),
        )
def create_random_cells(self, n_cells):
    """
    Build a basic cell population carrying "latitude" and "longitude"
    attributes, both initialised from seeded Faker generators.

    :param n_cells: size of the population to create
    :return: the newly created Population
    """
    cells = Population(size=n_cells)

    # Both generators are built before any attribute is created, and the
    # latitude generator takes its seed from self.seeder first.
    coordinate_gens = [
        (axis, FakerGenerator(method=axis, seed=next(self.seeder)))
        for axis in ("latitude", "longitude")
    ]

    for axis, generator in coordinate_gens:
        cells.create_attribute(axis, init_gen=generator)

    return cells
def test_faker_generator_should_delegate_to_faker_correct():
    """
    For several Faker methods, a FakerGenerator must return exactly as many
    values as requested.
    """
    # (faker method, number of values requested)
    scenarios = (
        ("name", 10),
        ("text", 20),
        ("address", 30),
    )

    for faker_method, requested in scenarios:
        generator = FakerGenerator(seed=1234, method=faker_method)
        produced = generator.generate(requested)
        assert len(produced) == requested
def step4():
    """
    Hello-world scenario in which each person is related to 5 randomly chosen
    sites, one of which is selected at every story execution.

    The run lasts 48h of simulated time with a 1h clock step; logs go to
    "output/example1" and the number of logged "hello" lines is printed.

    The original author notes that this version is drastically slower.
    """
    scenario = circus.Circus(
        name="example1",
        master_seed=12345,
        start=pd.Timestamp("1 Jan 2017 00:00"),
        step_duration=pd.Timedelta("1h"),
    )

    people = scenario.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"),
    )
    name_gen = FakerGenerator(method="name", seed=next(scenario.seeder))
    people.create_attribute("NAME", init_gen=name_gen)

    # pool of 1000 site ids, sampled with numpy's "choice"
    site_ids = SequencialGenerator(prefix="SITE_").generate(1000)
    site_picker = NumpyRandomGenerator(
        method="choice", a=site_ids, seed=next(scenario.seeder))

    allowed_sites = people.create_relationship(name="sites")
    for _ in range(5):
        allowed_sites.add_relations(
            from_ids=people.ids,
            to_ids=site_picker.generate(people.size))

    # a timer reset to 0 after each execution makes the story fire again at
    # the next clock tick (next hour)
    every_tick = ConstantDependentGenerator(value=0)
    story = scenario.create_story(
        name="hello_world",
        initiating_population=people,
        member_id_field="PERSON_ID",
        timer_gen=every_tick,
    )

    duration_gen = NumpyRandomGenerator(
        method="exponential", scale=60, seed=next(scenario.seeder))

    story_steps = [
        people.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        duration_gen.ops.generate(named_as="DURATION"),
        allowed_sites.ops.select_one(from_field="PERSON_ID", named_as="SITE"),
        scenario.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"),
    ]
    story.set_operations(*story_steps)

    scenario.run(
        duration=pd.Timedelta("48h"),
        log_output_folder="output/example1",
        delete_existing_logs=True,
    )

    with open("output/example1/hello.csv") as log_file:
        # the first line is not counted (presumably the CSV header)
        n_logged = len(log_file.readlines()) - 1
        print("Logged {} lines".format(n_logged))
def build_uganda_populations(circus):
    """
    Create the "cells" and "cities" populations in the given circus, link
    them through a "CELLS" relationship on cities, and build an hourly
    cyclic timer profile.

    All random generators take their seed, in order, from a local
    `seed_provider(12345)`.

    :param circus: circus in which the populations are created
    :return: tuple (cells, cities, timer_config)
    """
    seeds = seed_provider(12345)

    cells = circus.create_population(
        name="cells",
        ids_gen=SequencialGenerator(prefix="CELL_"),
        size=200,
    )

    lat_gen = FakerGenerator(method="latitude", seed=next(seeds))
    cells.create_attribute("latitude", init_gen=lat_gen)

    lon_gen = FakerGenerator(method="longitude", seed=next(seeds))
    cells.create_attribute("longitude", init_gen=lon_gen)

    # Per the original author's note: a cell's "health" is its probability of
    # accepting a call, by default one expected failure every 1000 calls.
    health_gen = build_healthy_level_gen(next(seeds))
    cells.create_attribute(name="HEALTH", init_gen=health_gen)

    # cities use Faker city names as their ids
    city_name_gen = FakerGenerator(method="city", seed=next(seeds))
    cities = circus.create_population(
        name="cities", size=200, ids_gen=city_name_gen)

    # presumably "set1" holds the cell ids and "chosen_from_set2" the city
    # picked for each of them -- verify against make_random_assign
    cells_of_city = cities.create_relationship("CELLS")
    assignment = make_random_assign(cells.ids, cities.ids, next(seeds))
    cells_of_city.add_relations(
        from_ids=assignment["chosen_from_set2"],
        to_ids=assignment["set1"],
    )

    population_gen = ParetoGenerator(xmin=10000, a=1.4, seed=next(seeds))
    cities.create_attribute("population", init_gen=population_gen)

    # 24 weights, one per "1h" profile time step
    hourly_weights = [
        1, .5, .2, .15, .2, .4, 3.8, 7.2,
        8.4, 9.1, 9.0, 8.3, 8.1, 7.7, 7.4, 7.8,
        8.0, 7.9, 9.7, 10.4, 10.5, 8.8, 5.7, 2.8,
    ]
    timer_config = CyclicTimerProfile(
        profile=hourly_weights,
        profile_time_steps="1h",
        start_date=pd.Timestamp("6 June 2016 00:00:00"),
    )

    return cells, cities, timer_config
def _add_person_population(self):
    """
    Create the 1000-member "person" population with "NAME" and "AGE"
    attributes, then attach a "quotes" relationship filled with four batches
    of Faker sentences.
    """
    person_ids = SequencialGenerator(prefix="PERSON_")

    # seeds are drawn in this order: age, name, then quotes
    # NOTE(review): a mean of 3 looks low for an age -- confirm loc=3 is
    # intended.
    ages = NumpyRandomGenerator(
        method="normal", loc=3, scale=5, seed=next(self.seeder))
    names = FakerGenerator(method="name", seed=next(self.seeder))

    person = self.create_population(
        name="person", size=1000, ids_gen=person_ids)
    person.create_attribute("NAME", init_gen=names)
    person.create_attribute("AGE", init_gen=ages)

    sentences = FakerGenerator(
        method="sentence",
        nb_words=6,
        variable_nb_words=True,
        seed=next(self.seeder),
    )

    quotes = self.populations["person"].create_relationship("quotes")

    # the batch index doubles as the relation weight
    # NOTE(review): the first batch therefore gets weight 0 -- confirm this
    # is intended.
    for batch_weight in range(4):
        batch = sentences.generate(size=person.size)
        quotes.add_relations(
            from_ids=person.ids,
            to_ids=batch,
            weights=batch_weight,
        )
def add_agent_reviews_stories(self, agents):
    """
    Register the "agent_reviews" story, in which agents write reviews.

    This illustrates the dynamic creation of population members: reviews are
    stored in an initially empty "rev" population that the story updates, in
    addition to being logged under "reviews". Most of the time reviews would
    probably be modelled as logs only; both are done here for illustration.

    :param agents: population initiating the story; must expose an
        "AGENT_NAME" attribute
    """
    timer = HighWeekDaysTimerGenerator(
        clock=self.clock, seed=next(self.seeder))
    activity = NumpyRandomGenerator(
        method="choice", a=range(1, 4), seed=next(self.seeder))

    # the system starts with no reviews
    review_population = self.create_population(name="rev", size=0)
    for attribute in ("DATE", "TEXT", "AGENT_ID", "AGENT_NAME"):
        review_population.create_attribute(attribute)

    story = self.create_story(
        name="agent_reviews",
        initiating_population=agents,
        member_id_field="AGENT",
        timer_gen=timer,
        activity_gen=activity,
    )

    review_ids = SequencialGenerator(start=0, prefix="REVIEW_ID")
    review_texts = FakerGenerator(method="text", seed=next(self.seeder))

    # story field -> attribute of the review population
    review_attributes = {
        "DATE": "DATETIME",
        "TEXT": "REVIEW_TEXT",
        "AGENT_ID": "AGENT",
        "AGENT_NAME": "AGENT_NAME",
    }

    story_steps = [
        self.clock.ops.timestamp(named_as="DATETIME"),
        agents.ops.lookup(
            id_field="AGENT",
            select={"AGENT_NAME": "AGENT_NAME"}),
        review_ids.ops.generate(named_as="REVIEW_ID"),
        review_texts.ops.generate(named_as="REVIEW_TEXT"),
        review_population.ops.update(
            id_field="REVIEW_ID",
            copy_attributes_from_fields=review_attributes),
        # reviews are modelled both as a population and as logs
        operations.FieldLogger(log_id="reviews"),
    ]
    story.set_operations(*story_steps)
def prepare_dealers(circus, params):
    """
    Complete the "dist_l1" and "dist_l2" actors with sales-rep contact
    attributes and an initial stock per product, then link every dist_l1
    member to the telco through one "<product>__provider" relationship per
    product.

    :param circus: circus holding the "pos", "dist_l1", "dist_l2" and
        "telcos" actors as well as the per-product generators
    :param params: configuration; only `params["products"]` is read here
    """
    products = params["products"]

    for level in ("l1", "l2"):
        actor_name = "dist_{}".format(level)
        logging.info("prepare {} actor".format(actor_name))

        dealers = circus.actors[actor_name]

        # ratio used to scale the POS initial stock size for this level
        pos_per_dealer = circus.actors["pos"].size / dealers.size

        rep_name_gen = snd_constants.gen("CONTACT_NAMES", next(circus.seeder))
        dealers.create_attribute(
            "DISTRIBUTOR_SALES_REP_NAME", init_gen=rep_name_gen)

        rep_phone_gen = FakerGenerator(
            method="phone_number", seed=next(circus.seeder))
        dealers.create_attribute(
            "DISTRIBUTOR_SALES_REP_PHONE", init_gen=rep_phone_gen)

        for product, _ in products.items():
            logging.info("generating {} initial {} stock".format(
                actor_name, product))

            pos_stock_size_gen = circus.generators[
                "pos_{}_init_stock_size_gen".format(product)]
            scaled_size_gen = patterns.scale_quantity_gen(
                stock_size_gen=pos_stock_size_gen,
                scale_factor=pos_per_dealer)

            item_id_gen = circus.generators["{}_id_gen".format(product)]
            bulk_gen = DependentBulkGenerator(element_generator=item_id_gen)
            stock_gen = scaled_size_gen.flatmap(bulk_gen)

            dealers.create_stock_relationship_grp(
                name=product, stock_bulk_gen=stock_gen)

    # no need to connect dist l2 to l1: that comes from the belgium component
    logging.info("connecting all dist_l1 to telco, for each product")
    telcos = circus.actors["telcos"]
    dist_l1 = circus.actors["dist_l1"]

    for product in products:
        provider_rel = dist_l1.create_relationship(
            name="{}__provider".format(product))

        # TODO: this assumes we have only one telco (I guess it's ok...)
        provider_rel.add_relations(
            from_ids=dist_l1.ids, to_ids=telcos.ids[0])
def add_pos(circus, params):
    """
    Create the "pos" population with its attributes, record on each site the
    list of POS located there, and initialise every product for the POS.

    :param circus: circus in which the population is created; must already
        hold a "sites" actor with a "GEO_LEVEL_1_POPULATION" attribute
    :param params: configuration; reads `params["n_pos"]` and
        `params["products"]`
    """
    n_pos = params["n_pos"]
    logging.info("creating {} POS".format(n_pos))

    pos = circus.create_population(
        name="pos",
        size=n_pos,
        ids_gen=SequencialGenerator(prefix="POS_"),
    )

    _create_attractiveness_attributes(circus, pos)

    logging.info("assigning a site to each POS")
    sites = circus.actors["sites"]

    # probability of each site to be chosen, based on geo_level1 population
    site_weight = sites \
        .get_attribute("GEO_LEVEL_1_POPULATION") \
        .get_values(None)
    site_probabilities = site_weight.values / sum(site_weight)

    site_gen = NumpyRandomGenerator(
        method="choice",
        seed=next(circus.seeder),
        a=sites.ids,
        p=site_probabilities,
    )
    pos.create_attribute("SITE", init_gen=site_gen)

    # generate a random pos location from around the SITE location
    _add_pos_latlong(circus, params)

    pos.create_attribute("MONGO_ID", init_gen=MongoIdGenerator())

    agent_name_gen = snd_constants.gen("POS_NAMES", next(circus.seeder))
    pos.create_attribute("AGENT_NAME", init_gen=agent_name_gen)

    contact_name_gen = snd_constants.gen(
        "CONTACT_NAMES", next(circus.seeder))
    pos.create_attribute("CONTACT_NAME", init_gen=contact_name_gen)

    contact_phone_gen = FakerGenerator(
        method="phone_number", seed=next(circus.seeder))
    pos.create_attribute("CONTACT_PHONE", init_gen=contact_phone_gen)

    logging.info("recording the list POS per site in site relationship")
    pos_of_site = sites.create_relationship("POS")
    pos_of_site.add_relations(
        from_ids=pos.get_attribute_values("SITE"),
        to_ids=pos.ids,
    )

    for product, description in params["products"].items():
        _init_pos_product(circus, product, description)
def create_agents(self):
    """
    Create the "agents" population (i.e. the customers) with an "AGENT_NAME"
    attribute and an empty "SIM" relationship, used to keep track of which
    SIMs are owned by which agent.

    NOTE(review): `params` is not an argument of this method; it is expected
    to be resolvable from the enclosing scope -- confirm.

    :return: the created population
    """
    logging.info("Creating agents ")

    agent_ids = SequencialGenerator(prefix="AGENT_", max_length=3)
    agents = self.create_population(
        name="agents",
        size=params["n_agents"],
        ids_gen=agent_ids,
    )

    # the SIM relationship is deliberately left empty: agents start with no
    # SIM
    agents.create_relationship(name="SIM")

    agent_names = FakerGenerator(seed=next(self.seeder), method="name")
    agents.create_attribute(name="AGENT_NAME", init_gen=agent_names)

    return agents
def step2():
    """
    Hello-world scenario: 1000 named persons each emit a "hello world" log
    entry at every clock tick, for 48h of simulated time with a 1h step.

    Logs go to "output/example1" and the number of logged "hello" lines is
    printed.
    """
    scenario = circus.Circus(
        name="example1",
        master_seed=12345,
        start=pd.Timestamp("1 Jan 2017 00:00"),
        step_duration=pd.Timedelta("1h"),
    )

    people = scenario.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"),
    )
    name_gen = FakerGenerator(method="name", seed=next(scenario.seeder))
    people.create_attribute("NAME", init_gen=name_gen)

    # a timer reset to 0 after each execution makes the story fire again at
    # the next clock tick (next hour)
    every_tick = ConstantDependentGenerator(value=0)
    story = scenario.create_story(
        name="hello_world",
        initiating_population=people,
        member_id_field="PERSON_ID",
        timer_gen=every_tick,
    )

    story_steps = [
        people.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),
        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),
        scenario.clock.ops.timestamp(named_as="TIME"),
        FieldLogger(log_id="hello"),
    ]
    story.set_operations(*story_steps)

    scenario.run(
        duration=pd.Timedelta("48h"),
        log_output_folder="output/example1",
        delete_existing_logs=True,
    )

    with open("output/example1/hello.csv") as log_file:
        # the first line is not counted (presumably the CSV header)
        n_logged = len(log_file.readlines()) - 1
        print("Logged {} lines".format(n_logged))
def create_circus_with_population():
    """
    Build the "example" circus containing a 1000-member "person" population
    with a Faker "NAME" attribute and a normally distributed "age" attribute
    (loc=35, scale=5).

    :return: the circus
    """
    example_circus = circus.Circus(
        name="example",
        master_seed=12345,
        start=pd.Timestamp("1 Jan 2017 00:00"),
        step_duration=pd.Timedelta("1h"),
    )

    people = example_circus.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"),
    )

    # the name generator takes its seed from the circus seeder before the
    # age generator does
    name_gen = FakerGenerator(
        method="name", seed=next(example_circus.seeder))
    people.create_attribute("NAME", init_gen=name_gen)

    age_gen = NumpyRandomGenerator(
        method="normal", loc=35, scale=5,
        seed=next(example_circus.seeder))
    people.create_attribute("age", init_gen=age_gen)

    return example_circus
timer_gen=WorkHoursTimerGenerator( clock=the_circus.clock, seed=next(the_circus.seeder)) ) hello_world.set_operations( # adding a random timestamp, within the current clock step the_circus.clock .ops .timestamp(named_as="TIME"), # message is now a random sentence from Faker FakerGenerator(method="sentence", nb_words=6, variable_nb_words=True, seed=next(the_circus.seeder) ) .ops .generate(named_as="MESSAGE"), # selecting a random "other person" the_circus.populations["person"] .ops .select_one(named_as="OTHER_PERSON"), the_circus.populations["person"] .ops .lookup(id_field="PERSON_ID", select={"NAME": "EMITTER_NAME"}), the_circus.populations["person"] .ops
from trumania.components.time_patterns.profilers import DefaultDailyTimerGenerator

util_functions.setup_logging()

# Simulation container: fixed master seed, clock starting on 1 Jan 2017 00:00
# and advancing by steps of one hour.
example_circus = circus.Circus(name="example",
                               master_seed=12345,
                               start=pd.Timestamp("1 Jan 2017 00:00"),
                               step_duration=pd.Timedelta("1h"))

# person population: 1000 members with ids prefixed by "PERSON_"
id_gen = SequencialGenerator(prefix="PERSON_")

# Each random generator takes its seed from the circus seeder (age first,
# then name).
# NOTE(review): a mean of 3 looks low for an age -- confirm loc=3 is intended.
age_gen = NumpyRandomGenerator(method="normal", loc=3, scale=5,
                               seed=next(example_circus.seeder))
name_gen = FakerGenerator(method="name", seed=next(example_circus.seeder))

person = example_circus.create_population(name="person", size=1000, ids_gen=id_gen)
person.create_attribute("NAME", init_gen=name_gen)
person.create_attribute("AGE", init_gen=age_gen)

# basic relationship to store people's quotes; the generator produces Faker
# sentences (nb_words=6, variable_nb_words=True)
quote_generator = FakerGenerator(method="sentence", nb_words=6, variable_nb_words=True,
                                 seed=next(example_circus.seeder))

# the relationship is created empty here; nothing in this excerpt fills it
quotes_rel = example_circus.populations["person"].create_relationship("quotes")
from trumania.core.random_generators import SequencialGenerator, FakerGenerator, NumpyRandomGenerator, ConstantDependentGenerator, ConstantGenerator import trumania.core.util_functions as util_functions util_functions.setup_logging() example_circus = circus.Circus(name="example", master_seed=12345, start=pd.Timestamp("1 Jan 2017 00:00"), step_duration=pd.Timedelta("1h")) id_gen = SequencialGenerator(prefix="PERSON_") age_gen = NumpyRandomGenerator(method="normal", loc=3, scale=5, seed=next(example_circus.seeder)) name_gen = FakerGenerator(method="name", seed=next(example_circus.seeder)) person = example_circus.create_population(name="person", size=1000, ids_gen=id_gen) person.create_attribute("NAME", init_gen=name_gen) person.create_attribute("AGE", init_gen=age_gen) hello_world = example_circus.create_story( name="hello_world", initiating_population=example_circus.populations["person"], member_id_field="PERSON_ID", timer_gen=ConstantDependentGenerator(value=1)) hello_world.set_operations( example_circus.clock.ops.timestamp(named_as="TIME"),
from trumania.core.random_generators import SequencialGenerator, FakerGenerator, NumpyRandomGenerator

util_functions.setup_logging()

logging.info("building circus")

# Simulation container: fixed master seed, clock starting on 1 Jan 2017 00:00
# and advancing by steps of one hour.
example = circus.Circus(name="example",
                        master_seed=12345,
                        start=pd.Timestamp("1 Jan 2017 00:00"),
                        step_duration=pd.Timedelta("1h"))

# 1000 persons with ids prefixed by "PERSON_"
person = example.create_population(
    name="person", size=1000,
    ids_gen=SequencialGenerator(prefix="PERSON_"))

# Attribute generators take their seed from the circus seeder (NAME first,
# then age).
person.create_attribute("NAME",
                        init_gen=FakerGenerator(method="name",
                                                seed=next(example.seeder)))

person.create_attribute("age",
                        init_gen=NumpyRandomGenerator(method="normal", loc=35, scale=5,
                                                      seed=next(
                                                          example.seeder)))

# Run for 48h of simulated time; existing logs in the output folder are
# deleted first. No story is registered on the circus in this excerpt.
example.run(duration=pd.Timedelta("48h"),
            log_output_folder="output/example2",
            delete_existing_logs=True)

# Log the first 10 rows of the population, rendered as a table.
logging.info(
    "10 first persons: \n" + tabulate(person.to_dataframe().head(10),
                                      headers='keys', tablefmt='psql'))
def step7():
    """
    Hello-world scenario with weighted sites, a random "friends" relationship
    and a computed price.

    Each person has a name, a popularity in [0, 1), five allowed sites (one
    weighted 0.4, one 0.3 and three 0.1) and a random set of friends. At each
    clock tick a person picks a friend (weighted by popularity), a site for
    each of them, and a duration from which a price in EUR is derived. The
    run lasts 48h of simulated time; logs go to "output/example1" and the
    number of logged "hello" lines is printed.
    """
    scenario = circus.Circus(
        name="example1",
        master_seed=12345,
        start=pd.Timestamp("1 Jan 2017 00:00"),
        step_duration=pd.Timedelta("1h"),
    )

    people = scenario.create_population(
        name="person",
        size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_"),
    )

    name_gen = FakerGenerator(method="name", seed=next(scenario.seeder))
    people.create_attribute("NAME", init_gen=name_gen)

    popularity_gen = NumpyRandomGenerator(
        method="uniform", low=0, high=1, seed=next(scenario.seeder))
    people.create_attribute("POPULARITY", init_gen=popularity_gen)

    site_ids = SequencialGenerator(prefix="SITE_").generate(1000)
    site_picker = NumpyRandomGenerator(
        method="choice", a=site_ids, seed=next(scenario.seeder))

    allowed_sites = people.create_relationship(name="sites")

    # SITES ------------------

    # one batch of relations per weight: HOME (0.4), WORK (0.3), then three
    # OTHER sites (0.1 each)
    for site_weight in (0.4, 0.3, 0.1, 0.1, 0.1):
        allowed_sites.add_relations(
            from_ids=people.ids,
            to_ids=site_picker.generate(people.size),
            weights=site_weight)

    # FRIENDS ------------------

    friends = people.create_relationship(name="friends")

    # p is the probability for a node to be connected to another one:
    # 5 friends on average = 5/1000
    friend_pairs = make_random_bipartite_data(
        people.ids,
        people.ids,
        p=0.005,
        seed=next(scenario.seeder))
    friends_df = pd.DataFrame.from_records(friend_pairs, columns=["A", "B"])

    friends.add_relations(from_ids=friends_df["A"], to_ids=friends_df["B"])

    # PRICE ------------------

    def price(story_data):
        """Return PRICE (0.05 per unit of DURATION) and CURRENCY columns."""
        priced = pd.DataFrame(index=story_data.index)
        priced["PRICE"] = story_data["DURATION"] * 0.05
        priced["CURRENCY"] = "EUR"
        return priced

    # STORIES ------------------

    # a timer reset to 0 after each execution makes the story fire again at
    # the next clock tick (next hour)
    every_tick = ConstantDependentGenerator(value=0)
    story = scenario.create_story(
        name="hello_world",
        initiating_population=people,
        member_id_field="PERSON_ID",
        timer_gen=every_tick,
    )

    duration_gen = NumpyRandomGenerator(
        method="exponential", scale=60, seed=next(scenario.seeder))

    story_steps = [
        people.ops.lookup(id_field="PERSON_ID", select={"NAME": "NAME"}),

        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),

        duration_gen.ops.generate(named_as="DURATION"),

        friends.ops.select_one(
            from_field="PERSON_ID",
            named_as="COUNTERPART_ID",
            weight=people.get_attribute_values("POPULARITY"),
            # Without this flag, people who have no friends would trigger a
            # lookup of the POPULARITY of None and crash; with it they are
            # discarded.
            discard_empty=True),

        people.ops.lookup(
            id_field="COUNTERPART_ID",
            select={"NAME": "COUNTER_PART_NAME"}),

        allowed_sites.ops.select_one(
            from_field="PERSON_ID", named_as="SITE"),

        allowed_sites.ops.select_one(
            from_field="COUNTERPART_ID", named_as="COUNTERPART_SITE"),

        Apply(source_fields=["DURATION", "SITE", "COUNTERPART_SITE"],
              named_as=["PRICE", "CURRENCY"],
              f=price,
              f_args="dataframe"),

        scenario.clock.ops.timestamp(named_as="TIME"),

        FieldLogger(log_id="hello"),
    ]
    story.set_operations(*story_steps)

    scenario.run(
        duration=pd.Timedelta("48h"),
        log_output_folder="output/example1",
        delete_existing_logs=True,
    )

    with open("output/example1/hello.csv") as log_file:
        # the first line is not counted (presumably the CSV header)
        n_logged = len(log_file.readlines()) - 1
        print("Logged {} lines".format(n_logged))