def lambda_handler(event, context):
    """Compute the per-user contribution ranking for one campaign feature
    type and persist it.

    :param event: dict with 'campaign_uuid' and 'type' keys.
    :param context: AWS Lambda context object (unused).
    :raises KeyError: if the requested type name is not in the campaign.
    """
    uuid = event['campaign_uuid']
    type_name = event['type']
    # Type ids are the type name with spaces replaced; used for file names.
    type_id = type_name.replace(' ', '_')
    campaign = fetch_campaign(campaign_path(uuid))

    # Locate the type definition matching the requested type name.
    # Fix: the original left 'typee' unbound (NameError) when nothing matched.
    typee = None
    for type_key in campaign['types']:
        if campaign['types'][type_key]['type'] == type_name:
            typee = campaign['types'][type_key]
    if typee is None:
        raise KeyError('type {} not found in campaign'.format(type_name))

    download_overpass_file(uuid, type_id)

    # The OSM tag key is the part before '=' in e.g. 'amenity=school'.
    tag_name = typee['feature'].split('=')[0]

    # Campaign dates converted to epoch milliseconds (UTC).
    start_date = calendar.timegm(
        datetime.datetime.strptime(
            campaign['start_date'], '%Y-%m-%d').timetuple()) * 1000
    end_date = calendar.timegm(
        datetime.datetime.strptime(
            campaign['end_date'], '%Y-%m-%d').timetuple()) * 1000

    # Fix: context manager guarantees the temp file is closed.
    with open('/tmp/{type_id}.xml'.format(type_id=type_id), 'r') as xml_file:
        sorted_user_list = osm_object_contributions(
            xml_file, tag_name, start_date, end_date)
    save_data(uuid, type_id, sorted_user_list)
def test_raster_warping_does_not_overclip_source():
    """Render a warped raster and check a pixel that would be missing if
    the source raster were clipped too aggressively during reprojection."""
    layer_srs = "+init=epsg:32630"
    map_srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    layer = mapnik.Layer('dataraster', layer_srs)
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        layer.datasource = mapnik.Gdal(file='../data/raster/dataraster.tif',
                                       band=1)
    symbolizer = mapnik.RasterSymbolizer()
    symbolizer.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                                  mapnik.Color(255, 255, 0))
    rule = mapnik.Rule()
    rule.symbols.append(symbolizer)
    style = mapnik.Style()
    style.rules.append(rule)
    rendered_map = mapnik.Map(256, 256, map_srs)
    rendered_map.background = mapnik.Color('white')
    rendered_map.append_style('foo', style)
    layer.styles.append('foo')
    rendered_map.layers.append(layer)
    rendered_map.zoom_to_box(mapnik.Box2d(3, 42, 4, 43))
    image = mapnik.Image(rendered_map.width, rendered_map.height)
    mapnik.render(rendered_map, image)
    # Keep a png on disk for visual inspection.
    save_data('test_raster_warping_does_not_overclip_source.png',
              image.tostring('png'))
    assert image.view(0, 200, 1, 1).tostring() == '\xff\xff\x00\xff'
def grid_search(self, kwargs):
    """Train and test the classifier for every combination of the
    hyper-parameter lists in ``kwargs``, appending results to a file."""
    make_dir("../evaluations")
    # Cartesian product of all hyper-parameter candidate lists.
    grid = product(kwargs["wordNgrams"], kwargs["bucket"], kwargs["lr"],
                   kwargs["dim"], kwargs["epoch"], kwargs["loss"])
    for ngrams, bucket, lr, dim, epoch, loss in grid:
        # Overwrite kwargs in place with the concrete combination.
        kwargs["wordNgrams"] = ngrams
        kwargs["bucket"] = int(bucket)
        kwargs["lr"] = lr
        kwargs["dim"] = dim
        kwargs["epoch"] = epoch
        kwargs["loss"] = loss
        parameters = " ".join(
            map(str, [kwargs["wordNgrams"], kwargs["bucket"], kwargs["lr"],
                      kwargs["dim"], kwargs["epoch"], kwargs["loss"]]))
        self.trainClassifier(**kwargs)
        results = "{}\n{}\n\n".format(
            parameters, self.testClassifier(kwargs["name"]))
        save_data(directory="../evaluations", name="results.txt",
                  docs=results, mode="a")
def test_raster_with_alpha_blends_correctly_with_background():
    """A fully white raster carrying an alpha channel must blend to pure
    white over a white map background."""
    width = 500
    height = 500
    m = mapnik.Map(width, height)
    white = mapnik.Color(255, 255, 255)
    m.background = white
    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    symbolizer.scaling = mapnik.scaling_method.BILINEAR
    rule.symbols.append(symbolizer)
    style.rules.append(rule)
    m.append_style('raster_style', style)
    raster_layer = mapnik.Layer('test_layer')
    filepath = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        raster_layer.datasource = mapnik.Gdal(file=filepath)
    raster_layer.styles.append('raster_style')
    m.layers.append(raster_layer)
    m.zoom_all()
    mim = mapnik.Image(width, height)
    mapnik.render(m, mim)
    save_data('test_raster_with_alpha_blends_correctly_with_background.png',
              mim.tostring('png'))
    imdata = mim.tostring()
    # All white is expected.
    assert contains_word('\xff\xff\xff\xff', imdata)
def test_raster_warping_does_not_overclip_source():
    """Warp-render a UTM raster into longlat and verify the yellow pixel at
    (0, 200) survives, i.e. the source was not over-clipped."""
    source_srs = "+init=epsg:32630"
    target_srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    raster_layer = mapnik.Layer('dataraster', source_srs)
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        raster_layer.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif', band=1)
    raster_sym = mapnik.RasterSymbolizer()
    raster_sym.colorizer = mapnik.RasterColorizer(
        mapnik.COLORIZER_DISCRETE, mapnik.Color(255, 255, 0))
    the_rule = mapnik.Rule()
    the_rule.symbols.append(raster_sym)
    the_style = mapnik.Style()
    the_style.rules.append(the_rule)
    m = mapnik.Map(256, 256, target_srs)
    m.background = mapnik.Color('white')
    m.append_style('foo', the_style)
    raster_layer.styles.append('foo')
    m.layers.append(raster_layer)
    m.zoom_to_box(mapnik.Box2d(3, 42, 4, 43))
    im = mapnik.Image(m.width, m.height)
    mapnik.render(m, im)
    # Keep a png on disk for visual inspection.
    save_data('test_raster_warping_does_not_overclip_source.png',
              im.tostring('png'))
    assert im.view(0, 200, 1, 1).tostring() == '\xff\xff\x00\xff'
def test_raster_warping():
    """Reproject a UTM data raster into a longlat map and check that the
    yellow colour stop appears in the rendered output."""
    layer_srs = "+init=epsg:32630"
    map_srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    layer = mapnik.Layer('dataraster', layer_srs)
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        layer.datasource = mapnik.Gdal(file='../data/raster/dataraster.tif',
                                       band=1)
    sym = mapnik.RasterSymbolizer()
    sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                           mapnik.Color(255, 255, 0))
    rule = mapnik.Rule()
    rule.symbols.append(sym)
    style = mapnik.Style()
    style.rules.append(rule)
    warped_map = mapnik.Map(256, 256, map_srs)
    warped_map.append_style('foo', style)
    layer.styles.append('foo')
    warped_map.layers.append(layer)
    # Transform the layer extent from the layer SRS into the map SRS.
    prj_trans = mapnik.ProjTransform(mapnik.Projection(map_srs),
                                     mapnik.Projection(layer_srs))
    warped_map.zoom_to_box(prj_trans.backward(layer.envelope()))
    im = mapnik.Image(warped_map.width, warped_map.height)
    mapnik.render(warped_map, im)
    # Keep a png on disk for visual inspection.
    save_data('test_raster_warping.png', im.tostring('png'))
    imdata = im.tostring()
    assert contains_word('\xff\xff\x00\xff', imdata)
def main(event, context):
    """Count features of one campaign type from its Overpass XML dump and
    store a pie-chart summary.

    :param event: dict with 'campaign_uuid' and 'type' keys.
    :param context: Lambda context object (unused).
    """
    logger.info('got event{}'.format(event))
    uuid = event['campaign_uuid']
    type_name = event['type']
    # Type ids are the type name with spaces replaced; used for file names.
    type_id = type_name.replace(' ', '_')
    campaign = fetch_campaign(
        campaign_path=campaign_path(uuid))

    # Pick the matching type definition. NOTE(review): if no type matches,
    # 'typee' is never bound and the code below raises NameError — behaviour
    # kept; confirm whether the event is always validated upstream.
    for type_key in campaign['types']:
        if campaign['types'][type_key]['type'] == type_name:
            typee = campaign['types'][type_key]

    download_overpass_file(uuid, type_id)
    parser = CountFeatureParser(typee['feature'])
    # Fix: context manager guarantees the downloaded temp file is closed.
    with open('/tmp/{type_id}.xml'.format(type_id=type_id), 'r') as xml_file:
        try:
            xml.sax.parse(xml_file, parser)
        except xml.sax.SAXParseException:
            print('FAIL')
    output = {
        'type_id': type_id,
        'type_name': type_name,
        'piechart': to_piechart(parser.count)
    }
    save_data(uuid, type_id, output)
def generate_challenge(key, mac_key, challenge_size=32, bytes_per_hash=1,
                       hash_function="sha256", unencrypted_data='',
                       answer=bytes()):
    """ Create a challenge that only the holder of key should be able to solve.
        mac_key is required to assure integrity and authenticity of the
        challenge to the client.
        challenge_size is the total amount of data the client must crack.
        A random challenge of challenge_size is generated, and separated into
        challenge_size / bytes_per_hash subchallenges. The time taken to crack
        a single subchallenge is O(2**n) (? not sure!), where n is the number
        of bytes_per_hash.
        hash_function is a string name of an algorithm available in the
        hashlib module.
        unencrypted_data is an optional string of data to be packaged with the
        challenge. The data is not kept confidential, but possesses integrity
        and authenticity because of the message authentication code over the
        entire package.
        answer is an optional string, that when supplied, is used instead of a
        random challenge. If supplied, the challenge_size argument has no
        effect. """
    import os  # local import keeps the module's import surface unchanged

    # Fix: use the public os.urandom instead of the private random._urandom
    # alias, which is an implementation detail and not a stable API.
    answer = answer or os.urandom(challenge_size)
    challenge = encrypt(answer, key, hmac_factory(hash_function),
                        input_block_size=bytes_per_hash)
    package = save_data(challenge, bytes_per_hash, unencrypted_data)
    mac = generate_mac(mac_key, package, hash_function)
    return (save_data(mac, hash_function, package), answer)
def main(event, context):
    """Measure attribute completeness of one campaign feature type from its
    Overpass XML dump, save the results, and fan out follow-up lambdas.

    event: dict with 'campaign_uuid' and 'type' keys.
    context: Lambda context object (unused).
    """
    logger.info('got event{}'.format(event))
    uuid = event['campaign_uuid']
    type_name = event['type']
    # Type ids are the type name with spaces replaced; used for file names.
    type_id = type_name.replace(' ', '_')
    campaign = fetch_campaign(campaign_path(uuid))
    # Pick the type definition matching the requested type name.
    # NOTE(review): if nothing matches, 'typee' stays unbound -> NameError.
    for type_key in campaign['types']:
        if campaign['types'][type_key]['type'] == type_name:
            typee = campaign['types'][type_key]
    logger.info(typee['tags'])
    required_tags = fix_tags(typee['tags'])
    logger.info(required_tags)
    render_data_path = build_render_data_path(
        campaign_path=campaign_path(uuid),
        type_id=type_id)
    download_overpass_file(uuid, type_id)
    # element_type is optional in the type definition.
    if 'element_type' in typee:
        element_type = typee['element_type']
    else:
        element_type = None
    # NOTE(review): file handle is never closed explicitly.
    xml_file = open('/tmp/{type_id}.xml'.format(type_id=type_id), 'r')
    parser = FeatureCompletenessParser(required_tags, render_data_path,
                                       element_type)
    try:
        xml.sax.parse(xml_file, parser)
    except xml.sax.SAXParseException:
        print('FAIL')
    # NOTE(review): xml.sax.parse already calls endDocument on success, so
    # this may run the handler's end-of-document logic twice — confirm that
    # FeatureCompletenessParser.endDocument is idempotent.
    parser.endDocument()
    processed_data = {
        'type_id': type_id,
        'type_name': type_name,
        'percentage': compute_completeness_pct(
            features_collected=parser.features_collected,
            features_completed=parser.features_completed),
        'features_collected': parser.features_collected,
        'features_completed': parser.features_completed,
        'checked_attributes': list(required_tags.keys()),
        'geojson_files_count': parser.geojson_file_manager.count,
        'errors_files_count': parser.errors_file_manager.count,
        'error_ids': parser.error_ids
    }
    save_data(uuid, type_id, processed_data)
    # Kick off downstream processing for this type.
    invoke_download_errors(uuid, type_name)
    invoke_render_feature(uuid, type_name)
    invoke_process_make_vector_tiles(uuid, type_name)
def test_raster_warping():
    """Render a reprojected data raster and assert the yellow colour stop
    shows up in the image bytes."""
    src_srs = "+init=epsg:32630"
    dst_srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    data_layer = mapnik.Layer('dataraster', src_srs)
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        data_layer.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif', band=1)
    symbolizer = mapnik.RasterSymbolizer()
    symbolizer.colorizer = mapnik.RasterColorizer(
        mapnik.COLORIZER_DISCRETE, mapnik.Color(255, 255, 0))
    rule = mapnik.Rule()
    rule.symbols.append(symbolizer)
    style = mapnik.Style()
    style.rules.append(rule)
    m = mapnik.Map(256, 256, dst_srs)
    m.append_style('foo', style)
    data_layer.styles.append('foo')
    m.layers.append(data_layer)
    # Project the layer extent into the map SRS before zooming.
    transform = mapnik.ProjTransform(mapnik.Projection(dst_srs),
                                     mapnik.Projection(src_srs))
    m.zoom_to_box(transform.backward(data_layer.envelope()))
    canvas = mapnik.Image(m.width, m.height)
    mapnik.render(m, canvas)
    # Keep a png on disk for visual inspection.
    save_data('test_raster_warping.png', canvas.tostring('png'))
    assert contains_word('\xff\xff\x00\xff', canvas.tostring())
def test_multi_tile_policy():
    """Render a 2x2 multi-tile raster and verify each quadrant carries its
    tile's colour at four sample points."""
    srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    layer = mapnik.Layer('raster')
    if 'raster' in mapnik.DatasourceCache.instance().plugin_names():
        layer.datasource = mapnik.Raster(
            file='../data/raster_tiles/${x}/${y}.tif',
            lox=-180, loy=-90, hix=180, hiy=90,
            multi=1, tile_size=256, x_width=2, y_width=2)
    layer.srs = srs
    _map = mapnik.Map(256, 256, srs)
    style = mapnik.Style()
    rule = mapnik.Rule()
    rule.symbols.append(mapnik.RasterSymbolizer())
    style.rules.append(rule)
    _map.append_style('foo', style)
    layer.styles.append('foo')
    _map.layers.append(layer)
    _map.zoom_to_box(layer.envelope())
    im = mapnik.Image(_map.width, _map.height)
    mapnik.render(_map, im)
    save_data('test_multi_tile_policy.png', im.tostring('png'))
    # (sample points, expected RGBA) per chunk: green, blue, red, magenta.
    chunks = [
        (((0, 64), (127, 64), (0, 127), (127, 127)), '\x00\xff\x00\xff'),
        (((128, 64), (255, 64), (128, 127), (255, 127)), '\x00\x00\xff\xff'),
        (((0, 128), (127, 128), (0, 191), (127, 191)), '\xff\x00\x00\xff'),
        (((128, 128), (255, 128), (128, 191), (255, 191)), '\xff\x00\xff\xff'),
    ]
    for points, expected in chunks:
        for x, y in points:
            eq_(im.view(x, y, 1, 1).tostring(), expected)
def test_multi_tile_policy():
    """Render a 2x2 tiled raster dataset and assert every quadrant shows
    the colour of its source tile."""
    srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    tiled_layer = mapnik.Layer('raster')
    if 'raster' in mapnik.DatasourceCache.plugin_names():
        tiled_layer.datasource = mapnik.Raster(
            file='../data/raster_tiles/${x}/${y}.tif',
            lox=-180, loy=-90, hix=180, hiy=90,
            multi=1, tile_size=256, x_width=2, y_width=2)
    tiled_layer.srs = srs
    m = mapnik.Map(256, 256, srs)
    style = mapnik.Style()
    rule = mapnik.Rule()
    rule.symbols.append(mapnik.RasterSymbolizer())
    style.rules.append(rule)
    m.append_style('foo', style)
    tiled_layer.styles.append('foo')
    m.layers.append(tiled_layer)
    m.zoom_to_box(tiled_layer.envelope())
    im = mapnik.Image(m.width, m.height)
    mapnik.render(m, im)
    save_data('test_multi_tile_policy.png', im.tostring('png'))

    def probe(x, y, expected):
        # Sample a single pixel and compare its RGBA bytes.
        eq_(im.view(x, y, 1, 1).tostring(), expected)

    # test green chunk
    probe(0, 64, '\x00\xff\x00\xff')
    probe(127, 64, '\x00\xff\x00\xff')
    probe(0, 127, '\x00\xff\x00\xff')
    probe(127, 127, '\x00\xff\x00\xff')
    # test blue chunk
    probe(128, 64, '\x00\x00\xff\xff')
    probe(255, 64, '\x00\x00\xff\xff')
    probe(128, 127, '\x00\x00\xff\xff')
    probe(255, 127, '\x00\x00\xff\xff')
    # test red chunk
    probe(0, 128, '\xff\x00\x00\xff')
    probe(127, 128, '\xff\x00\x00\xff')
    probe(0, 191, '\xff\x00\x00\xff')
    probe(127, 191, '\xff\x00\x00\xff')
    # test magenta chunk
    probe(128, 128, '\xff\x00\xff\xff')
    probe(255, 128, '\xff\x00\xff\xff')
    probe(128, 191, '\xff\x00\xff\xff')
    probe(255, 191, '\xff\x00\xff\xff')
def settings_menu(prevMenu):
    """Run the audio settings menu; persist volume/rate when the user picks
    'save and return to previous menu', then go back to the calling menu."""
    STANDARD_MENU.reset(False)
    STANDARD_MENU.allow(AudioBox.ALL)
    STANDARD_MENU.add_callback_function(utilities.tts_change_volume,
                                        'volume', False)
    STANDARD_MENU.add_callback_function(utilities.tts_change_rate,
                                        'rate', False)
    STANDARD_MENU.add_item('save and return to previous menu', False)
    selection = STANDARD_MENU.run('settings', False)
    # Item 3 is 'save and return to previous menu'.
    if selection == 3:
        utilities.save_data(
            [utilities.SPEECH_VOLUME, utilities.SPEECH_RATE],
            'settings.dat')
    if prevMenu == 'start':
        start_menu()
def test_renders_with_agg():
    """Render the glyph symbolizer with the AGG renderer and check that a
    red (glyph-coloured) pixel appears in the output."""
    sym = mapnik2.GlyphSymbolizer("DejaVu Sans Condensed",
                                  mapnik2.Expression("'í'"))
    sym.allow_overlap = True
    # +90 so the top of the glyph points upwards.
    sym.angle = mapnik2.Expression("[azimuth]+90")
    sym.size = mapnik2.Expression("[value]")
    sym.color = mapnik2.Expression("'#ff0000'")
    glyph_map = create_map_and_append_symbolyzer(sym)
    canvas = mapnik2.Image(glyph_map.width, glyph_map.height)
    mapnik2.render(glyph_map, canvas)
    save_data('agg_glyph_symbolizer.png', canvas.tostring('png'))
    assert contains_word('\xff\x00\x00\xff', canvas.tostring())
def test_renders_with_agg():
    """AGG rendering of the glyph symbolizer must produce at least one red
    pixel (the configured glyph colour)."""
    symbolizer = mapnik2.GlyphSymbolizer("DejaVu Sans Condensed",
                                         mapnik2.Expression("'í'"))
    symbolizer.allow_overlap = True
    # +90 so the top of the glyph points upwards.
    symbolizer.angle = mapnik2.Expression("[azimuth]+90")
    symbolizer.size = mapnik2.Expression("[value]")
    symbolizer.color = mapnik2.Expression("'#ff0000'")
    target_map = create_map_and_append_symbolyzer(symbolizer)
    output = mapnik2.Image(target_map.width, target_map.height)
    mapnik2.render(target_map, output)
    save_data('agg_glyph_symbolizer.png', output.tostring('png'))
    assert contains_word('\xff\x00\x00\xff', output.tostring())
def save(self):
    """Serialise the model's dialogues into a plain dict and write it to
    the configured data file."""
    dialogues = []
    # Dialogue ids are 1-based: "<dataset>_<index>".
    for dialogue_index, dialogue in enumerate(self.model.dialogues, start=1):
        utterances = []
        for utterance in dialogue.utterances:
            tmp_utterance = {
                'speaker': utterance.speaker,
                'text': utterance.text,
                'ap_label': utterance.ap_label,
                'da_label': utterance.da_label,
            }
            # Slots are optional per-utterance metadata.
            if utterance.slots is not None:
                tmp_utterance['slots'] = utterance.slots
            utterances.append(tmp_utterance)
        tmp_dialogue = {
            'dialogue_id': self.model.dataset + "_" + str(dialogue_index),
            'num_utterances': dialogue.num_utterances,
            'utterances': utterances,
        }
        # Scenario is optional per-dialogue metadata.
        if dialogue.scenario is not None:
            tmp_dialogue['scenario'] = dialogue.scenario
        dialogues.append(tmp_dialogue)
    # Top-level payload: dataset name, dialogue count and the dialogues.
    save_data = {
        'dataset': self.model.dataset,
        'num_dialogues': self.model.num_dialogues,
        'dialogues': dialogues,
    }
    utils.save_data(self.data_path, self.dialogue_file, save_data)
def serialize(python_object):
    """Recursively convert ``python_object`` into a packed structure blob.

    Dicts are copied, lists become index-keyed dicts, and other objects fall
    back to ``__reduce__`` or ``__dict__``. Container-valued attributes are
    serialised recursively, and the result is packed via the project's
    struct helpers.

    Fix: leftover debug ``print`` statements (Python 2 syntax) removed.
    """
    if isinstance(python_object, dict):
        attributes = python_object.copy()
    elif isinstance(python_object, list):
        # Represent a list as a dict keyed by stringified index.
        attributes = dict((str(index), item)
                          for index, item in enumerate(python_object))
    else:
        try:
            # NOTE(review): __reduce__ normally returns a tuple, not a
            # mapping; presumably only objects overriding it to return a
            # dict reach this path — confirm with callers.
            attributes = python_object.__reduce__()
        except (TypeError, AttributeError):
            attributes = python_object.__dict__.copy()
    sub_structs = []
    for attribute, value in attributes.items():
        # Containers are serialised recursively into packed sub-structures.
        if isinstance(value, (dict, tuple, list)):
            attributes[attribute] = serialize(value)
            sub_structs.append(attribute)
    attribute_types = dict((key, type(value))
                           for key, value in attributes.items())
    struct_type = new_struct_type(python_object.__class__.__name__,
                                  **attribute_types)
    struct = struct_type(**attributes)
    return utilities.save_data(sub_structs, pack_structure(struct))
def createTestingCorpus(self, df, name):
    """
    Appends one "<rating> <review>" line per row of ``df`` to the named
    test-set file.

    Params:
        df: dataframe with 'summary', 'reviewText' and 'overall' columns.
        name: suffix used to build the output file name.
    Return:
        None
    """
    # Combine the short summary and the full review into one text column.
    df["reviews"] = df["summary"] + ". " + df["reviewText"]
    for _, temp in df.iterrows():
        # NOTE(review): assumes temp.overall is already a string; a numeric
        # rating column would raise TypeError here — confirm upstream dtype.
        data = temp.overall + " " + temp.reviews + "\n"
        save_data("../Dataset/test_set/", "test_{}.txt".format(name),
                  data, mode="a")
def test_dataraster_coloring():
    """Colour a raw data raster through a discrete colorizer and check the
    yellow stop (values in [20,30)) appears in the rendered image."""
    srs = '+init=epsg:32630'
    layer = mapnik.Layer('dataraster')
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        layer.datasource = mapnik.Gdal(file='../data/raster/dataraster.tif',
                                       band=1)
    layer.srs = srs
    _map = mapnik.Map(256, 256, srs)
    style = mapnik.Style()
    rule = mapnik.Rule()
    sym = mapnik.RasterSymbolizer()
    # A colorizer tells the symbolizer to colour the raw data raster itself.
    sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                           mapnik.Color("transparent"))
    stops = ((0, "#0044cc"), (10, "#00cc00"), (20, "#ffff00"),
             (30, "#ff7f00"), (40, "#ff0000"), (50, "#ff007f"),
             (60, "#ff00ff"), (70, "#cc00cc"), (80, "#990099"),
             (90, "#660066"), (200, "transparent"))
    for value, color in stops:
        sym.colorizer.add_stop(value, mapnik.Color(color))
    rule.symbols.append(sym)
    style.rules.append(rule)
    _map.append_style('foo', style)
    layer.styles.append('foo')
    _map.layers.append(layer)
    _map.zoom_to_box(layer.envelope())
    im = mapnik.Image(_map.width, _map.height)
    mapnik.render(_map, im)
    # Keep a png on disk for visual inspection.
    save_data('test_dataraster_coloring.png', im.tostring('png'))
    imdata = im.tostring()
    # Values in the [20,30) interval exist, so yellow must be present.
    assert contains_word('\xff\xff\x00\xff', imdata)
def test_single(f=None, do_plot=False, do_save=False, title=""):
    """Run one synthetic entity-resolution experiment.

    f: noise parameter; read from argv[1] (default 0.4) when not given.
    do_plot: plot per-iteration scores with matplotlib.
    do_save: persist all local variables via util.save_data.
    title: suffix for the saved-data name.
    """
    if f is None:
        if len(sys.argv) > 1:
            f = float(sys.argv[1])
        else:
            f = 0.4
    if len(sys.argv) > 2:
        max_entities = int(sys.argv[2])
    else:
        max_entities = None
    g = 0.5
    gap_cost = f
    mai = 1
    # Method toggles left from experimentation; the last assignment wins.
    method = 'binB-LD'
    method = 'LD'
    #method = 'mKlau'
    #method = 'upProgmKlau'
    method = 'progmKlau'
    #method = 'isorankn'
    #method = 'rand'
    seed = np.random.randint(0, 1000000)
    # Fixed seed overrides the random one for reproducibility.
    seed = 45398
    pr, re, f1, o1 = single_cer(f, g=g, gap_cost=gap_cost, seed=seed,
                                method=method, n_input_graphs=2,
                                n_duplicates=30, p_keep_edge=0.8,
                                density_multiplier=1.1, n_entities=50,
                                n_input_graph_nodes=50, max_iters=300,
                                max_algorithm_iterations=mai, shuffle=False,
                                max_entities=max_entities)
    if do_save:
        # NOTE(review): saves *all* locals, including large intermediates.
        util.save_data(locals(), "single_synthetic_" + title)
    if do_plot:
        plt.plot(o1['Zd_scores'], '-x')
        plt.plot(o1['feasible_scores'], '-o')
        plt.show()
def test_dataraster_coloring():
    """Render a raw data raster through a discrete colorizer; the yellow
    stop covering [20,30) must show up in the output bytes."""
    srs = '+init=epsg:32630'
    data_layer = mapnik.Layer('dataraster')
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        data_layer.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif', band=1)
    data_layer.srs = srs
    m = mapnik.Map(256, 256, srs)
    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    # Attaching a colorizer makes the symbolizer colour raw data values.
    symbolizer.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                                  mapnik.Color("transparent"))
    color_stops = [
        (0, "#0044cc"),
        (10, "#00cc00"),
        (20, "#ffff00"),
        (30, "#ff7f00"),
        (40, "#ff0000"),
        (50, "#ff007f"),
        (60, "#ff00ff"),
        (70, "#cc00cc"),
        (80, "#990099"),
        (90, "#660066"),
        (200, "transparent"),
    ]
    for stop_value, stop_color in color_stops:
        symbolizer.colorizer.add_stop(stop_value, mapnik.Color(stop_color))
    rule.symbols.append(symbolizer)
    style.rules.append(rule)
    m.append_style('foo', style)
    data_layer.styles.append('foo')
    m.layers.append(data_layer)
    m.zoom_to_box(data_layer.envelope())
    canvas = mapnik.Image(m.width, m.height)
    mapnik.render(m, canvas)
    # Keep a png on disk for visual inspection.
    save_data('test_dataraster_coloring.png', canvas.tostring('png'))
    # Values in the [20,30) interval exist, so yellow must be present.
    assert contains_word('\xff\xff\x00\xff', canvas.tostring())
def test_renders_with_cairo():
    """Render the glyph symbolizer through the Cairo backend and check for
    a red (glyph-coloured) pixel. Returns early when pycairo is missing.

    Fix: removed the unused local ``from cStringIO import StringIO`` import.
    """
    if not mapnik2.has_pycairo():
        return
    sym = mapnik2.GlyphSymbolizer("DejaVu Sans Condensed",
                                  mapnik2.Expression("'í'"))
    sym.allow_overlap = True
    # +90 so the top of the glyph points upwards.
    sym.angle = mapnik2.Expression("[azimuth]+90")
    sym.size = mapnik2.Expression("[value]")
    sym.color = mapnik2.Expression("'#ff0000'")
    _map = create_map_and_append_symbolyzer(sym)
    import cairo
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 256, 256)
    mapnik2.render(_map, surface)
    im = mapnik2.Image.from_cairo(surface)
    save_data('cairo_glyph_symbolizer.png', im.tostring('png'))
    assert contains_word('\xff\x00\x00\xff', im.tostring())
def test_renders_with_cairo():
    """Cairo rendering of the glyph symbolizer must produce a red pixel.
    Returns early when pycairo is unavailable.

    Fix: removed the unused local ``from cStringIO import StringIO`` import.
    """
    if not mapnik.has_pycairo():
        return
    sym = mapnik.GlyphSymbolizer("DejaVu Sans Condensed",
                                 mapnik.Expression("'í'"))
    sym.allow_overlap = True
    # +90 so the top of the glyph points upwards.
    sym.angle = mapnik.Expression(
        "[azimuth]+90")
    sym.size = mapnik.Expression("[value]")
    sym.color = mapnik.Expression("'#ff0000'")
    _map = create_map_and_append_symbolyzer(sym)
    if _map:
        import cairo
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 256, 256)
        mapnik.render(_map, surface)
        im = mapnik.Image.from_cairo(surface)
        save_data('cairo_glyph_symbolizer.png', im.tostring('png'))
        assert contains_word('\xff\x00\x00\xff', im.tostring())
def createTrainingCorpus(self, df, name):
    """
    Creates a fastText training file with the rating label prepended to the
    beginning of each review.

    Params:
        df: dataframe with 'ratings' and 'reviews' columns.
        name: base name of the output .txt file.
    Returns:
        None
    """
    # Shuffle rows so labels are not grouped together in the output file.
    df = df.sample(frac=1).reset_index(drop=True)
    ratings = df["ratings"].tolist()
    reviews = df["reviews"].tolist()
    for rating, review in zip(ratings, reviews):
        # fastText expects lines of the form '__label__<label> <text>'.
        doc = "__label__{}".format(rating) + " " + review.strip()
        # Tokenise and drop single-character tokens.
        doc = " ".join(
            [word for word in word_tokenize(doc) if len(word) > 1])
        save_data("../Dataset/training_processed", "{}.txt".format(name),
                  doc + "\n", mode="a")
def test_raster_with_alpha_blends_correctly_with_background():
    """A white raster carrying alpha, composited over a white background,
    should render as pure white."""
    w = 500
    h = 500
    bg_map = mapnik.Map(w, h)
    bg_map.background = mapnik.Color(255, 255, 255)
    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    #XXX: This fixes it, see http://trac.mapnik.org/ticket/759#comment:3
    # (and remove comment when this test passes)
    #symbolizer.scaling="bilinear_old"
    rule.symbols.append(symbolizer)
    style.rules.append(rule)
    bg_map.append_style('raster_style', style)
    layer = mapnik.Layer('test_layer')
    filepath = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        layer.datasource = mapnik.Gdal(file=filepath)
    layer.styles.append('raster_style')
    bg_map.layers.append(layer)
    bg_map.zoom_all()
    mim = mapnik.Image(w, h)
    mapnik.render(bg_map, mim)
    save_data('test_raster_with_alpha_blends_correctly_with_background.png',
              mim.tostring('png'))
    # All white is expected.
    assert contains_word('\xff\xff\xff\xff', mim.tostring())
def test_raster_with_alpha_blends_correctly_with_background():
    """White-with-alpha raster over a white background: the blend result
    must be pure white everywhere checked."""
    canvas_w, canvas_h = 500, 500
    canvas_map = mapnik.Map(canvas_w, canvas_h)
    canvas_map.background = mapnik.Color(255, 255, 255)
    style = mapnik.Style()
    rule = mapnik.Rule()
    raster_sym = mapnik.RasterSymbolizer()
    #XXX: This fixes it, see http://trac.mapnik.org/ticket/759#comment:3
    # (and remove comment when this test passes)
    #symbolizer.scaling="bilinear_old"
    rule.symbols.append(raster_sym)
    style.rules.append(rule)
    canvas_map.append_style('raster_style', style)
    alpha_layer = mapnik.Layer('test_layer')
    source_path = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        alpha_layer.datasource = mapnik.Gdal(file=source_path)
    alpha_layer.styles.append('raster_style')
    canvas_map.layers.append(alpha_layer)
    canvas_map.zoom_all()
    result = mapnik.Image(canvas_w, canvas_h)
    mapnik.render(canvas_map, result)
    save_data('test_raster_with_alpha_blends_correctly_with_background.png',
              result.tostring('png'))
    image_bytes = result.tostring()
    # All white is expected.
    assert contains_word('\xff\xff\xff\xff', image_bytes)
def test_raster_with_alpha_blends_correctly_with_background():
    """Bilinear-scaled white raster with alpha over a white background must
    blend to pure white."""
    dim = 500
    target_map = mapnik.Map(dim, dim)
    target_map.background = mapnik.Color(255, 255, 255)
    style = mapnik.Style()
    rule = mapnik.Rule()
    raster_sym = mapnik.RasterSymbolizer()
    raster_sym.scaling = mapnik.scaling_method.BILINEAR
    rule.symbols.append(raster_sym)
    style.rules.append(rule)
    target_map.append_style('raster_style', style)
    source_layer = mapnik.Layer('test_layer')
    source_path = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        source_layer.datasource = mapnik.Gdal(file=source_path)
    source_layer.styles.append('raster_style')
    target_map.layers.append(source_layer)
    target_map.zoom_all()
    rendered = mapnik.Image(dim, dim)
    mapnik.render(target_map, rendered)
    save_data('test_raster_with_alpha_blends_correctly_with_background.png',
              rendered.tostring('png'))
    # All white is expected.
    assert contains_word('\xff\xff\xff\xff', rendered.tostring())
def test_single(f=None, do_plot=False, do_save=False, title=""):
    """Run one synthetic entity-resolution experiment.

    f: noise parameter; read from argv[1] (default 0.4) when not given.
    do_plot: plot per-iteration scores with matplotlib.
    do_save: persist all local variables via util.save_data.
    title: suffix for the saved-data name.
    """
    if f is None:
        if len(sys.argv) > 1:
            f = float(sys.argv[1])
        else:
            f = 0.4
    if len(sys.argv) > 2:
        max_entities = int(sys.argv[2])
    else:
        max_entities = None
    g = 0.5
    gap_cost = f
    mai = 1
    # Method toggles left from experimentation; the last assignment wins.
    method = 'binB-LD'
    method = 'LD'
    #method = 'mKlau'
    #method = 'upProgmKlau'
    method = 'progmKlau'
    #method = 'isorankn'
    #method = 'rand'
    seed = np.random.randint(0, 1000000)
    # Fixed seed overrides the random one for reproducibility.
    seed = 45398
    pr, re, f1, o1 = single_cer(
        f, g=g, gap_cost=gap_cost, seed=seed, method=method,
        n_input_graphs=2, n_duplicates=30, p_keep_edge=0.8,
        density_multiplier=1.1, n_entities=50, n_input_graph_nodes=50,
        max_iters=300, max_algorithm_iterations=mai, shuffle=False,
        max_entities=max_entities)
    if do_save:
        # NOTE(review): saves *all* locals, including large intermediates.
        util.save_data(locals(), "single_synthetic_" + title)
    if do_plot:
        plt.plot(o1['Zd_scores'], '-x')
        plt.plot(o1['feasible_scores'], '-o')
        plt.show()
def get_structure_bytestream(structure):
    """Pack a ctypes Structure into a bytestream via utilities.save_data.

    Scalar fields are packed with struct.pack using a per-type format
    character; nested ctypes.Structure fields are recursed into on a
    second pass and appended as (attribute, bytestream) pairs.
    """
    format_string = ''
    fields_format = []
    values = []
    _values = []  # do nested structs in a second pass afterwards
    for attribute, _type in structure._fields_:
        if _type == ctypes.c_char_p:
            # Variable-length string: encode its current length into the
            # format, e.g. '5s'.
            character = str(len(getattr(structure, attribute))) + 's'
        else:
            try:
                character = format_character[_type]
            except KeyError:
                # Nested structures are deferred to the second pass.
                if issubclass(_type, ctypes.Structure):
                    _values.append((attribute, _type))
                    continue
                else:
                    raise
        format_string += character
        fields_format.append((attribute, character))
        value = getattr(structure, attribute)
        # struct.pack cannot take None; substitute a zero value.
        if value is None:
            value = 0
        values.append(value)
    # this is a potentially more readable form of the code that follows
    #packed_data = utilities.save_data(format_string) + struct.pack(format_string, *values)
    #for attribute, _type in _values:
    #    packed_data += get_structure_bytestream(getattr(structure, attribute))
    #return packed_data
    # Name encodes the structure type and its field count.
    name = "{}_{}".format(type(structure).__name__, len(structure._fields_))
    # print "Packing values: ", ([name, fields_format, struct.pack(format_string, *values)] +
    #                            [get_structure_bytestream(getattr(structure, attribute)) for
    #                             attribute, _type in _values])
    return utilities.save_data(
        *[name, fields_format, struct.pack(format_string, *values)] +
        [(attribute, get_structure_bytestream(getattr(structure, attribute)))
         for attribute, _type in _values])
start_counter += new_headers_info[index]['length'] return new_headers_info def insert_new_field_to_data(new_field_info, data): output = "" field_name = new_field_info['name'] start = new_field_info['start'] length = new_field_info['length'] + 1 for line in data.split('\n'): if is_header_line(line): output += line[:start] + field_name.ljust(length) + line[start:] elif line == '' or line[0] == ';': output += line else: output += line[:start] + (' ' * length) + line[start:] output += '\n' return output if __name__ == "__main__": tablatal_data = load_data(args.input) header_line = find_header_line(tablatal_data) headers_info = get_headers_info_from_line(header_line) headers_info[-1]['length'] = get_last_field_length(tablatal_data, headers_info) new_field_details = get_new_field_info(headers_info) new_headers_info = add_new_field_to_headers_info(new_field_details, headers_info) new_field_info = new_headers_info[new_field_details['index']] tablatal_data = insert_new_field_to_data(new_field_info, tablatal_data) save_data(tablatal_data, args.output)
def main():
    """Build a word co-occurrence matrix from a corpus, run HITS on it, and
    write the ranked vocabulary to disk."""
    parser = argparse.ArgumentParser(usage='sorry, look at readme...',
                                     description='arg description',
                                     epilog='end')
    parser.add_argument('inputF',
                        help='write the file name of the input text.')
    parser.add_argument('-model', help='select Freq or PPMI.',
                        default='PPMI', choices=['Freq', 'PPMI'])
    parser.add_argument('-outF', help='write the output file name.',
                        default='sample')
    parser.add_argument('-window', help='define the window size.',
                        type=int, default=2)
    parser.add_argument('-iter', help='the number of HITS iteration.',
                        type=int, default=300)
    parser.add_argument('-vocabSize',
                        help='define the vocabulary size. default is all.',
                        type=int, default=None)
    args = parser.parse_args()

    # Count co-occurrences within the sliding window.
    util.trace('count the co-occurrence')
    co_occur, word_occur, context_occur = word_graph.extract_context(
        args.inputF, args.window)
    util.trace('vocabulary size of the input data is {}.'.format(
        len(word_occur)))
    # Default vocabulary size is the full vocabulary.
    vocabSize = args.vocabSize if args.vocabSize else len(word_occur)

    # Build the word-graph matrix.
    util.trace('make matrix (word-graph)')
    matrix, vec = word_graph.make_matrix(co_occur, word_occur,
                                         context_occur, args.model)

    # Persist the matrix and the vectorizer.
    util.trace('save the matrix')
    util.save_data(matrix,
                   args.outF + '/pmi_matrix_{}.pickle'.format(args.model))
    util.save_data(vec,
                   args.outF + '/pmi_vectorizer_{}.pickle'.format(args.model))

    # The matrix is symmetric, so authority score equals hubness score.
    HITS_obj = hits.HITS(matrix)
    util.trace('start HITS')
    i = HITS_obj.startHITS(args.iter).toarray()
    util.trace('finish HITS')

    # Write the words ranked by their HITS score.
    util.trace('write the vocabulary')
    util.writeVocab(HITS_obj, i, vocabSize, args.outF + '/vocab_file.hits')
    util.trace('finish program')
def main_menu(task_lists: dict[str, list], current_list: str,
              arguments: Namespace):
    """Run the interactive command loop of the task manager.

    Commands look like ``<letters><number>[:<number>]`` (e.g. ``d2`` or
    ``m2:5``); a bare number starts a timed work session on that task.
    State is re-serialized and saved on every pass through the loop.

    Args:
        task_lists: mapping of list name -> list of task dicts; task keys
            used here are 'name', 'time_spent', 'notes', 'completed'.
        current_list: name of the task list currently displayed.
        arguments: parsed CLI args; only ``arguments.pomodoro`` is read.
    """
    verbose = False  # when True, notes are shown untruncated
    active_tasks = task_lists.get(current_list)
    while True:
        # Persist everything at the top of each iteration so no edit made
        # by the previous command can be lost.
        task_lists[current_list] = active_tasks
        data = format_all_tasks_to_plaintext(task_lists, current_list)
        # NOTE(review): assumes env var TOD_FP holds the save path —
        # behaviour of save_data when it is unset is not visible here.
        save_data(data, os.getenv('TOD_FP'))
        print_all_tasks(current_list, active_tasks, verbose)
        raw_command = input('► ')
        cls()
        # Split the input into (command letters, task number, destination).
        parsed_command = re.match(r'([A-Za-z]*)(\d+)?:?(\d+)?',
                                  raw_command).groups()
        command, selected_number, dest_number = parsed_command
        if selected_number:
            selected_number = int(selected_number)
        if dest_number:
            dest_number = int(dest_number)
        number_of_tasks = len(active_tasks)
        # Reject out-of-range task numbers ('a' may append past the end).
        if selected_number is not None \
                and selected_number >= number_of_tasks \
                and command != 'a':
            print(C.RED + "No such task.\n" + C.NORMAL)
            continue
        cls()
        if raw_command == '':
            show_help()
        elif not command and selected_number is not None:
            # Bare number: time a work session and add the elapsed time to
            # the task's running total.
            task = active_tasks[selected_number]
            time_spent_in_seconds = spend_time_on_task(task.get('name'),
                                                       task.get('notes'),
                                                       arguments.pomodoro)
            prev_time_spent_in_seconds = convert_time_spent_to_seconds(
                task.get('time_spent'))
            total_time_spent = prev_time_spent_in_seconds + time_spent_in_seconds
            formatted_time_spent = format_seconds_to_time_spent(
                total_time_spent)
            updated_task = {**task, 'time_spent': formatted_time_spent}
            tasks.update(active_tasks, updated_task, selected_number)
            print(C.PURPLE + 'Elapsed time added.' + C.NORMAL)
        elif command == 'aa':
            # Add tasks repeatedly until an empty name is entered.
            cls()
            while True:
                task_name, task_notes = task_name_input()
                if not task_name:
                    break
                new_task = {
                    'name': task_name,
                    'time_spent': '0:00',
                    'notes': task_notes,
                    'completed': False
                }
                active_tasks = tasks.add(active_tasks, new_task,
                                         selected_number)
                cls()
            print(C.PURPLE + 'Tasks added.' + C.NORMAL)
        elif command == 'al':
            # Create a new (empty) task list and switch to it.
            cls()
            new_list_name = list_name_input()
            cls()
            if not new_list_name:
                print(C.RED + 'No name entered.' + C.NORMAL)
                continue
            task_lists[new_list_name] = list()
            current_list = new_list_name
            active_tasks = task_lists[current_list]
            print(C.PURPLE + 'List created.' + C.NORMAL)
        elif command == 'a':
            # Add a single task (optionally at position selected_number).
            task_name, task_notes = task_name_input()
            cls()
            if not task_name:
                print(C.RED + 'Cannot add empty task.' + C.NORMAL)
                continue
            new_task = {
                'name': task_name,
                'time_spent': '0:00',
                'notes': task_notes,
                'completed': False
            }
            active_tasks = tasks.add(active_tasks, new_task, selected_number)
            print(C.PURPLE + 'Task added.' + C.NORMAL)
        elif command == 'b':
            # Break a task down into sub-tasks inserted right after it; the
            # time spent entering them is credited to the original task.
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            timestamp_before = int(time.time())
            current_number = selected_number
            current_task = active_tasks[selected_number]
            print(C.YELLOW + current_task.get('name') + C.NORMAL + '\n')
            if current_task.get('notes'):
                print(C.GRAY + current_task.get('notes') + C.NORMAL + '\n')
            print('Enter your new broken down tasks:\n')
            while True:
                current_number += 1
                task_name, task_notes = task_name_input()
                if not task_name:
                    break
                new_task = {
                    'name': task_name,
                    'time_spent': '0:00',
                    'notes': task_notes,
                    'completed': False
                }
                active_tasks = tasks.add(active_tasks, new_task,
                                         current_number)
            timestamp_after = int(time.time())
            time_spent_in_seconds = timestamp_after - timestamp_before
            prev_time_spent_in_seconds = convert_time_spent_to_seconds(
                current_task.get('time_spent'))
            total_time_spent = prev_time_spent_in_seconds + time_spent_in_seconds
            formatted_time_spent = format_seconds_to_time_spent(
                total_time_spent)
            updated_task = {**current_task,
                            'time_spent': formatted_time_spent}
            tasks.update(active_tasks, updated_task, selected_number)
            cls()
            print(C.PURPLE + 'Tasks added.' + C.NORMAL)
        elif command == 'c':
            # Toggle completion of a task.
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            cls()
            if selected_number is not None:
                active_tasks = tasks.set_completion(active_tasks,
                                                    selected_number)
                print(C.PURPLE + 'Task updated.' + C.NORMAL)
        elif command == 'dd':
            # Delete all tasks in the current list.
            active_tasks = []
            print(C.PURPLE + 'Tasks deleted.' + C.NORMAL)
        elif command == 'dl':
            # Delete a whole task list.
            list_names = list(task_lists.keys())
            print_all_lists(list_names)
            selected_number = list_number_input(len(list_names))
            cls()
            selected_list = list_names[selected_number]
            del task_lists[selected_list]
            if selected_list == current_list:
                # NOTE(review): current_list is reassigned but active_tasks
                # is not refreshed here — it still holds the deleted list's
                # tasks until the next command. Confirm this is intended.
                current_list = list_names[0]
            print(C.PURPLE + 'List deleted.' + C.NORMAL)
        elif command == 'd':
            # Delete a single task.
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            cls()
            active_tasks = tasks.delete(active_tasks, selected_number)
            print(C.PURPLE + 'Task deleted.' + C.NORMAL)
        elif command == 'e':
            # Edit a task's name, notes and recorded time.
            if number_of_tasks == 0:
                print(C.PURPLE + 'No tasks to edit.' + C.NORMAL)
                continue
            if selected_number is None:
                print_all_tasks(current_list, active_tasks)
                selected_number = task_number_input(number_of_tasks)
            cls()
            task = active_tasks[selected_number]
            print('\n' + C.BLUE + "Original Task:" + C.NORMAL)
            name = task['name']
            notes = ': ' + task.get('notes') if task.get('notes') else ''
            time_spent = task['time_spent']
            print(f"\n{name}{notes}\n({time_spent})\n")
            updated_task_name, updated_task_notes = task_name_input(
                name, task['notes'])
            updated_time_spent = task_time_input(time_spent)
            cls()
            updated_task = {
                **task,
                'name': updated_task_name,
                'notes': updated_task_notes,
                'time_spent': updated_time_spent
            }
            active_tasks = tasks.update(active_tasks, updated_task,
                                        selected_number)
            print(C.PURPLE + 'Task updated.' + C.NORMAL)
        elif command == 'h':
            show_help()
        elif command == 'l':
            # Switch to another task list.
            print_all_lists(task_lists)
            list_names = task_lists.keys()
            selected_number = list_number_input(len(list_names))
            cls()
            current_list = list(list_names)[selected_number]
            active_tasks = task_lists.get(current_list)
            print(C.PURPLE + 'List selected.' + C.NORMAL)
        elif command == 'ml':
            # Move a task to another list.
            if number_of_tasks == 0:
                print(C.PURPLE + 'No tasks to move.' + C.NORMAL)
                continue
            print_all_tasks(current_list, active_tasks)
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            cls()
            list_names = list(task_lists.keys())
            print_all_lists(list_names)
            destination_list_number = int(
                input(f'Move task {selected_number} to which list? '))
            destination_list = task_lists[list_names[destination_list_number]]
            cls()
            active_tasks, destination_list = tasks.move_to_list(
                active_tasks, destination_list, selected_number)
            task_lists[list_names[destination_list_number]] = destination_list
            print(C.PURPLE + 'Task moved.' + C.NORMAL)
        elif command == 'm':
            # Reorder a task within the current list.
            if number_of_tasks == 0:
                print(C.PURPLE + 'No tasks to move.' + C.NORMAL)
                continue
            print_all_tasks(current_list, active_tasks)
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            if dest_number is None:
                dest_number = input(f'Move task {selected_number} to where? ')
                # NOTE(review): input() always returns str, so this check
                # rejects every prompted destination — likely meant
                # something like dest_number.isdigit(). Confirm.
                if not type(dest_number) == int:
                    cls()
                    print(C.RED + 'Invalid number.' + C.NORMAL)
                    continue
            cls()
            dest_number = int(dest_number)
            active_tasks = tasks.move(active_tasks, selected_number,
                                      dest_number)
            print(C.PURPLE + 'Tasks updated.' + C.NORMAL)
        elif command == 'n':
            # Toggle verbose display of notes.
            verbose = True if not verbose else False
            message = 'Notes are now fully visible.' if verbose else 'Notes are now truncated.'
            print(C.PURPLE + message + C.NORMAL)
        elif command == 'q':
            sys.exit()
        elif command == 'r':
            active_tasks = tasks.reduce(active_tasks)
            print(C.PURPLE + 'Tasks reduced.' + C.NORMAL)
        elif command == 's':
            print('Starting new task list...\n')
            active_tasks = start_new_task_list()
            cls()
        elif command == 't':
            # Free-standing timer, not tied to a task.
            spend_time_on_task('Timer', None)
            cls()
        else:
            print(C.WHITE + "Try 'help' for more information." + C.NORMAL)
def experiment_multiple_trees(n_reps=1, n_trees=5, n_people=500,
                              methods=('unary', 'LD', 'mKlau'),
                              top_k_matches=5, f_vals=(0.1, 0.5, 1, 1.5, 2),
                              title='genealogical', do_save=True,
                              dir_id=None, rep_offset=0):
    """Run the genealogical (family-tree merging) experiment.

    For each repetition, extracts n_trees tree fragments of n_people each,
    then for every f value and method merges the trees with merge_multiple
    and records precision/recall/F1, cluster counts, bounds, iteration
    counts and wall-clock time into (method x f x rep) result arrays.
    Results are pickled after each repetition (when n_reps > 1) and at the
    end. Python 2 code.
    """
    nvv = len(f_vals)
    # Result tensors indexed [method, f-value, repetition].
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))
    res_ub = np.zeros((len(methods), nvv, n_reps))
    t_beg = time.time()
    # Timestamp used in the partial-results filename (colons stripped so
    # the name is filesystem-safe).
    start_date_part = str(dt.datetime.now())[:19]
    start_date_part = re.sub(' ', '_', start_date_part)
    start_date_part = re.sub(':', '', start_date_part)
    fname0 = os.path.join("experiment_results",
                          "{}_part_{}.pckl".format(title, start_date_part))
    for r in range(n_reps):
        print "\n--- Repetition {}. ---".format(r + 1)
        # Generate data
        tree_files = extract_ft.get_k_fragments(
            n_trees, n_people, label="first{}".format(r + rep_offset))
        people_index_tuples = []
        for tf in tree_files:
            people, people_dict = person.read_people(tf, clean=True)
            #'family_trees/data/rand_frag_%d/' % i, clean=True)
            index = create_index(people)
            people_index_tuples.append((people, index, people_dict))
        uniq_people = count_unique_people(tree_files)
        for i, f in enumerate(f_vals):
            print "\n rep={}, f={}".format(r + 1, f)
            for mi, m in enumerate(methods):
                if m.startswith('meLD') and i > 0:
                    # No need to compute fixed entity method for different f values.
                    continue
                print "\n rep={}, f={}, method={}\n".format(r + 1, f, m)
                t0 = time.time()
                precision, recall, fscore, n_clusters, lb, ub, iters = \
                    merge_multiple(people_index_tuples, 10, top_k_matches,
                                   method=m, uniq_people=uniq_people, f=f)
                res_precision[mi, i, r] = precision
                res_recall[mi, i, r] = recall
                res_fscore[mi, i, r] = fscore
                res_clusters[mi, i, r] = n_clusters
                res_t[mi, i, r] = time.time() - t0
                res_iterations[mi, i, r] = iters
                res_lb[mi, i, r] = lb
                res_ub[mi, i, r] = ub
        if do_save and n_reps > 1:
            # Checkpoint all locals after each repetition.
            pickle.dump(locals(), open(fname0, 'wb'))
            print "Wrote the results of repetition {} to: {}\n".format(
                r + 1, fname0)
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    if do_save:
        fname = util.save_data(locals(), title,
                               dir_name='genealogy{}'.format(str(dir_id)))
        print "Wrote the results to: {}".format(fname)
    # Per-(method, f) means over repetitions.
    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
def multiplex_experiment(n_reps=10, title='multiplex', do_save=True,
                         dir_id=None):
    """
    Run an experiment on aligning the (anonymized) layers of a multiplex
    graph.

    Input:
        n_reps -- number of repetitions per setting

    Output:
        Prints some statistics and stores the results to a file.
    """
    shuffle = True
    # Method names may carry encoded parameters, decoded per-iteration
    # below: 'LD<k>' / 'binB-LD<k>' set the max algorithm iterations;
    # 'meLD<k>_<m>' additionally fixes the max entity count.
    methods = ('ICM', 'progmKlau', 'upProgmKlau', 'mKlau', 'LD', 'binB-LD5',
               'meLD5_50', 'meLD5_61', 'meLD5_70', 'isorankn', 'LD5')
    g = 0.5
    max_iters = 300
    duplicate_names = 3
    f_values = [0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 4, 5]
    nvv = len(f_values)
    fname = os.path.join('multiplex', 'CS-Aarhus_multiplex.edges')
    # Seed is random but printed so a run can be reproduced.
    experiment_seed = np.random.randint(0, 1000000)
    print "--- Experiment seed: {} ---\n".format(experiment_seed)
    random.seed(experiment_seed)
    np.random.seed(experiment_seed)
    # Result tensors indexed [method, f-value, repetition].
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_costs = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))  # Lower bounds
    res_ub = np.zeros((len(methods), nvv, n_reps))  # Upper bound
    t_beg = time.time()
    date0 = dt.datetime.now()
    for r in range(n_reps):
        print "\n Repetition: {}".format(r)
        Gs = read_multiplex_data(fname, n_duplicate_names=duplicate_names)
        for i, f in enumerate(f_values):
            print "\nf={}.\n".format(f)
            cost_params = {'f': f, 'g': g, 'gap_cost': f}
            for j, method in enumerate(methods):
                print "\n method={}, f={}, rep={}".format(method, f, r)
                max_entities = None
                mai = 1
                # Decode parameters embedded in the method name.
                if method.startswith('LD') and len(method) > 2:
                    mai = int(method[2:])
                    method = 'LD'
                elif method.startswith('binB-LD') and len(method) > 7:
                    mai = int(method[7:])
                    method = 'binB-LD'
                elif method.startswith('meLD'):
                    if i > 0:
                        # No need to compute fixed entity method for different f values.
                        continue
                    parts = method.split('_')
                    if len(parts[0]) > 4:
                        mai = int(parts[0][4:])
                    max_entities = int(parts[1])
                    method = 'binB-LD'
                t0 = time.time()
                x, o = align_multiple_networks(
                    Gs, cost_params, method=method, max_iters=max_iters,
                    max_algorithm_iterations=mai, max_entities=max_entities,
                    shuffle=shuffle)
                print "Optimization took {:.2f} seconds.".format(time.time()
                                                                 - t0)
                pr, rec, f1 = o['scores']
                res_t[j, i, r] = time.time() - t0
                res_precision[j, i, r] = pr
                res_recall[j, i, r] = rec
                res_fscore[j, i, r] = f1
                res_iterations[j, i, r] = o['iterations']
                res_clusters[j, i, r] = o['n_clusters']
                res_costs[j, i, r] = o['cost']
                res_lb[j, i, r] = o['lb']
                res_ub[j, i, r] = o['ub']
        if do_save and n_reps > 1:
            # Checkpoint all locals after each repetition.
            fname0 = util.save_data(locals(), "multiplex", date0)
            print "Wrote the results of repetition {} to: {}\n".format(
                r+1, fname0)
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    if do_save:
        fname = util.save_data(locals(), title,
                               dir_name='multiplex{}'.format(str(dir_id)))
        print "Wrote the results to: {}".format(fname)
    #plot_toy_experiment_results(fname)
    # Per-(method, f) means over repetitions.
    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Iterations:", np.mean(res_iterations, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Costs:", np.mean(res_costs, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
def experiment_template(
        n_reps, params, varied_param, cv=False,
        methods=('ICM', 'progmKlau', 'upProgmKlau', 'mKlau', 'LD', 'LD5'),
        title='generic', e_seed=None, dir_id=1000):
    """
    General template for performing experiments.

    Input:
        n_reps -- number of repetitions per setting
        params -- all parameters (the parameter to be varied should be a list)
        varied_param -- the name of the parameter to be varied
        cv -- whether to find f and gap_cost through cross-validation

    Output:
        Prints some statistics and stores the results to a file.
    """
    shuffle = True
    if e_seed is None:
        experiment_seed = np.random.randint(0, 1000000)
    else:
        experiment_seed = e_seed
    # experiment_seed = 48574  # Gt yields a better optimum
    print "--- Experiment seed: {} ---\n".format(experiment_seed)
    random.seed(experiment_seed)
    np.random.seed(experiment_seed)
    varied_values = params[varied_param]
    nvv = len(varied_values)
    p = dict(params)  # Current values
    if 'max_entities' not in p:
        p['max_entities'] = None
    # Result tensors indexed [method, varied value, repetition].
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_costs = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))  # Lower bounds
    res_ub = np.zeros((len(methods), nvv, n_reps))  # Upper bound
    # Pre-draw one seed per repetition so every varied value sees the
    # same sequence of problem instances.
    seeds = []
    for r in range(n_reps):
        seeds.append(np.random.randint(0, 1000000))
    t_beg = time.time()
    date_beg = dt.datetime.now()
    for i, val in enumerate(varied_values):
        p[varied_param] = val
        if varied_param == 'f':
            # gap_cost follows f when f is what is varied.
            p['gap_cost'] = val
        print "\n{} {}.\n".format(val, varied_param)
        if cv:
            # Cross-validate f (via LD) and gap_cost (via mKlau).
            optimal_params = cross_validate_params(
                'LD', p['n_input_graphs'], p['n_entities'],
                p['n_input_graph_nodes'], p['p_keep_edge'],
                p['density_multiplier'], p['duplicates'], p['max_iters'], 1)
            p['f'] = optimal_params['f']
            optimal_params = cross_validate_params(
                'mKlau', p['n_input_graphs'], p['n_entities'],
                p['n_input_graph_nodes'], p['p_keep_edge'],
                p['density_multiplier'], p['duplicates'], p['max_iters'], 1)
            p['gap_cost'] = optimal_params['gap_cost']
        for r in range(n_reps):
            print "\n Repetition: {}".format(r)
            seed = seeds[r]
            for j, method in enumerate(methods):
                print "\n Method: {}\n".format(method)
                max_entities = None
                mai = 1
                # Decode parameters embedded in the method name
                # ('LD<k>', 'binB-LD<k>', 'meLD<k>_<max_entities>').
                if method.startswith('LD') and len(method) > 2:
                    mai = int(method[2:])
                    method = 'LD'
                elif method.startswith('binB-LD') and len(method) > 7:
                    mai = int(method[7:])
                    method = 'binB-LD'
                elif method.startswith('meLD'):
                    if i > 0:
                        # No need to compute fixed entity method for different f values.
                        continue
                    parts = method.split('_')
                    if len(parts[0]) > 4:
                        mai = int(parts[0][4:])
                    max_entities = int(parts[1])
                    method = 'binB-LD'
                t0 = time.time()
                pr, rec, f1, o = single_cer(
                    p['f'], p['g'], p['gap_cost'], seed, method,
                    p['n_input_graphs'], p['n_entities'],
                    p['n_input_graph_nodes'], p['p_keep_edge'],
                    p['density_multiplier'], p['duplicates'], p['max_iters'],
                    mai, shuffle, max_entities)
                res_t[j, i, r] = time.time() - t0
                res_precision[j, i, r] = pr
                res_recall[j, i, r] = rec
                res_fscore[j, i, r] = f1
                res_iterations[j, i, r] = o['iterations']
                res_clusters[j, i, r] = o['n_clusters']
                res_costs[j, i, r] = o['cost']
                res_lb[j, i, r] = o['lb']
                res_ub[j, i, r] = o['ub']
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    fname = util.save_data(locals(), "synthetic_" + title,
                           dir_name='multiplex{}'.format(str(dir_id)))
    #plot_toy_experiment_results(fname)
    # Per-(method, value) means over repetitions.
    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Iterations:", np.mean(res_iterations, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Costs:", np.mean(res_costs, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
# NOTE(review): duplicate definition — experiment_template is already
# defined earlier in this file with identical logic; this later copy
# shadows it at import time. Consider removing one of the two.
def experiment_template(n_reps, params, varied_param, cv=False,
                        methods=('ICM', 'progmKlau', 'upProgmKlau', 'mKlau',
                                 'LD', 'LD5'),
                        title='generic', e_seed=None, dir_id=1000):
    """
    General template for performing experiments.

    Input:
        n_reps -- number of repetitions per setting
        params -- all parameters (the parameter to be varied should be a list)
        varied_param -- the name of the parameter to be varied
        cv -- whether to find f and gap_cost through cross-validation

    Output:
        Prints some statistics and stores the results to a file.
    """
    shuffle = True
    if e_seed is None:
        experiment_seed = np.random.randint(0, 1000000)
    else:
        experiment_seed = e_seed
    # experiment_seed = 48574  # Gt yields a better optimum
    print "--- Experiment seed: {} ---\n".format(experiment_seed)
    random.seed(experiment_seed)
    np.random.seed(experiment_seed)
    varied_values = params[varied_param]
    nvv = len(varied_values)
    p = dict(params)  # Current values
    if 'max_entities' not in p:
        p['max_entities'] = None
    # Result tensors indexed [method, varied value, repetition].
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_costs = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))  # Lower bounds
    res_ub = np.zeros((len(methods), nvv, n_reps))  # Upper bound
    # Pre-draw one seed per repetition so every varied value sees the
    # same sequence of problem instances.
    seeds = []
    for r in range(n_reps):
        seeds.append(np.random.randint(0, 1000000))
    t_beg = time.time()
    date_beg = dt.datetime.now()
    for i, val in enumerate(varied_values):
        p[varied_param] = val
        if varied_param == 'f':
            # gap_cost follows f when f is what is varied.
            p['gap_cost'] = val
        print "\n{} {}.\n".format(val, varied_param)
        if cv:
            # Cross-validate f (via LD) and gap_cost (via mKlau).
            optimal_params = cross_validate_params(
                'LD', p['n_input_graphs'], p['n_entities'],
                p['n_input_graph_nodes'], p['p_keep_edge'],
                p['density_multiplier'], p['duplicates'], p['max_iters'], 1)
            p['f'] = optimal_params['f']
            optimal_params = cross_validate_params(
                'mKlau', p['n_input_graphs'], p['n_entities'],
                p['n_input_graph_nodes'], p['p_keep_edge'],
                p['density_multiplier'], p['duplicates'], p['max_iters'], 1)
            p['gap_cost'] = optimal_params['gap_cost']
        for r in range(n_reps):
            print "\n Repetition: {}".format(r)
            seed = seeds[r]
            for j, method in enumerate(methods):
                print "\n Method: {}\n".format(method)
                max_entities = None
                mai = 1
                # Decode parameters embedded in the method name
                # ('LD<k>', 'binB-LD<k>', 'meLD<k>_<max_entities>').
                if method.startswith('LD') and len(method) > 2:
                    mai = int(method[2:])
                    method = 'LD'
                elif method.startswith('binB-LD') and len(method) > 7:
                    mai = int(method[7:])
                    method = 'binB-LD'
                elif method.startswith('meLD'):
                    if i > 0:
                        # No need to compute fixed entity method for different f values.
                        continue
                    parts = method.split('_')
                    if len(parts[0]) > 4:
                        mai = int(parts[0][4:])
                    max_entities = int(parts[1])
                    method = 'binB-LD'
                t0 = time.time()
                pr, rec, f1, o = single_cer(
                    p['f'], p['g'], p['gap_cost'], seed, method,
                    p['n_input_graphs'], p['n_entities'],
                    p['n_input_graph_nodes'], p['p_keep_edge'],
                    p['density_multiplier'], p['duplicates'], p['max_iters'],
                    mai, shuffle, max_entities)
                res_t[j, i, r] = time.time() - t0
                res_precision[j, i, r] = pr
                res_recall[j, i, r] = rec
                res_fscore[j, i, r] = f1
                res_iterations[j, i, r] = o['iterations']
                res_clusters[j, i, r] = o['n_clusters']
                res_costs[j, i, r] = o['cost']
                res_lb[j, i, r] = o['lb']
                res_ub[j, i, r] = o['ub']
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    fname = util.save_data(locals(), "synthetic_" + title,
                           dir_name='multiplex{}'.format(str(dir_id)))
    #plot_toy_experiment_results(fname)
    # Per-(method, value) means over repetitions.
    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Iterations:", np.mean(res_iterations, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Costs:", np.mean(res_costs, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
# NOTE(review): duplicate definition — multiplex_experiment is already
# defined earlier in this file with identical logic; this later copy
# shadows it at import time. Consider removing one of the two.
def multiplex_experiment(n_reps=10, title='multiplex', do_save=True,
                         dir_id=None):
    """
    Run an experiment on aligning the (anonymized) layers of a multiplex
    graph.

    Input:
        n_reps -- number of repetitions per setting

    Output:
        Prints some statistics and stores the results to a file.
    """
    shuffle = True
    # Method names may carry encoded parameters, decoded per-iteration
    # below: 'LD<k>' / 'binB-LD<k>' set the max algorithm iterations;
    # 'meLD<k>_<m>' additionally fixes the max entity count.
    methods = ('ICM', 'progmKlau', 'upProgmKlau', 'mKlau', 'LD', 'binB-LD5',
               'meLD5_50', 'meLD5_61', 'meLD5_70', 'isorankn', 'LD5')
    g = 0.5
    max_iters = 300
    duplicate_names = 3
    f_values = [0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 4, 5]
    nvv = len(f_values)
    fname = os.path.join('multiplex', 'CS-Aarhus_multiplex.edges')
    # Seed is random but printed so a run can be reproduced.
    experiment_seed = np.random.randint(0, 1000000)
    print "--- Experiment seed: {} ---\n".format(experiment_seed)
    random.seed(experiment_seed)
    np.random.seed(experiment_seed)
    # Result tensors indexed [method, f-value, repetition].
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_costs = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))  # Lower bounds
    res_ub = np.zeros((len(methods), nvv, n_reps))  # Upper bound
    t_beg = time.time()
    date0 = dt.datetime.now()
    for r in range(n_reps):
        print "\n Repetition: {}".format(r)
        Gs = read_multiplex_data(fname, n_duplicate_names=duplicate_names)
        for i, f in enumerate(f_values):
            print "\nf={}.\n".format(f)
            cost_params = {'f': f, 'g': g, 'gap_cost': f}
            for j, method in enumerate(methods):
                print "\n method={}, f={}, rep={}".format(method, f, r)
                max_entities = None
                mai = 1
                # Decode parameters embedded in the method name.
                if method.startswith('LD') and len(method) > 2:
                    mai = int(method[2:])
                    method = 'LD'
                elif method.startswith('binB-LD') and len(method) > 7:
                    mai = int(method[7:])
                    method = 'binB-LD'
                elif method.startswith('meLD'):
                    if i > 0:
                        # No need to compute fixed entity method for different f values.
                        continue
                    parts = method.split('_')
                    if len(parts[0]) > 4:
                        mai = int(parts[0][4:])
                    max_entities = int(parts[1])
                    method = 'binB-LD'
                t0 = time.time()
                x, o = align_multiple_networks(Gs, cost_params,
                                               method=method,
                                               max_iters=max_iters,
                                               max_algorithm_iterations=mai,
                                               max_entities=max_entities,
                                               shuffle=shuffle)
                print "Optimization took {:.2f} seconds.".format(time.time()
                                                                 - t0)
                pr, rec, f1 = o['scores']
                res_t[j, i, r] = time.time() - t0
                res_precision[j, i, r] = pr
                res_recall[j, i, r] = rec
                res_fscore[j, i, r] = f1
                res_iterations[j, i, r] = o['iterations']
                res_clusters[j, i, r] = o['n_clusters']
                res_costs[j, i, r] = o['cost']
                res_lb[j, i, r] = o['lb']
                res_ub[j, i, r] = o['ub']
        if do_save and n_reps > 1:
            # Checkpoint all locals after each repetition.
            fname0 = util.save_data(locals(), "multiplex", date0)
            print "Wrote the results of repetition {} to: {}\n".format(
                r + 1, fname0)
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    if do_save:
        fname = util.save_data(locals(), title,
                               dir_name='multiplex{}'.format(str(dir_id)))
        print "Wrote the results to: {}".format(fname)
    #plot_toy_experiment_results(fname)
    # Per-(method, f) means over repetitions.
    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Iterations:", np.mean(res_iterations, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Costs:", np.mean(res_costs, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
# NOTE(review): duplicate definition — experiment_multiple_trees is already
# defined earlier in this file with identical logic; this later copy
# shadows it at import time. Consider removing one of the two.
def experiment_multiple_trees(n_reps=1, n_trees=5, n_people=500,
                              methods=('unary', 'LD', 'mKlau'),
                              top_k_matches=5, f_vals=(0.1, 0.5, 1, 1.5, 2),
                              title='genealogical', do_save=True,
                              dir_id=None, rep_offset=0):
    """Run the genealogical (family-tree merging) experiment.

    For each repetition, extracts n_trees tree fragments of n_people each,
    then for every f value and method merges the trees with merge_multiple
    and records precision/recall/F1, cluster counts, bounds, iteration
    counts and wall-clock time into (method x f x rep) result arrays.
    Results are pickled after each repetition (when n_reps > 1) and at the
    end. Python 2 code.
    """
    nvv = len(f_vals)
    # Result tensors indexed [method, f-value, repetition].
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))
    res_ub = np.zeros((len(methods), nvv, n_reps))
    t_beg = time.time()
    # Timestamp used in the partial-results filename (colons stripped so
    # the name is filesystem-safe).
    start_date_part = str(dt.datetime.now())[:19]
    start_date_part = re.sub(' ', '_', start_date_part)
    start_date_part = re.sub(':', '', start_date_part)
    fname0 = os.path.join("experiment_results", "{}_part_{}.pckl".format(
        title, start_date_part))
    for r in range(n_reps):
        print "\n--- Repetition {}. ---".format(r+1)
        # Generate data
        tree_files = extract_ft.get_k_fragments(
            n_trees, n_people, label="first{}".format(r+rep_offset))
        people_index_tuples = []
        for tf in tree_files:
            people, people_dict = person.read_people(tf, clean=True)
            #'family_trees/data/rand_frag_%d/' % i, clean=True)
            index = create_index(people)
            people_index_tuples.append((people, index, people_dict))
        uniq_people = count_unique_people(tree_files)
        for i, f in enumerate(f_vals):
            print "\n rep={}, f={}".format(r+1, f)
            for mi, m in enumerate(methods):
                if m.startswith('meLD') and i > 0:
                    # No need to compute fixed entity method for different f values.
                    continue
                print "\n rep={}, f={}, method={}\n".format(r+1, f, m)
                t0 = time.time()
                precision, recall, fscore, n_clusters, lb, ub, iters = \
                    merge_multiple(people_index_tuples, 10, top_k_matches,
                                   method=m, uniq_people=uniq_people, f=f)
                res_precision[mi, i, r] = precision
                res_recall[mi, i, r] = recall
                res_fscore[mi, i, r] = fscore
                res_clusters[mi, i, r] = n_clusters
                res_t[mi, i, r] = time.time() - t0
                res_iterations[mi, i, r] = iters
                res_lb[mi, i, r] = lb
                res_ub[mi, i, r] = ub
        if do_save and n_reps > 1:
            # Checkpoint all locals after each repetition.
            pickle.dump(locals(), open(fname0, 'wb'))
            print "Wrote the results of repetition {} to: {}\n".format(
                r+1, fname0)
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    if do_save:
        fname = util.save_data(locals(), title,
                               dir_name='genealogy{}'.format(str(dir_id)))
        print "Wrote the results to: {}".format(fname)
    # Per-(method, f) means over repetitions.
    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
def get_field_lengths(data: list, headers: list) -> dict:
    """Return the display width for each column.

    A column's width is the length of its longest stringified value, but
    never shorter than the header name itself. None values are treated as
    blank cells and do not contribute to the width.

    Args:
        data: list of row dicts keyed by header name.
        headers: the column names.

    Returns:
        dict mapping header name -> column width in characters.
    """
    field_data = {field: len(field) for field in headers}
    for entry in data:
        for key, value in entry.items():
            if value is not None:
                # Widen the column if this value is the longest seen so far.
                field_data[key] = max(field_data[key], len(str(value)))
    return field_data


def format_parsed_data(data: list, headers: list, lengths: dict) -> str:
    """Render rows as aligned plaintext (tablatal-style) columns.

    The first line holds the upper-cased header names; each subsequent
    line holds one row, every cell left-justified to the width given in
    `lengths` and separated by a single space. None renders as a blank
    cell. Trailing padding is stripped from each line.
    """
    def _render(cells):
        # NOTE(review): .strip() also removes *leading* padding, so a None
        # in the first column shifts that row left — this matches the
        # pre-existing behaviour; confirm it is intended.
        return ' '.join(cells).strip()

    rows = [_render(header.upper().ljust(lengths[header])
                    for header in headers)]
    for entry in data:
        cells = []
        for header in headers:
            value = entry[header]
            cells.append(str(value).ljust(lengths[header])
                         if value is not None
                         else ' ' * lengths[header])
        rows.append(_render(cells))
    return '\n'.join(rows)


if __name__ == "__main__":
    # Script entry point: convert a JSON file into tablatal plaintext.
    parsed_data = parse_json_file(args.input)
    tbtl_data = create_tbtl_data(parsed_data)
    save_data(tbtl_data, args.output)