def configure_ensemble(zk_id_ip_pairs, dynamic_file, conf_dir, data_dir):
    '''Configures the ZooKeeper ensemble with the given instances.

    After configuration, it starts the ZooKeeper server.
    '''
    log.info('Doing a fresh Zookeeper ensemble configuration')
    log.info('Wiping out old state')
    _cmd_delete_old_state(data_dir)

    log.info('Resetting static configuration')
    _cmd_reset_config(dynamic_file, conf_dir)

    # Add hosts as participants to the ensemble configuration
    log.info('Resetting dynamic configuration')
    configs = []
    for zk_id, zk_ip in zk_id_ip_pairs:
        config = 'server.{id}={ip}:2888:3888:participant;{port}'.format(
            id=zk_id, ip=zk_ip, port=ZK_PORT)
        configs.append(config)
    ensemble_config = '\n'.join(configs)
    utils.save_to_file(dynamic_file, ensemble_config)

    start_zookeeper(conf_dir)
    log.info('Ensemble configured.')
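# The ZooKeeper helpers above write a plain configuration string to disk
# through a `utils.save_to_file` that is not shown in this collection. A
# minimal sketch of what such a path-first helper might look like (an
# assumption, not the project's actual implementation):
def save_to_file(path, content):
    # Hypothetical helper: write `content` to `path`, overwriting any
    # previous contents.
    with open(path, 'w') as f:
        f.write(content)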
def step3_fill_lengths():
    """Retrieve the lengths of the pages via the APIs"""
    cuisines = load_from_file('data/cuisines_langs.dat')
    # TODO: refactor by grouping pages together, doing only one request per xyz.wikipedia.org
    params = {'action': 'query', 'prop': 'info', 'format': 'json'}
    skipped = []
    for kk, vv in tqdm(cuisines.items()):
        for lang_prefix, page in tqdm(vv['languages'].items()):
            if lang_prefix != 'en':
                wiki_url = page['wiki_url']
                api_url = f'https://{wiki_url}/w/api.php'
                params['titles'] = page['title']
                with requests.Session() as session:
                    post = session.post(api_url, params)
                if not post.ok:
                    # The original fell through and used an undefined `res`
                    # on a failed request; skip to the next page instead.
                    print("Issue in POST call")
                    print(f"{api_url}\n{params}")
                    continue
                res = post.json()
                page_data = res['query']['pages'][next(iter(res['query']['pages']))]
                if 'length' in page_data:
                    vv['languages'][lang_prefix]['length'] = page_data['length']
                else:
                    skipped.append((kk, lang_prefix))
    if skipped:
        for page, lang in skipped:
            print(f"[Skip] {page} in language {lang} (length unavailable)")
    save_to_file('data/cuisines_length.dat', cuisines)
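# The cuisine pipeline persists Python dicts to .dat files through a paired
# `save_to_file` / `load_from_file`, neither of which is shown here. Unlike
# the plain-text variant sketched earlier, this pair round-trips arbitrary
# objects, so a pickle-based sketch is one plausible reading (an assumption
# about the serialisation format, not the project's actual utilities):
import pickle

def save_to_file(path, data):
    # Hypothetical: serialise `data` to `path` with pickle.
    with open(path, 'wb') as f:
        pickle.dump(data, f)

def load_from_file(path):
    # Hypothetical counterpart: restore whatever save_to_file stored.
    with open(path, 'rb') as f:
        return pickle.load(f)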
def step2_populate_other_languages():
    """Gets URLs and titles of cuisines in multiple languages"""
    cuisines_raw = load_from_file('data/cuisines_raw.dat')
    wiki_url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'prop': 'langlinks|info',
        'llprop': 'url',
        'lllimit': 'max',
        'format': 'json'
    }
    print("Getting links for every cuisine for every language...")
    for vv in tqdm(cuisines_raw.values()):
        pageid = vv['pageid']
        params['pageids'] = pageid
        with requests.Session() as session:
            post = session.post(wiki_url, params)
            res = post.json()
        res_info = res['query']['pages'][pageid]
        if 'langlinks' in res_info:
            # `ll` avoids shadowing the outer `vv` loop variable
            vv['languages'] = {
                ll['lang']: {
                    'title': ll['*'],
                    'wiki_url': strip_url(ll['url'])
                }
                for ll in res_info['langlinks']
            }
        vv['languages']['en'] = {}
        vv['languages']['en']['length'] = res_info['length']
        vv['languages']['en']['title'] = res_info['title']
    save_to_file('data/cuisines_langs.dat', cuisines_raw)
def update(self):
    if WORKERLISTGEN_SERVICE_URI and WORKERLISTGEN_CONTAINER_URI and API_AUTH:
        file_content = '# this file was auto-generated using workerlistgen\n'
        config_lines = InitHelper.get_config_lines(WorkerlistGen.linked_names)
        for config_line in config_lines:
            file_content += config_line
            file_content += '\n'
        save_to_file(CITUS_WORKERLIST_CONFIG_FILE, file_content)

        # find containers on this machine functioning as Citus master
        filters = {'label': [
            'com.docker.compose.project=' + CITUS_STACK_NAME,
            'com.docker.compose.service=' + CITUS_SERVICE_NAME,
        ]}
        citus_masters = docker.containers(filters=filters)
        for citus_master in citus_masters:
            logger.info("Sending container '%s' signal '%d'",
                        citus_master['Id'], signal.SIGHUP)
            docker.kill(citus_master['Id'], signal.SIGHUP)
        logger.info("===========END===========")
    else:
        raise RuntimeError('Docker Cloud environment variables not set')
def reconfigure_ensemble(region, zookeeper_id, zookeeper_ip, running_ids,
                         ensemble_ip, dynamic_file, conf_dir, log_group):
    '''Reconfigures the ZooKeeper ensemble by adding a new server to it.'''
    # Get and reset the static configuration.
    # The static file changes the path of the dynamic file location.
    log.info('Resetting static configuration')
    _cmd_reset_config(dynamic_file, conf_dir)

    # Add the host as an observer to the ensemble configuration
    log.info('Resetting dynamic configuration')
    config = _cmd_get_zookeeper_configuration(ensemble_ip)
    config += "\nserver.{id}={ip}:2888:3888:observer;{port}".format(
        id=zookeeper_id, ip=zookeeper_ip, port=ZK_PORT)
    utils.save_to_file(dynamic_file, config)

    start_zookeeper(conf_dir)

    # Wait a bit for ZooKeeper to initialize itself.
    # For some reason it crashes the moment we try to reconfigure it.
    log.info('Sleeping for a bit')
    time.sleep(30)

    # Remove ids from the ensemble
    log.info('Reconfiguration by removing')
    remove_zookeeper_nodes(region, ensemble_ip, running_ids, log_group)

    # Add the host as a participant to the ensemble with the "add" command
    log.info('Reconfiguration by adding')
    log.info('Adding id %s' % zookeeper_id)
    add_zookeeper_node(ensemble_ip, zookeeper_ip, zookeeper_id)
    log.info('Ensemble reconfigured.')
def export(cls, rules):
    """ Export rules to the provisory config file.

    `rules` are tuples (rule, score).
    """
    save_to_file("corpus/contextual_rules.pdg",
                 "\n".join(rule for rule, score in rules))
def export(cls, rules):
    """ Rules are tuples (rule, score) """
    save_to_file("corpus/lexical_rules.pdg",
                 "\n".join("%s\t%f" % (rule, float(score))
                           for rule, score in sorted(rules,
                                                     key=itemgetter(1),
                                                     reverse=True)))
def export(cls, rules):
    """ Rules are tuples (rule, score) """
    save_to_file("corpus/lemmatizer_rules.pdg",
                 "\n".join("%s\t%f" % (rule, float(score))
                           for rule, score in rules))
def auto_save(self):
    current_time = self._origo[PROP_TEXT_TIMER]
    if self._auto_save_time + INT_AUTO_SAVE_INTERVAL <= current_time:
        # Update last-time-checked value
        self._auto_save_time = current_time
        # Save created mesh
        save_to_file(path=INT_AUTO_SAVE_FILE, data=self._surface.serialise())
        print('[OKAY] file has been auto-saved to:', INT_AUTO_SAVE_FILE)
def main():
    packages = remove_irrelevant_packages(get_top_packages(), TO_CHART)
    annotate_wheels(packages)
    wheel_types = ['manylinux1_py3', 'manylinux1_py2',
                   'win32_py3', 'win32_py2',
                   'win_amd64_py3', 'win_amd64_py2',
                   'macos_py3', 'macos_py2']
    packages = [p for p in packages
                if p['is_c_module'] or any(p[t] for t in wheel_types)]
    save_to_file(packages, 'results.json')
    generate_svg_wheel(packages, len(packages))
def add_to_corpus(article_id):
    """ Retrieve an article from the db, clean it, and add it to the corpus. """
    t = Article.objects.get(pk=article_id).content
    t = normalize_text(unescape_entities(t))
    t = t.encode("utf-8")
    t = t.decode("string_escape")
    save_to_file("corpus/%s.txt" % article_id, t)
    print normalize_text(t)
def initialize(region, instance_id, id_file, log_group):
    '''Initializes the ZooKeeper instance with a valid ZooKeeper id.'''
    log.info('Initializing instance, instance_id=%s' % instance_id)
    zk_id = aws.get_tag(region, instance_id, ZK_ID_TAG)
    if not zk_id:
        zk_id = get_zookeeper_id(region, log_group)
        aws.set_tag(region, instance_id, ZK_ID_TAG, zk_id)
    utils.save_to_file(id_file, zk_id)
    log.info('Initialized with zookeeper_id=%s' % zk_id)
    return zk_id
def generate_cache_json(data, wavs):
    cache = {}
    for row in data:
        value = (row['speed'], row['pitch'], row['voice'])
        for ai_output in row['ai_outputs']:
            key = ai_output['wav_filename']
            if key not in wavs:
                continue
            cache[key] = value
    utils.save_to_file(cache, settings.CACHE_JSON_PATH)
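# `generate_cache_json` passes the data first and the path second, and the
# target is a .json file, so the `utils.save_to_file` it relies on is
# presumably a JSON dump. A hedged sketch under that assumption (the real
# helper may differ):
import json

def save_to_file(data, path):
    # Hypothetical data-first variant: serialise `data` as JSON to `path`.
    with open(path, 'w') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)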
def download_js(parsed_data, folder, base_url):
    # find all js
    links = [sc["src"] for sc in parsed_data.find_all("script", src=True)]
    for link in links:
        filename = re.search(r'/([^/]+)$', link)
        link = transform_url(link, base_url)
        if not filename or link is None:
            continue
        response = requests.get(link)
        if response.ok:
            save_to_file(response.content, folder + filename.group(1))
def download_media(parsed_data, folder, base_url):
    # find all jpg, png, gif, svg
    links = set(
        [link['href'] for link in parsed_data.find_all('link', href=True)] +
        [img['src'] for img in parsed_data.find_all('img', src=True)])
    for link in links:
        filename = re.search(r'/([\w_\-.]+[.](jpg|gif|png|jpeg|svg))$', link)
        link = transform_url(link, base_url)
        if not filename or link is None:
            continue
        response = requests.get(link)
        if response.ok:
            save_to_file(response.content, folder + filename.group(1))
def compare_sok_and_tf(args):
    sok_results = test_sok_multi_dense_emb(args)
    utils.save_to_file("./sok_results_" + str(args.task_id) + ".file",
                       sok_results)

    # acts as a barrier across workers
    barrier = hvd.allreduce(tf.zeros([1]))
    # if args.task_id != 0:
    #     return

    tf_results = test_tf_multi_dense_emb(args)

    all_sok_results_list = list()
    for i in range(args.worker_num):
        sok_results = utils.restore_from_file("./sok_results_" + str(i) + ".file")
        # [iter-num, replica-bs, vectors]
        sok_results = tf.concat(sok_results, axis=0)
        all_sok_results_list.append(sok_results)
    all_sok_results_list = tf.concat(all_sok_results_list, axis=1)
    all_sok_results_list = tf.split(all_sok_results_list,
                                    num_or_size_splits=len(tf_results),
                                    axis=0)
    all_sok_results_list = [tf.squeeze(item) for item in all_sok_results_list]

    if len(all_sok_results_list) != len(tf_results):
        raise ValueError("The length of the SOK results is not equal to "
                         "that of TensorFlow.")

    if args.dynamic_input == 1:
        atol = 1e0
        rtol = 1e-2
    elif args.mixed_precision:
        atol = 1e-2
        rtol = 1e-2
    else:
        atol = 1e-4
        rtol = 1e-4

    for i, sok_vector in enumerate(all_sok_results_list):
        tf.debugging.assert_near(
            tf.reshape(sok_vector, shape=[-1, tf.shape(sok_vector)[-1]]),
            tf_results[i],
            atol=atol,
            rtol=rtol,
            message=("the values are not consistent on iteration: %d" % i))

    print("\n[INFO]: For multiple dense embedding layers: with Horovod, the embedding"
          " vectors obtained from SOK and TF are consistent for %d iterations."
          " With mixed_precision = %s" % (len(sok_results), args.mixed_precision))
def test_tf_dense_model(args, init_tensors, *random_samples):
    dataset = utils.tf_dataset(*random_samples,
                               batchsize=args.global_batch_size,
                               to_sparse_tensor=False,
                               repeat=1)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    tf_dense_demo = TfDenseDemo(init_tensors, args.global_batch_size,
                                args.slot_num, args.nnz_per_slot,
                                args.embedding_vec_size)
    optimizer = utils.get_dense_optimizer(args.optimizer)(learning_rate=0.1)
    if args.mixed_precision:
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
            optimizer, initial_scale=1024)

    @tf.function
    def _train_step(inputs, labels):
        with tf.GradientTape() as tape:
            logit, embedding_vector = tf_dense_demo(inputs, training=True)
            loss = loss_fn(labels, logit)
            if args.mixed_precision:
                _loss = optimizer.get_scaled_loss(loss)
            else:
                _loss = loss
        grads = tape.gradient(_loss, tf_dense_demo.trainable_variables)
        if args.mixed_precision:
            grads = optimizer.get_unscaled_gradients(grads)
        optimizer.apply_gradients(zip(grads, tf_dense_demo.trainable_variables))
        return loss, embedding_vector

    tf_results = list()
    for i, (input_tensors, labels) in enumerate(dataset):
        print("-" * 30, str(i), "-" * 30)
        loss, embedding_vector = _train_step(input_tensors, labels)
        print("[INFO]: iteration {}, loss {}".format(i, loss))
        tf_results.append(embedding_vector.numpy())

    if not hasattr(args, "task_id"):
        args.task_id = 0
    if 1 == args.save_params and args.task_id == 0:
        filepath = r"./embedding_variables/"
        utils.save_to_file(os.path.join(filepath, r"tf_variable.file"),
                           tf_dense_demo.params.numpy())

    return tf_results
def download_css(parsed_data, folder, base_url):
    # find all css
    links = [
        link['href']
        for link in parsed_data.find_all('link', href=True, rel="stylesheet")
    ]
    for link in links:
        filename = re.search(r'/([^/]+)$', link)
        link = transform_url(link, base_url)
        if not filename or link is None:
            continue
        response = requests.get(link)
        if response.ok:
            save_to_file(response.content, folder + filename.group(1))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, help='Input file')
    parser.add_argument('--output', required=True, help='Output file')
    args = parser.parse_args()

    logging.basicConfig(filename="1-4.log", level=logging.INFO)

    need_args = ('matrix', 'eps')
    init_dict = read_data(args.input, need_args)
    A, eps = init_dict['matrix'], init_dict['eps']

    values, vectors = jacobi_eigenvalue(A, eps)
    numpy_eigs(A, values, vectors)
    save_to_file(args.output, eigenvalues=values, eigenvectors=vectors)
def _update_haproxy(self, cfg):
    if self.link_mode in ["cloud", "new"]:
        if Haproxy.cls_cfg != cfg:
            logger.info("HAProxy configuration:\n%s" % cfg)
            Haproxy.cls_cfg = cfg
            if save_to_file(HAPROXY_CONFIG_FILE, cfg):
                Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
        elif self.ssl_updated:
            logger.info("SSL certificates have been changed")
            Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
        else:
            logger.info("HAProxy configuration remains unchanged")
        logger.info("===========END===========")
    elif self.link_mode in ["legacy"]:
        logger.info("HAProxy configuration:\n%s" % cfg)
        if save_to_file(HAPROXY_CONFIG_FILE, cfg):
            UpdateHelper.run_once()
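# The HAProxy updaters branch on the return value of `save_to_file`, so that
# helper evidently reports whether the write succeeded before a reload is
# triggered. A minimal sketch of one plausible contract (an assumption; the
# real helper may differ):
def save_to_file(path, content):
    # Hypothetical variant: write `content` to `path`; return True on
    # success and False on failure, so callers can decide whether to reload.
    try:
        with open(path, 'w') as f:
            f.write(content)
        return True
    except OSError:
        return False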
def main():
    # https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page=1
    # https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page=2
    # https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page=3
    # Collect every answer as a dict in one list, to make inserting into mongodb easier
    anser_list = []
    for i in range(1, 11):
        url = 'https://www.zhihu.com/collection/38887091?ssr_src=heifetz&page={}'.format(i)
        ansers = models_from_url(url)
        # merge lists
        anser_list += ansers
    log('anser_list', type(anser_list), len(anser_list))
    # Two ways to store the data: mongodb, or writing to a data file (JSON format)
    # insert_many(db, 'zhihuAnser', anser_list)
    # write to file
    save_to_file(anser_list, 'zhihuAnser.txt')
def save_xml():
    'does not work with archive'
    texts = [cls.get_xml() for cls in classes]
    for cls, text in zip(classes, texts):
        save_to_file(cls, text, format='xml')
    return [['Saved to XML', [('green', 'ok')]]]
def parse_svg_file(self, file_path, class_):
    print("Parsing %s ..." % file_path)
    if class_ in cache.class_text.keys():
        return
    if os.path.exists('./idx/svg_text/' + class_ + '.txt'):
        # Load in
        text = open('./idx/svg_text/' + class_ + '.txt', 'r').read()
        cache.class_text[class_] = text.split('\n')
        return

    root = ET.parse(file_path).getroot()
    # root.tag = {http://www.w3.org/2000/svg}svg
    namespace = re.findall(r'\{.*}', root.tag)[0]
    # print(namespace)

    # # get ids
    # pos_y = []
    # for item in root.findall('.//%spath' % namespace):
    #     # print(item.get('d').split()[1])
    #     # M0 31 H600
    #     pos_y.append(item.get('d').split()[1])
    #
    # if not pos_y:
    #     for item in root.findall('.//%stext' % namespace):
    #         pos_y.append(item.get('y'))

    # get content
    contents = ""
    for item in root.findall('.//%stextPath' % namespace):
        # print(item.text)
        contents = contents + item.text + "\n"
    if not contents:
        for item in root.findall('.//%stext' % namespace):
            # print(item.text)
            contents = contents + item.text + "\n"

    # assert (len(pos_y) == len(contents))
    # for pos, content in zip(pos_y, contents):
    #     print(pos, content)

    utils.save_to_file('./idx/svg_text/', class_ + '.txt', contents)
    cache.class_text[class_] = contents.split('\n')
def _update_haproxy(self, cfg):
    if HAPROXY_SERVICE_URI and HAPROXY_CONTAINER_URI and API_AUTH:
        if Haproxy.cls_cfg != cfg:
            logger.info("HAProxy configuration:\n%s" % cfg)
            Haproxy.cls_cfg = cfg
            if save_to_file(HAPROXY_CONFIG_FILE, cfg):
                Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
        elif self.ssl_updated:
            logger.info("SSL certificates have been changed")
            Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
        else:
            logger.info("HAProxy configuration remains unchanged")
        logger.info("===========END===========")
    else:
        logger.info("HAProxy configuration:\n%s" % cfg)
        save_to_file(HAPROXY_CONFIG_FILE, cfg)
        UpdateHelper.run_once()
def get_wikimedia_languages_list():
    """Download and build a mapping dict from language prefixes to long language names"""
    wiki_languages = {}
    req = requests.get(
        'https://meta.wikimedia.org/wiki/Table_of_Wikimedia_projects')
    soup = BeautifulSoup(req.text, features='html.parser')
    table = soup.find_all('table', class_='sortable')[0]
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if not tds:
            continue
        code, english_name, local_name = [td.text.strip() for td in tds[:3]]
        code = code.replace(':', '')
        wiki_languages[code] = {
            'eng_name': english_name,
            'local_name': local_name
        }
    save_to_file('data/wiki_languages.dat', wiki_languages)
def step1_prepare_cuisines_data():
    """Create a data structure starting from the cuisines list in Template:Cuisines"""
    req = requests.get('https://en.wikipedia.org/wiki/Template:Cuisines')
    soup = BeautifulSoup(req.text, features='html.parser')
    html_cuisines = soup.find(title='National dish').find_next('ul')

    cuisines_titles = []
    skipped = []
    for ch in html_cuisines:
        if not isinstance(ch, str):
            if len(ch.find_all('a')) > 1:
                # If it has sub-cuisines (regional ones), consider only the first
                cuisine = ch.find_all('a')[0]
            else:
                # If it's only a national cuisine
                cuisine = ch.find('a')
            # If it's not a redirect to a different page (e.g. a "cuisine" section in the country page)
            if not cuisine.get('class'):
                title, href = cuisine.get('title'), cuisine.get('href')
                cuisines_titles.append(
                    (title, unquote(href.replace('/wiki/', ''))))
            elif 'mw-redirect' in cuisine.get('class'):
                skipped.append(cuisine.get('title'))
            else:
                raise ValueError(f"Undefined case: {cuisine}")
    if skipped:
        for skip in skipped:
            print(f"[Skip] {skip} (redirect)")

    api_url = 'https://en.wikipedia.org/w/api.php'
    params = {'action': 'query', 'format': 'json'}
    cuisines_raw = {}
    for chunk in split_to_chunks(cuisines_titles, 50):
        params['titles'] = '|'.join(c[1] for c in chunk)
        with requests.Session() as session:
            post = session.post(api_url, params)
            res = post.json()
        for vv in res['query']['pages'].values():
            cuisines_raw[vv['title']] = {
                'pageid': str(vv['pageid']),
                'languages': {}
            }
    save_to_file('data/cuisines_raw.dat', cuisines_raw)
def parse_file(path):
    """Parses the corpus file into a list of lists; used for experimental
    purposes instead of parsing the whole corpus every time.

    Arguments:
        path {str} -- path to corpus output

    Returns:
        list of lists -- sentences of the corpus
    """
    sentences = []
    with open(path, encoding='utf-8', mode='r') as f:
        lines = f.readlines()
    for line in tqdm(lines, desc=f'Fetching {path[path.rfind("/") + 1:]} output'):
        line_ = ast.literal_eval(line)
        sentences.append(line_)
    save_to_file(sentences, path)
    return sentences
def test_save_and_load(self):
    team_members = main.get_team_members()
    team_data = {'date': team_members}

    # save data to file
    data_file_name = '../data/test/web_data_json_test.data'
    utils.save_to_file(data_file_name, team_data)

    # test loading from the file and whether the name of Johanna can be found
    member_data = utils.load_from_file(data_file_name)
    a_day_data = {}
    # take the first item in the dictionary; it doesn't matter which one it is
    for key in member_data:
        a_day_data = member_data[key]
        break
    found_Johanna = False
    for d in a_day_data:
        if d['name'] == 'Johanna Nicoletta':
            found_Johanna = True
    self.assertEqual(found_Johanna, True, "Can not save or load from file")
def _update_haproxy(self, cfg):
    if Haproxy.cls_cfg != cfg:
        logger.info("HAProxy configuration:\n%s" % cfg)
        Haproxy.cls_cfg = cfg
        if save_to_file(HAPROXY_CONFIG_FILE, cfg):
            UpdateHelper.run_reload()
    elif self.ssl_updated:
        logger.info("SSL certificates have been changed")
        UpdateHelper.run_reload()
    else:
        logger.info("HAProxy configuration remains unchanged")
    logger.info("===========END===========")
def manipulate_homophily(self, strategy_func, strategy_name, pick_strategy,
                         manipulation_clas, network_name):
    self.global_homophilies = []
    class_partitions = []
    nodes_with_manipulation_clas = [
        node for node in self.G.nodes()
        if self.get_node_class(node) == manipulation_clas
    ]
    class_partitions.append(len(nodes_with_manipulation_clas) / self.size)
    homo_list_before = self.local_homophily()
    nodes_to_remove = [
        node for node in self.G.nodes()
        if self.get_node_class(node) != manipulation_clas
    ]
    utils.save_to_file(homo_list_before, network_name,
                       '{0}_homo_list_before'.format(strategy_name))

    # add, remove or change nodes
    strategy_func(nodes_to_remove, nodes_with_manipulation_clas,
                  class_partitions, pick_strategy, manipulation_clas)

    homo_list_after = self.local_homophily()
    utils.save_to_file(homo_list_after, network_name,
                       '{0}_homo_list_after'.format(strategy_name))
    utils.save_to_file(self.global_homophilies, network_name,
                       '{0}_global_homophilies'.format(strategy_name))
    utils.plot_local_homophily(homo_list_before, homo_list_after,
                               network_name, strategy_name)
    utils.plot_global_homophily(self.global_homophilies, network_name,
                                strategy_name)
    utils.plot_all(class_partitions, self.global_homophilies,
                   self.homophily_per_clas, manipulation_clas,
                   network_name, strategy_name)
def main():
    # 'https: // movie.douban.com/top250'
    # The URL copied straight from Douban is broken: invalid URL, no host supplied
    '''
    URLs of the first three pages:
    https://movie.douban.com/top250
    https://movie.douban.com/top250?start=25&filter=
    https://movie.douban.com/top250?start=50&filter=
    '''
    # Collect every movie as a dict in one list, to make inserting into mongodb easier
    movie_list = []
    for i in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start={}'.format(i)
        movies = models_from_url(url)
        # merge lists
        movie_list += movies
        # download_img('doubanTop250', movies)
    log('movie_list', type(movie_list), len(movie_list))
    # Two ways to store the data: mongodb, or writing to a data file (JSON format)
    insert_many(db, 'doubanTop250', movie_list)
    # write to file
    save_to_file(movie_list, 'doubanTop250.txt')
def _update_haproxy(self, cfg):
    if Haproxy.cls_cfg != cfg:
        logger.info("HAProxy configuration has changed.")
        # Logging the config file may be helpful in the future but is creating too many logs for now.
        # logger.info("HAProxy configuration:\n%s" % cfg)
        Haproxy.cls_cfg = cfg
        if save_to_file(HAPROXY_CONFIG_FILE, cfg):
            Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
    elif self.ssl_updated:
        logger.info("SSL certificates have been changed")
        Haproxy.cls_process = UpdateHelper.run_reload(Haproxy.cls_process)
    else:
        logger.info("HAProxy configuration remains unchanged")
    logger.info("===========END===========")
def save_all():
    for x in classes:
        if not x.load_from_first:
            set_header(x)
    texts = [cls.get_data() for cls in classes]
    diffs = [get_diff(cls, text) for cls, text in zip(classes, texts)]
    if settings.MOD_SOURCE:
        for cls, text in zip(classes, texts):
            save_to_file(cls, text)
    else:
        zf = ZipFile(settings.PATH_TO_MOD)
        names = set(zf.namelist()) - set(x.FILE_PATH for x in classes)
        data = [(path, zf.read(path)) for path in names]
        zf.close()
        zf = ZipFile(settings.PATH_TO_MOD, 'w')
        for cls, text in zip(classes, texts):
            save_to_zip(cls, zf, text)
        for file, text in data:
            zf.writestr(file, text)
        zf.close()
    return diffs
    item = {
        'ime': values[0],
        'ram': int(values[1]),
        'ocjena': float(values[2]),
        'cijena': float(values[3])
    }
    ramovi.append(item)
fp.close()

fp = open('Baza podataka/procesori.txt')
lines = fp.readlines()
for line in lines:
    line = line.strip()
    values = line.split('|')
    if len(values) != 4:
        break
    item = {
        'ime': values[0],
        'cpu': int(values[1]),
        'ocjena': float(values[2]),
        'cijena': float(values[3])
    }
    procesori.append(item)

utils.save_to_file('data/igrice.txt', games)
utils.save_to_file('data/ramovi.txt', ramovi)
utils.save_to_file('data/graficke.txt', graficke)
utils.save_to_file('data/procesori.txt', procesori)
def process_exp2_file(filename, **kwargs):
    print 'Processing exp2 file: %s' % filename
    # function scope constants
    cbr_rate = kwargs.pop('cbr_rate')
    agent_names = (kwargs.pop('agent1_name'), kwargs.pop('agent2_name'))
    tcp_fids = ('2', '3')
    tcp_src_nodes = ('0', '4')
    tcp_sink_nodes = ('3', '5')
    # function scope refs
    pkt_tcp_uids = [set(), set()]
    pkt_tcp = [0, 0]
    drop_rates = [0.0, 0.0]
    latencies = [0.0, 0.0]
    pkt_drops = [0, 0]
    throughputs = [0, 0]
    rtts = [{}, {}]
    cells = None
    i = 0
    with open(filename, 'r') as f:
        for line in f:
            cells = line.split()
            try:
                i = tcp_fids.index(cells[7])
            except ValueError:
                continue
            # now switched to the corresponding var space
            pkt_tcp_uids[i].add(cells[11])
            if cells[0] == evt_drop:
                pkt_drops[i] += 1
                continue
            elif cells[0] == evt_enque:
                if cells[2] == tcp_src_nodes[i] and cells[4] == 'tcp':
                    if cells[10] not in rtts[i]:
                        rtts[i][cells[10]] = [float(cells[1]), None]
                continue
            elif cells[0] == evt_recv:
                if cells[3] == tcp_sink_nodes[i]:
                    throughputs[i] += int(cells[5])
                elif cells[3] == tcp_src_nodes[i] and cells[4] == 'ack':
                    if cells[10] in rtts[i]:
                        rtts[i][cells[10]][1] = float(cells[1])
                    throughputs[i] += int(cells[5])
                continue
    # save to file
    for i in (0, 1):
        throughputs[i] = float(throughputs[i]) / (1024.0 * 1024.0)
        pkt_tcp[i] = len(pkt_tcp_uids[i])
        drop_rates[i] = float(pkt_drops[i]) / float(pkt_tcp[i])
        latencies[i] = avg_rtts(rtts[i])
        print 'From pkt drop field: %d packets were dropped' % pkt_drops[i]
    cust_name = '_'.join(agent_names)
    save_to_file(format_data_file_name(filename, cust_name, 'THP'),
                 delim, *[cbr_rate, throughputs[0], throughputs[1]])
    save_to_file(format_data_file_name(filename, cust_name, 'DR'),
                 delim, *[cbr_rate, drop_rates[0], drop_rates[1]])
    save_to_file(format_data_file_name(filename, cust_name, 'LT'),
                 delim, *[cbr_rate, latencies[0], latencies[1]])
def save(self):
    # Save created mesh
    file_path = INT_TEMP_SAVE_FILE.format(datetime.now())
    save_to_file(path=file_path, data=self._surface.serialise())
    print('[OKAY] file has been saved to:', file_path)
def process_exp1_file(filename, **kwargs):
    print 'Processing exp1 file: %s' % filename
    # function scope constants
    cbr_rate = kwargs.pop('cbr_rate')
    agent_name = kwargs.pop('agent_name')
    tcp_fid = '2'
    tcp_src_node = '0'
    tcp_sink_node = '3'
    # function scope refs
    pkt_tcp_uids = set()
    pkt_drops = 0
    throughput = 0
    rtts = {}
    cells = None
    with open(filename, 'r') as f:
        for line in f:
            cells = line.split()
            # grab tcp events according to the flow id
            if cells[7] == tcp_fid:
                # add the packet's unique id to the set
                pkt_tcp_uids.add(cells[11])
                # record a tcp drop event, no need to proceed
                if cells[0] == evt_drop:
                    pkt_drops += 1
                    continue
                # a tcp packet enque (send) event
                elif cells[0] == evt_enque:
                    # if it is a tcp packet sent from the tcp src node
                    if cells[2] == tcp_src_node and cells[4] == 'tcp':
                        # record the start time of the rtt: (start, end)
                        # if the seq num is not in the dict yet
                        if cells[10] not in rtts:
                            rtts[cells[10]] = [float(cells[1]), None]
                    continue
                # a tcp packet recv event
                elif cells[0] == evt_recv:
                    # if it is a tcp packet received by the tcp sink node
                    if cells[3] == tcp_sink_node:
                        # add the packet size to the throughput
                        throughput += int(cells[5])
                    # a tcp src node recv (ack) event
                    elif cells[3] == tcp_src_node and cells[4] == 'ack':
                        if cells[10] in rtts:
                            # update the rtt end time
                            rtts[cells[10]][1] = float(cells[1])
                        # add the ack packet size to the throughput
                        throughput += int(cells[5])
                    continue
    # convert throughput, in MB
    throughput = float(throughput) / (1024.0 * 1024.0)
    # calculate drop rate, in digits
    pkt_tcp = len(pkt_tcp_uids)
    drop_rate = float(pkt_drops) / float(pkt_tcp)
    # calculate latency, in seconds
    latency = avg_rtts(rtts)
    print 'From pkt drop field: %d packets were dropped' % pkt_drops
    # append to file
    save_to_file(format_data_file_name(filename, agent_name, 'THP'),
                 delim, *[cbr_rate, throughput])
    save_to_file(format_data_file_name(filename, agent_name, 'DR'),
                 delim, *[cbr_rate, drop_rate])
    save_to_file(format_data_file_name(filename, agent_name, 'LT'),
                 delim, *[cbr_rate, latency])
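# The experiment processors call `save_to_file(name, delim, *values)`, and
# the comment above says "append to file", so this variant evidently joins
# the values with the delimiter and appends them as one data row. A sketch
# under that assumption (hypothetical, not the original helper):
def save_to_file(filename, delim, *values):
    # Hypothetical append-style variant: add one delimited data row to the
    # named file, creating it if necessary.
    with open(filename, 'a') as f:
        f.write(delim.join(str(v) for v in values) + '\n')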
def create_pid_file():
    pid = str(os.getpid())
    save_to_file(PID_FILE, pid)
    return pid
def make(self, force=False):
    """ Build the lexicon. """
    final = {}
    lemme_to_original = {}
    C = Corpus(self.CORPUS_EXT)
    for tk in C.tokens:
        # Don't take proper nouns (SBP) into the lexicon
        if tk.verified_tag[:3] == "SBP":
            continue
        # Manage tag frequencies
        if not tk.original in final:
            final[tk.original] = defaultdict(int)
        final[tk.original][tk.verified_tag] += 1
        # Manage lemme frequencies
        if not tk.original in lemme_to_original:
            lemme_to_original[tk.original] = {}
        if not tk.verified_tag in lemme_to_original[tk.original]:
            lemme_to_original[tk.original][tk.verified_tag] = defaultdict(int)
        # Frequency of this lemme for this tag for this word...
        lemme_to_original[tk.original][tk.verified_tag][tk.verified_lemme] += 1

    def get_one_line(key):
        """ Return one line of the lexicon.

        Takes the token.original string as parameter.
        """
        return u"%s\t%s" % (key, get_tags(key))

    def get_tags(key):
        """ Return the sorted tags for an original word, compiled into a
        string: tag/lemme tag/lemme
        """
        # Retrieve tags
        tags = sorted([(k, v) for k, v in final[key].iteritems()],
                      key=itemgetter(1), reverse=True)
        # Build the final data
        final_data = []
        for tag, score in tags:
            computed_lemmes = get_lemmes(key, tag)
            lemme, score = computed_lemmes[0]
            final_data.append(u"%s/%s" % (tag, lemme))
        # Return it as a string
        return u" ".join(final_data)

    def get_lemmes(key, tag):
        """ Return the sorted lemmes for one word with one POS tag. """
        return sorted(((k, v) for k, v in lemme_to_original[key][tag].iteritems()),
                      key=itemgetter(1), reverse=True)

    d = []
    for k, v in sorted(final.iteritems()):
        d.append(get_one_line(k))
    final_d = u"\n".join(d)
    # d += u"%s\t%s\n" % (k, " ".join([u"%s/%s" % (tp[0], sorted(lemme_to_original[k][tp[0]], key=itemgetter(1), reverse=True)[0]) for tp in sorted([(k2, v2) for k2, v2 in v.iteritems()], key=itemgetter(1), reverse=True)]))
    ext = force and self.VALID_EXT or self.PENDING_EXT
    save_to_file("%s/lexicon%s" % (self.PATH, ext), unicode(final_d))
def main():
    packages = remove_irrelevant_packages(get_top_packages(),
                                          int(TO_CHART * 1.05))
    packages = annotate_pep8(packages)
    packages = remove_irrelevant_packages(packages, TO_CHART)
    save_to_file(packages, 'results.json')
    generate_svg_wheel(packages, len(packages))
def export(cls, rules):
    """ Rules are tuples (rule, score) """
    save_to_file("corpus/contextual_rules.pdg",
                 "\n".join(rule for rule, score in rules))
def main():
    packages = remove_irrelevant_packages(get_top_packages(), TO_CHART)
    annotate_wheels(packages)
    save_to_file(packages, 'results.json')