def run(self):
    """Clean the raw a11y rows, derive the three datasets, and write each to
    a JSON file under the results directory.
    """
    data = [self.clean_row(d) for d in self.a11y_raw]
    parsed_datasets = [
        ('a11y', self.make_a11y_data(data)),
        ('agencies', self.make_agency_data(data)),
        ('domains', self.make_domain_data(data)),
    ]
    # Hoisted: the identical results_dir({}) call was previously repeated
    # once per dataset inside the loop.
    out_dir = results_dir({})
    mkdir_p(out_dir)
    # FIX: loop variable renamed from `data`, which shadowed the cleaned
    # row list above.
    for name, dataset in parsed_datasets:
        path = '{}/{}.json'.format(out_dir, name)
        with open(path, 'w+') as f:
            json.dump(dataset, f, indent=2)
def run(self):
    """Clean the raw a11y rows, derive the three datasets, and write each to
    a JSON file under the results directory.
    """
    data = [self.clean_row(d) for d in self.a11y_raw]
    parsed_datasets = [
        ('a11y', self.make_a11y_data(data)),
        ('agencies', self.make_agency_data(data)),
        ('domains', self.make_domain_data(data)),
    ]
    # Hoisted: the identical results_dir() call was previously repeated
    # once per dataset inside the loop.
    out_dir = results_dir()
    mkdir_p(out_dir)
    # FIX: loop variable renamed from `data`, which shadowed the cleaned
    # row list above.
    for name, dataset in parsed_datasets:
        path = '{}/{}.json'.format(out_dir, name)
        with open(path, 'w+') as f:
            json.dump(dataset, f, indent=2)
def save_singleimages(self, images, filenames, save_dir, split_dir, sentenceID, imsize):
    """Write every image in the batch to disk as an individual PNG.

    Each file is named '<save_dir>/<filename>_<imsize>_sentence<sentenceID>.png';
    missing parent folders are created on the fly.  (`split_dir` is accepted
    for interface compatibility but not used here.)
    """
    for idx in range(images.size(0)):
        base = '%s/%s' % (save_dir, filenames[idx])
        parent = base[:base.rfind('/')]
        if not os.path.isdir(parent):
            print('Make a new folder: ', parent)
            mkdir_p(parent)
        fullpath = '%s_%d_sentence%d.png' % (base, imsize, sentenceID)
        # Rescale pixel values from [-1, 1] to [0, 255].
        pixels = images[idx].add(1).div(2).mul(255).clamp(0, 255).byte()
        # CHW -> HWC, move to host memory for PIL.
        arr = pixels.permute(1, 2, 0).data.cpu().numpy()
        Image.fromarray(arr).save(fullpath)
def resources_utilization(self, dataset, filename, stages):
    """Plot every resource's utilization trace on a single figure and save it.

    `dataset` is a sequence of per-resource sample lists (CPU, Disk, Net,
    optionally Net-lo), sampled every config.LOG_INTERVAL seconds.
    """
    interval = config.LOG_INTERVAL
    for trace in dataset:
        if trace and len(trace) > 0:
            t = numpy.arange(0, len(trace) * interval, interval)
            plt.plot(t, numpy.array(trace))
    # Add stage lines
    self.draw_stage(stages)
    plt.ylim((0, 105))
    y_lab = plt.ylabel('Utilization (%)')
    x_lab = plt.xlabel('Seconds')
    # A fourth trace means the loopback interface was logged too.
    legend_labels = ['CPU', 'Disk', 'Net', 'Net-lo'] if len(dataset) > 3 else ['CPU', 'Disk', 'Net']
    legend = plt.legend(legend_labels, loc='upper right',
                        bbox_to_anchor=(1.02, -0.1000), ncol=3)
    fig_title = plt.title('Resource Utilization')
    plt.gca().yaxis.grid(True)
    target = utils.mkdir_p(filename + '_resource_utilization.' + config.PLOT_FORMAT)
    plt.savefig(target, bbox_extra_artists=(legend, fig_title, y_lab, x_lab),
                bbox_inches='tight')
    plt.close()
def disk_byte_multi(self, dataset, filename, stages):
    """Plot disk bytes transferred (in MBytes) over time, one line per
    machine, and save the figure next to `filename`.

    `dataset` is a dict with 'dataset' (per-interval samples) and 'labels'
    (worker names encoded as '<role>_!_<id>').
    """
    data = dataset['dataset']
    if len(data) > 0:
        # Plotting 1 line per worker
        x = numpy.arange(0, len(data) * config.LOG_INTERVAL, config.LOG_INTERVAL)
        # FIX: build a float array; in-place division of an integer array
        # raises TypeError on Python 3 / modern numpy (and floor-divides,
        # losing precision, on Python 2).
        y = numpy.array(data, dtype=float)
        # Transform bytes to MBytes
        y /= (1024 * 1024)
        plt.plot(x, y)
        # Add Stages Lines
        self.draw_stage(stages)
        plt.ylabel('MBytes')
        plt.xlabel('Seconds')
        legend = []
        for worker in dataset['labels']:
            str_split = worker.split('_!_')
            legend.append(config.LABELS[str_split[0]] + ' ' + str_split[1])
        plt.legend(legend, loc='lower left')
        plt.title('Disk Byte Transfer per Machine')
        plt.suptitle(os.path.basename(filename))
        plt.gca().yaxis.grid(True)
        plot_filename = utils.mkdir_p(filename + '_disk_byte_machines.' + config.PLOT_FORMAT)
        plt.savefig(plot_filename)
        plt.close()
def resource(plot_title, scenario, dataset, plot_dir):
    """Box-plot resource utilization (CPU / disk-busy / network) per node role.

    The set of plotted series depends on the scenario name: 'SWI' scenarios
    compare Worker vs Swift nodes, 'GC' scenarios show the Worker only, and
    everything else compares Worker vs DataNode.  The figure is written to
    <plot_dir>/resources.<PLOT_FORMAT>.
    """
    colors = {}
    color = None
    # Insertion order of `res` defines the left-to-right box order.
    res = OrderedDict({})
    resource = dataset['resource']
    if resource is not None:
        if 'SWI' in scenario:
            res['CPU W'] = resource['Worker']['CPU']
            res['CPU S'] = resource['Swift']['CPU']
            res['Disk W'] = resource['Worker']['Disk_Busy']
            res['Disk S'] = resource['Swift']['Disk_Busy']
            res['Net W'] = resource['Worker']['Network']
            res['Net S'] = resource['Swift']['Network']
        elif 'GC' in scenario:
            res['CPU'] = resource['Worker']['CPU']
            res['Disk'] = resource['Worker']['Disk_Busy']
            res['Net'] = resource['Worker']['Network']
        else:
            res['CPU W'] = resource['Worker']['CPU']
            res['CPU D'] = resource['DataNode']['CPU']
            res['Disk W'] = resource['Worker']['Disk_Busy']
            res['Disk D'] = resource['DataNode']['Disk_Busy']
            res['Net W'] = resource['Worker']['Network']
            res['Net D'] = resource['DataNode']['Network']
    offset = 1
    if len(res) > 0:
        medianprops = dict(linewidth=5, color='k')
        for (i, r) in enumerate(res):
            # getNewColor chains off the previous color so each series gets
            # a distinct one.
            if r not in colors:
                color = utils.getNewColor(color)
                colors[r] = color
            x = np.array(res[r])
            x = x.transpose()
            if len(x) == 0:
                continue
            # NOTE(review): assumes utils.moving_average_exp smooths the
            # samples before box-plotting — confirm its semantics in utils.
            x_mav = utils.moving_average_exp(x, 5)
            plt.boxplot(x_mav, positions=[i + offset], widths=0.8,
                        patch_artist=True,
                        boxprops={'edgecolor': "k",
                                  "facecolor": colors[r]["bar_color"]},
                        whiskerprops={'color': 'k'},
                        flierprops={'color': 'k'},
                        medianprops=medianprops, showfliers=False)
        x_ticks = np.arange(offset, len(res) + offset, offset).tolist()
        x_lim = [0, len(res) + offset]
        plt.xticks(x_ticks, res.keys(), rotation=70)
        plt.xlim(x_lim)
        # Utilization is a percentage.
        plt.ylim([0, 100])
        plt.grid()
        title = plt.title(plot_title)
        plot_filename = utils.mkdir_p(os.path.join(plot_dir, 'resources.' + config.PLOT_FORMAT))
        plt.savefig(plot_filename, bbox_extra_artists=(title,), bbox_inches='tight')
        plt.close()
def save_superimages(self, images_list, filenames, save_dir, split_dir, imsize):
    """For each batch element, stack the images generated for every sentence
    into one grid ("super image") and save it as a single PNG.

    `images_list` is a list (one entry per sentence) of image batches of
    identical batch size.
    """
    batch_size = images_list[0].size(0)
    num_sentences = len(images_list)
    for i in range(batch_size):
        s_tmp = '%s/super/%s/%s' % (save_dir, split_dir, filenames[i])
        folder = s_tmp[:s_tmp.rfind('/')]
        if not os.path.isdir(folder):
            print('Make a new folder: ', folder)
            mkdir_p(folder)
        # FIX: this assignment was commented out, leaving `savename`
        # undefined and raising NameError at the save_image call below.
        savename = '%s_%d.png' % (s_tmp, imsize)
        super_img = []
        for j in range(num_sentences):
            img = images_list[j][i]
            # Add a leading batch dim so the tensors can be concatenated.
            img = img.view(1, 3, imsize, imsize)
            super_img.append(img)
        super_img = torch.cat(super_img, 0)
        vutils.save_image(super_img, savename, nrow=10, normalize=True)
def disk_throughput_multi_figure(dataset, filename):
    """Per workload, box-plot combined disk throughput (MB/s) of each scenario.

    For every scenario the worker-node disk bytes are summed element-wise
    with the storage-node ones (Swift for 'SWI' scenarios, DataNode
    otherwise); one figure is written per workload under
    <filename>/<workload>/disk_throughput.<PLOT_FORMAT>.
    """
    workloads = []
    colors = {}
    color = None
    # Collect the union of workload names across all scenarios.
    for s in dataset:
        for w in dataset[s]:
            if w not in workloads:
                workloads.append(w)
    for w in workloads:
        res = OrderedDict({})
        for s in dataset:
            try:
                resource = dataset[s][w]['resource']
                if resource is not None:
                    if 'SWI' in s:
                        res[s] = [x + y for x, y in izip_longest(
                            resource['Swift']['Disk_Bytes'],
                            resource['Worker']['Disk_Bytes'],
                            fillvalue=0)]
                    else:
                        res[s] = [x + y for x, y in izip_longest(
                            resource['DataNode']['Disk_Bytes'],
                            resource['Worker']['Disk_Bytes'],
                            fillvalue=0)]
            except KeyError:
                # Scenario may simply lack this workload — skip it.
                pass
        offset = 1
        if len(res) > 0:
            for (i, r) in enumerate(res):
                if r not in colors:
                    color = utils.getNewColor(color)
                    colors[r] = color
                # FIX: build a float array; in-place division of an integer
                # array raises TypeError on Python 3 / modern numpy (and
                # floor-divides on Python 2, losing precision).
                x = np.array(res[r], dtype=float)
                # bytes -> MBytes
                x /= (1024 * 1024)
                x = x.transpose()
                if len(x) == 0:
                    continue
                plt.boxplot(x, positions=[i + offset], widths=0.8,
                            patch_artist=True,
                            boxprops={'edgecolor': "k",
                                      "facecolor": colors[r]["bar_color"]},
                            whiskerprops={'color': 'k'},
                            flierprops={'color': 'k'},
                            medianprops={'color': 'k'})
            x_ticks = np.arange(offset, len(res) + offset, offset).tolist()
            x_lim = [0, len(res) + offset]
            plt.xticks(x_ticks, res.keys(), rotation=70)
            plt.xlim(x_lim)
            plt.ylabel('MB/s')
            title = plt.title(w)
            plot_filename = utils.mkdir_p(os.path.join(filename, w, 'disk_throughput.' + config.PLOT_FORMAT))
            plt.savefig(plot_filename, bbox_extra_artists=(title,), bbox_inches='tight')
            plt.close()
def mem(self, dataset, filename, stages):
    """Optionally plot total memory utilization over time, and return the
    sampled series as a plain list.

    The figure is only produced when config.PLOT_PER_RESOURCE is set; the
    series is returned either way (None when `dataset` is empty).
    """
    if len(dataset) > 0:
        # One sample every LOG_INTERVAL seconds.
        x = numpy.arange(0, len(dataset) * config.LOG_INTERVAL, config.LOG_INTERVAL)
        y = numpy.array(dataset)
        # NOTE(review): the extent of this conditional was reconstructed —
        # the whole plot/save section is assumed to be gated by
        # PLOT_PER_RESOURCE; confirm against the original layout.
        if config.PLOT_PER_RESOURCE:
            plt.plot(x, y)
            # Add Stages Lines
            self.draw_stage(stages)
            plt.ylabel('Utilization (%)')
            plt.xlabel('Seconds')
            plt.title('Total Memory Utilization')
            plt.suptitle(os.path.basename(filename))
            plt.gca().yaxis.grid(True)
            plot_filename = utils.mkdir_p(filename + '_memory_total.' + config.PLOT_FORMAT)
            plt.savefig(plot_filename)
            plt.close()
        return y.tolist()
def disk_await(self, dataset, filename, stages):
    """Plot total disk await time over time (log-scaled y axis), save the
    figure, and return the sampled series as a plain list (None when
    `dataset` is empty).
    """
    if len(dataset) > 0:
        # One sample every LOG_INTERVAL seconds.
        x = numpy.arange(0, len(dataset) * config.LOG_INTERVAL, config.LOG_INTERVAL)
        y = numpy.array(dataset)
        plt.plot(x, y)
        # Add Stages Lines
        self.draw_stage(stages)
        plt.ylabel('Await Time (ms)')
        plt.xlabel('Seconds')
        plt.title('Total Disk Await Time')
        plt.suptitle(os.path.basename(filename))
        plt.gca().yaxis.grid(True)
        # FIX: plt.axes() creates a brand-new empty Axes on current
        # matplotlib, so the log scale was not applied to the plotted axes;
        # use the current axes instead.
        plt.gca().set_yscale('log')
        plot_filename = utils.mkdir_p(filename + '_disk_await_total.' + config.PLOT_FORMAT)
        plt.savefig(plot_filename)
        plt.close()
        return y.tolist()
def mem_multi(self, dataset, filename, stages):
    """Plot memory utilization over time, one line per machine, and save
    the figure next to `filename`.

    `dataset` is a dict with 'dataset' (per-interval samples) and 'labels'
    (machine keys into config.LABELS).
    """
    series = dataset['dataset']
    if len(series) == 0:
        return
    # One sample every LOG_INTERVAL seconds; one plotted line per machine.
    t = numpy.arange(0, len(series) * config.LOG_INTERVAL, config.LOG_INTERVAL)
    plt.plot(t, numpy.array(series))
    # Add stage lines
    self.draw_stage(stages)
    plt.ylabel('Utilization (%)')
    plt.xlabel('Seconds')
    plt.legend([config.LABELS[machine] for machine in dataset['labels']],
               loc='lower left')
    plt.title('Memory Utilization per Machine')
    plt.gca().yaxis.grid(True)
    plt.suptitle(os.path.basename(filename))
    target = utils.mkdir_p(filename + '_memory_machines.' + config.PLOT_FORMAT)
    plt.savefig(target)
    plt.close()
def disk_byte(self, dataset, filename, stages):
    """Plot total disk bytes transferred (converted to MBytes) over time,
    save the figure, and return the MByte series as a plain list (None when
    `dataset` is empty).
    """
    if len(dataset) > 0:
        # One sample every LOG_INTERVAL seconds.
        x = numpy.arange(0, len(dataset) * config.LOG_INTERVAL, config.LOG_INTERVAL)
        # FIX: build a float array; in-place division of an integer array
        # raises TypeError on Python 3 / modern numpy (and floor-divides,
        # losing precision, on Python 2).
        y = numpy.array(dataset, dtype=float)
        # Transform bytes to MBytes
        y /= (1024 * 1024)
        plt.plot(x, y)
        # Add Stages Lines
        self.draw_stage(stages)
        plt.ylabel('MBytes')
        plt.xlabel('Seconds')
        plt.title('Total Disk Byte Transfer')
        plt.suptitle(os.path.basename(filename))
        plt.gca().yaxis.grid(True)
        # plt.axes().set_yscale('log')
        plot_filename = utils.mkdir_p(filename + '_disk_byte_total.' + config.PLOT_FORMAT)
        plt.savefig(plot_filename)
        plt.close()
        return y.tolist()
def __init__(self, output_dir, data_loader, imsize):
    """Prepare output directories, GPU configuration and training settings.

    When cfg.TRAIN.FLAG is set, the model/image/log/test-image folders are
    created and a summary writer is attached to the log folder.
    (`imsize` is accepted for interface compatibility but not stored here.)
    """
    self.model_dir = os.path.join(output_dir, 'Model')
    self.image_dir = os.path.join(output_dir, 'Image')
    self.log_dir = os.path.join(output_dir, 'Log')
    self.testImage_dir = os.path.join(output_dir, 'TestImage')
    if cfg.TRAIN.FLAG:
        for directory in (self.model_dir, self.image_dir,
                          self.log_dir, self.testImage_dir):
            mkdir_p(directory)
        self.summary_writer = FileWriter(self.log_dir)
    # cfg.GPU_ID is a comma-separated list of device indices.
    self.gpus = [int(gpu_id) for gpu_id in cfg.GPU_ID.split(',')]
    self.num_gpus = len(self.gpus)
    torch.cuda.set_device(self.gpus[0])
    cudnn.benchmark = True
    # Effective batch size scales with the number of GPUs.
    self.batch_size = cfg.TRAIN.BATCH_SIZE * self.num_gpus
    self.max_epoch = cfg.TRAIN.MAX_EPOCH
    self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL
    self.data_loader = data_loader
    self.num_batches = len(self.data_loader)
def swift_requests(plot_title, dataset, plot_dir):
    """Bar-plot Swift request counts per server: one 'total' figure and one
    'details' figure stacking per-HTTP-status-code counts, saved per server
    under `plot_dir`.
    """
    colors = {}
    color = None
    bars = {}
    figures = ['total', 'details']
    x_labels = []
    for server in dataset:
        requests = dataset[server]["Request"]
        for (i, request) in enumerate(requests):
            if request not in x_labels:
                x_labels.append(request)
            plt.figure("total")
            plt.bar(i, requests[request]["total"], align='center')
            plt.figure("details")
            codes = requests[request]["code"]
            bottom = 0
            # Stack one bar segment per status code, keeping one legend
            # handle per code across all requests.
            for code in codes:
                if code not in colors:
                    color = utils.getNewColor(color)
                    colors[code] = color
                    bars[code] = None
                bar = plt.bar(i, codes[code], align='center',
                              color=colors[code]["bar_color"],
                              bottom=bottom, label=code)
                bottom += codes[code]
                if bars[code] is None:
                    bars[code] = bar
        for figure_type in figures:
            plt.figure(figure_type)
            plt.xticks(np.arange(len(x_labels)), x_labels)
            plt.grid()
            plt.ylabel("# of Requests")
            title = plt.title(plot_title)
            # plt.yscale('log')
            lgd = None
            # FIX: was `figure_type is "details"` — identity comparison with
            # a string literal is fragile (relies on interning) and a
            # SyntaxWarning on Python 3.8+; use equality.
            if figure_type == "details":
                lgd = plt.legend(bars.values(), colors.keys(),
                                 loc='upper right',
                                 bbox_to_anchor=(1.02, -0.1000), ncol=3)
                # FIX: plt.axes() creates a new empty Axes on current
                # matplotlib; annotate the axes that actually hold the bars.
                ax = plt.gca()
                rects = ax.patches
                bottom = 0
                old_x = None
                # Label each stacked segment with its count, centred in the
                # segment; reset the running bottom at each new bar column.
                for rect in rects:
                    if old_x != rect.get_x():
                        bottom = 0
                        old_x = rect.get_x()
                    height = rect.get_height()
                    ax.text(rect.get_x() + rect.get_width()/2.,
                            (bottom + (height / 2.0)) - config.PLOT_FONT_SIZE,
                            '%d' % int(height), ha='center', va='bottom')
                    bottom += rect.get_height()
            plot_filename = utils.mkdir_p(
                os.path.join(plot_dir, "swift-requests-" + server + '-' + figure_type + '.' + config.PLOT_FORMAT))
            if lgd is not None:
                plt.savefig(plot_filename, bbox_extra_artists=(lgd, title), bbox_inches='tight')
            else:
                plt.savefig(plot_filename, bbox_extra_artists=(title,), bbox_inches='tight')
            plt.close()
def main(env, args):
    """Train a DQN or PPO agent on `env`, with checkpointing, logging, and
    periodic evaluation.

    Training state (meters, loggers, replay memory, frame counters) is
    stored on `args` so the train/test helpers can share it.  Runs until
    args.n_frames frames have been consumed.
    """
    # Initiate args useful for training
    start_episode = 0
    args.current_frame = 0
    args.eval_start = 0
    args.test_num = 0
    args.test_time = False
    args.best_avg_return = -1

    # Make checkpoint path if there is none
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Instantiate metric tracking for progress bar
    # TODO: Add any other metrics we need to track
    # TODO: Still need to keep track of time for progress bar
    args.rewards = AverageMeter()
    args.returns = AverageMeter()
    args.episode_lengths = AverageMeter()
    args.losses = AverageMeter()

    # Model & experiences
    print("==> creating model '{}' with '{}' noise".format(
        args.alg, args.noise))
    if args.alg == 'dqn':
        args.epsilon_greed_init = args.epsilon_greed
        # Threshold derived from the epsilon-greedy exploration rate.
        args.initial_threshold = -math.log(
            1 - args.epsilon_greed + args.epsilon_greed / args.action_dim)
        model = DQN(action_space=env.action_space, noise=args.noise,
                    initial_threshold=args.initial_threshold)
        # Target network starts as an exact copy of the online network.
        target_model = DQN(action_space=env.action_space, noise=args.noise,
                           initial_threshold=args.initial_threshold)
        target_model.load_state_dict(model.state_dict())
        args.memory = ReplayBuffer(args.replay_memory, args.use_cuda)
    else:
        model = PPO(action_space=env.action_space, noise=args.noise,
                    clip_epsilon=args.clip_epsilon)
        # TODO: Instantiate RolloutStorage
        # rollouts = RolloutStorage(args.horizon, arg.processes?,...)

    # House models on GPU if needed
    if args.use_cuda:
        model.cuda()
        if args.alg == 'dqn':
            target_model.cuda()

    # Criterions and optimizers
    value_criterion = nn.functional.mse_loss
    if args.alg == 'dqn':
        if args.noise == 'adaptive':
            optimizer = optim.Adam(model.parameters(), lr=1e-4)
        else:
            optimizer = optim.RMSprop(model.parameters(), lr=2.5e-4,
                                      momentum=0.95, alpha=0.95, eps=1e-2)
    else:
        policy_criterion = model.surrogate_loss
        # TODO: revisit the choices here.
        # Might be best to just go with defaults from PPO paper
        if args.noise == 'learned':
            optimizer = optim.RMSprop(model.parameters(), lr=2.5e-4,
                                      momentum=0.95, alpha=0.95, eps=1e-2)
        else:
            optimizer = optim.Adam(model.parameters(), lr=3e-4)

    # Resume
    # Unload status, meters, and previous state_dicts from checkpoint
    print("==> resuming from '{}' at frame {}".
          format(args.resume, args.start_frame) if args.resume else
          "==> starting from scratch at frame {}".format(args.start_frame))
    title = '{}-{}'.format(args.noise, args.env_id)
    if args.resume:
        # Load checkpoint.
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        start_episode = checkpoint['episode'] + 1
        args.current_frame = checkpoint['frame'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        if args.alg == 'dqn':
            target_model.load_state_dict(checkpoint['target_state_dict'])
        args.returns = checkpoint['returns']
        args.best_avg_return = checkpoint['best_avg_return']
        args.episode_lengths = checkpoint['episode_lengths']
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.logger = Logger(os.path.join(args.checkpoint,
                                          '{}-log.txt'.format(title)),
                             title=title, resume=True)
        args.test_logger = Logger(os.path.join(
            args.checkpoint, 'eval-{}-log.txt'.format(title)),
            title=title, resume=True)
    else:
        args.logger = Logger(os.path.join(args.checkpoint,
                                          '{}-log.txt'.format(title)),
                             title=title)
        args.logger.set_names(
            ['Episode', 'Frame', 'EpLen', 'AvgLoss', 'Return'])
        args.test_logger = Logger(os.path.join(
            args.checkpoint, 'eval-{}-log.txt'.format(title)), title=title)
        args.test_logger.set_names(['Frame', 'EpLen', 'Return'])

    # We need at least one experience in the replay buffer for DQN
    if args.alg == 'dqn':
        true_warmup = min(args.memory_warmup, args.replay_memory)
        print("==> filling replay buffer with {} transition(s)".format(
            true_warmup))
        state = env.reset()
        # Warm-up transitions are collected with uniformly random actions.
        for i in range(true_warmup):
            action = random.randrange(args.action_dim)
            successor, reward, done, _ = env.step(action)
            args.memory.add(state, action, reward, successor, done)
            state = successor if not done else env.reset()
        # Need next reset to be a true reset (due to EpisodicLifeEnv)
        env.was_real_done = True

    # Initialize bars
    args.bar = Bar('Training', max=args.n_frames)
    print("==> beginning training for {} frames".format(args.n_frames))
    for episode in itertools.count(start_episode):
        # Train model
        if args.alg == 'dqn':
            env, model, target_model, optimizer, args = trainDQN(
                env, model, target_model, optimizer, value_criterion, args)
        else:
            env, model, optimizer, args = trainPPO(env, model, optimizer,
                                                   value_criterion,
                                                   policy_criterion, args)
        # Checkpoint model to disk
        is_best = args.returns.avg > args.best_avg_return
        if is_best:
            args.best_avg_return = args.returns.avg
        save_checkpoint(
            {
                'episode': episode,
                'frame': args.current_frame,
                'state_dict': model.state_dict(),
                'target_state_dict': target_model.state_dict()
                if args.alg == 'dqn' else None,
                'rewards': args.rewards,
                'returns': args.returns,
                'best_avg_return': args.best_avg_return,
                'episode_lengths': args.episode_lengths,
                'losses': args.losses,
                'optimizer': optimizer.state_dict()
            }, is_best, title)

        # Log metrics (episode, frame, episode length, average loss, return)
        args.logger.append([
            episode, args.current_frame, args.episode_lengths.val,
            args.losses.avg, args.returns.val
        ])

        # Reset frame-level meters
        args.losses.reset()
        args.rewards.reset()

        # Handle testing
        if args.test_time:
            # For testing only
            print("==> evaluating agent for {} frames at frame {}".format(
                args.eval_period, args.current_frame))
            args.eval_start = args.current_frame
            args.testing_frame = args.current_frame
            args.test_bar = Bar('Testing', max=args.eval_period)
            args.test_rewards = AverageMeter()
            args.test_returns = AverageMeter()
            args.test_episode_lengths = AverageMeter()
            # Main testing loop
            while args.testing_frame - args.eval_start < args.eval_period:
                if args.alg == 'dqn':
                    env, args = testDQN(env, model, args)
                else:
                    env, args = testPPO(env, model, args)
                args.test_logger.append([
                    args.testing_frame - args.eval_start,
                    args.test_episode_lengths.val, args.test_returns.val
                ])
                args.test_episode_lengths.reset()
                args.test_rewards.reset()
                # For testing only:
                #break
            # Need next reset to be a true reset
            env.was_real_done = True
            # Need to turn off testing for next episode
            args.test_time = False
            args.test_num += 1
            args.test_bar.finish()
        if args.current_frame > args.n_frames:
            break
        # For testing only:
        # if episode >= 100:
        #     break
        #print('episode: {}'.format(episode))

    # TODO: Handle cleanup
    args.bar.finish()
    args.logger.close()
    args.test_logger.close()
    args.logger.plot()
    env.close()
def main(args):
    """Run UCSNet inference over the test set and save per-frame outputs.

    For every frame: runs the network, then writes the reference RGB image,
    the camera parameters, and per-stage depth/confidence maps (as PFM)
    under <save_path>/<scene_name>/{rgb,cam,depth,confidence}.
    """
    # dataset, dataloader
    testset = MVSTestSet(root_dir=args.root_path, data_list=args.test_list,
                         max_h=args.max_h, max_w=args.max_w, num_views=args.num_views)
    # Batch size 1: one frame processed at a time.
    test_loader = DataLoader(testset, 1, shuffle=False, num_workers=4, drop_last=False)

    # build model
    model = UCSNet(stage_configs=list(map(int, args.net_configs.split(","))),
                   lamb=args.lamb)

    # load checkpoint file specified by args.loadckpt
    print("Loading model {} ...".format(args.ckpt))
    state_dict = torch.load(args.ckpt, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict['model'], strict=True)
    print('Success!')

    model = nn.DataParallel(model)
    model.cuda()
    model.eval()

    # Running total of inference time, for the per-frame average printout.
    tim_cnt = 0

    for batch_idx, sample in enumerate(test_loader):
        scene_name = sample["scene_name"][0]
        frame_idx = sample["frame_idx"][0][0]
        scene_path = osp.join(args.save_path, scene_name)

        print('Process data ...')
        sample_cuda = dict2cuda(sample)

        print('Testing {} frame {} ...'.format(scene_name, frame_idx))
        start_time = time.time()
        outputs = model(sample_cuda["imgs"], sample_cuda["proj_matrices"],
                        sample_cuda["depth_values"])
        end_time = time.time()

        outputs = dict2numpy(outputs)
        del sample_cuda

        tim_cnt += (end_time - start_time)

        print('Finished {}/{}, time: {:.2f}s ({:.2f}s/frame).'.format(
            batch_idx + 1, len(test_loader), end_time - start_time,
            tim_cnt / (batch_idx + 1.)))

        rgb_path = osp.join(scene_path, 'rgb')
        mkdir_p(rgb_path)
        depth_path = osp.join(scene_path, 'depth')
        mkdir_p(depth_path)
        cam_path = osp.join(scene_path, 'cam')
        mkdir_p(cam_path)
        conf_path = osp.join(scene_path, 'confidence')
        mkdir_p(conf_path)

        # First view of the batch is the reference image; CHW -> HWC and
        # rescale to 8-bit for saving.
        ref_img = sample["imgs"][0, 0].numpy().transpose(1, 2, 0) * 255
        ref_img = np.clip(ref_img, 0, 255).astype(np.uint8)
        Image.fromarray(ref_img).save(rgb_path + '/{:08d}.png'.format(frame_idx))

        cam = sample["proj_matrices"]["stage3"][0, 0].numpy()
        save_cameras(cam, cam_path + '/cam_{:08d}.txt'.format(frame_idx))

        # One depth/confidence pair per coarse-to-fine stage.
        for stage_id in range(3):
            cur_res = outputs["stage{}".format(stage_id + 1)]
            cur_dep = cur_res["depth"][0]
            cur_conf = cur_res["confidence"][0]
            write_pfm(
                depth_path + "/dep_{:08d}_{}.pfm".format(frame_idx, stage_id + 1),
                cur_dep)
            write_pfm(
                conf_path + '/conf_{:08d}_{}.pfm'.format(frame_idx, stage_id + 1),
                cur_conf)

            print('Saved results for {}/{} (resolution: {})'.format(
                scene_name, frame_idx, cur_dep.shape))

        torch.cuda.empty_cache()
        gc.collect()
def gather(self):
    """Yield domains gathered from Censys' certificate data in BigQuery.

    Runs the SQL query (or reuses the cached CSV when --cache is set and a
    previous download exists), writes every common name / DNS name to the
    cache CSV, then yields the de-duplicated domains read back from it.
    Exits the process when credentials are missing or the query fails.
    """
    # Returns a parsed, processed Google service credentials object.
    credentials = load_credentials()
    if credentials is None:
        logging.warning("No BigQuery credentials provided.")
        logging.warning("Set BIGQUERY_CREDENTIALS or BIGQUERY_CREDENTIALS_PATH environment variables.")
        exit(1)

    # When using this form of instantiation, the client won't pull
    # the project_id out of the creds, has to be set explicitly.
    client = bigquery.Client(
        project=credentials.project_id,
        credentials=credentials
    )

    # Allow override of default timeout (in seconds).
    timeout = int(self.options.get("timeout", default_timeout))

    # Construct the query.
    query = query_for(self.suffixes)
    logging.debug("Censys query:\n%s\n" % query)

    # Plan to store in cache/censys/export.csv.
    download_path = utils.cache_path(
        "export", "censys", ext="csv", cache_dir=self.cache_dir)

    # Reuse of cached data can be turned on with --cache.
    cache = self.options.get("cache", False)
    if (cache is True) and os.path.exists(download_path):
        logging.warning("Using cached download data.")

    # But by default, fetch new data from the BigQuery API,
    # and write it to the expected download location.
    else:
        # Ensure cache destination exists.
        utils.mkdir_p(os.path.dirname(download_path))

        logging.warning("Kicking off SQL query job.")

        rows = None

        # Actually execute the query.
        try:
            # Executes query and loads all results into memory.
            query_job = client.query(query)
            iterator = query_job.result(timeout=timeout)
            rows = list(iterator)
        except google.api_core.exceptions.Forbidden:
            logging.warning("Access denied to Censys' BigQuery tables.")
        # FIX: was a bare `except:` (also caught SystemExit/KeyboardInterrupt).
        except Exception:
            logging.warning(utils.format_last_exception())
            logging.warning("Error talking to BigQuery, aborting.")

        # FIX: if the query failed, `rows` is still None and the loop below
        # crashed with a TypeError; abort cleanly instead.
        if rows is None:
            exit(1)

        logging.warning("Caching results of SQL query.")

        # FIX: use a context manager so the file is closed even on error.
        with open(download_path, 'w', newline='') as download_file:
            download_writer = csv.writer(download_file)
            download_writer.writerow(["Domain"])  # will be skipped on read

            # Parse the rows and write them out as they were returned (dupes
            # and all), to be de-duped by the central gathering script.
            for row in rows:
                domains = row['common_name'] + row['dns_names']
                for domain in domains:
                    download_writer.writerow([domain])

    # Whether we downloaded it fresh or not, read from the cached data.
    for domain in utils.load_domains(download_path):
        if domain:
            yield domain
def plot_results(game, mooc, path_data, experiment):
    """Render the tournament result plots (payoffs, state heatmap, action
    probabilities) from the CSVs in `path_data` into a plots/ folder.

    NOTE(review): relies on module-level globals `l1`, `l2`, `info` and
    `episodes` — confirm they are set before calling.  `mooc` is accepted
    but not used in this function.
    """
    path_plots = f'plots/tour_{experiment}_{game}_l{l1}_{l2}'
    mkdir_p(path_plots)

    # Scalarised payoff per step, one line per agent.
    df1 = pd.read_csv(f'{path_data}/agent1_payoff_{info}.csv')
    ax = sns.lineplot(x='Episode', y='Payoff', linewidth=2.0, data=df1,
                      ci='sd', label=f'Agent 1')
    df2 = pd.read_csv(f'{path_data}/agent2_payoff_{info}.csv')
    ax = sns.lineplot(x='Episode', y='Payoff', linewidth=2.0, data=df2,
                      ci='sd', label=f'Agent2')
    ax.set(ylabel='Scalarised payoff per step')
    ax.set(xlabel='Iterations')
    # ax.set_ylim(0, 14)
    ax.set_xlim(0, episodes)
    plot_name = f"{path_plots}/payoffs"
    # plt.title("Agent 1")
    plt.savefig(plot_name + ".pdf")
    plt.clf()

    # Joint-action (state) distribution heatmap; the RNE/R/M game variants
    # only have two actions (L, M).
    if game in ['iagRNE', 'iagR', 'iagM']:
        x_axis_labels = ["L", "M"]
        y_axis_labels = ["L", "M"]
    else:
        x_axis_labels = ["L", "M", "R"]
        y_axis_labels = ["L", "M", "R"]
    df = pd.read_csv(f'{path_data}/states_{info}_{l1}_{l2}.csv', header=None)
    ax = sns.heatmap(df, annot=True, cmap="YlGnBu", vmin=0, vmax=1,
                     xticklabels=x_axis_labels, yticklabels=y_axis_labels)
    plot_name = f"{path_plots}/states"
    plt.savefig(plot_name + ".pdf")
    plt.clf()

    # action probs
    df1 = pd.read_csv(f'{path_data}/agent1_probs_{info}.csv')
    ax = sns.lineplot(x='Episode', y='Action 1', linewidth=2.0, data=df1,
                      ci='sd', label='L')
    ax = sns.lineplot(x='Episode', y='Action 2', linewidth=2.0, data=df1,
                      ci='sd', label='M')
    if game not in ['iagRNE', 'iagR', 'iagM']:
        ax = sns.lineplot(x='Episode', y='Action 3', linewidth=2.0, data=df1,
                          ci='sd', label='R')
    ax.set(ylabel='Action probability')
    ax.set(xlabel='Iterations')
    ax.set_ylim(0, 1)
    ax.set_xlim(0, episodes)
    plot_name = f"{path_plots}/probs_ag1"
    plt.title(f"Action probabilities - Agent 1")
    plt.savefig(plot_name + ".pdf")
    plt.clf()

    df1 = pd.read_csv(f'{path_data}/agent2_probs_{info}.csv')
    ax = sns.lineplot(x='Episode', y='Action 1', linewidth=2.0, data=df1,
                      ci='sd', label='L')
    ax = sns.lineplot(x='Episode', y='Action 2', linewidth=2.0, data=df1,
                      ci='sd', label='M')
    if game not in ['iagRNE', 'iagR', 'iagM']:
        ax = sns.lineplot(x='Episode', y='Action 3', linewidth=2.0, data=df1,
                          ci='sd', label='R')
    ax.set(ylabel='Action probability')
    ax.set(xlabel='Iterations')
    ax.set_ylim(0, 1)
    ax.set_xlim(0, episodes)
    plot_name = f"{path_plots}/probs_ag2"
    plt.title(f"Action probabilities - Agent 2")
    plt.savefig(plot_name + ".pdf")
    plt.clf()
def play(n_lookaheads, trials, info, mooc, game, experiment):
    """Run a tournament between two agents over several trials and log results.

    `experiment` is a two-element list naming each agent's algorithm
    ('AC', 'ACom', 'ACoa', 'AComGP', 'LOLA', 'LOLAom', 'Q'); specific
    pairings get dedicated cross-update logic below.  Periodically (every
    5 trials) and at the end, payoff, state-distribution, and action-
    probability logs are written out as CSVs.

    NOTE(review): relies on many module-level globals (`env`, `u`, `u1`,
    `u2`, `hpAC`, `hpGP`, `hpL`, `hpQ`, `win_rollout`, `act_hist_log`,
    `payoff_episode_log1`, `payoff_episode_log2`) — confirm they are
    initialised before calling.
    """
    state_distribution_log = np.zeros((env.NUM_ACTIONS, env.NUM_ACTIONS))
    print("start iterations with", n_lookaheads[0], 'and', n_lookaheads[1],
          "lookaheads:")
    for trial in range(trials):
        if trial % 10 == 0:
            print(f"Trial {trial}...")
        # Build the two agents according to the experiment configuration.
        agents = [None, None]
        for i in range(len(experiment)):
            if experiment[i] == 'AC':
                agents[i] = ActorCriticAgent(i, hpAC, u[i], env.NUM_ACTIONS)
            elif experiment[i] == 'ACom' or experiment[i] == 'ACoa':
                agents[i] = OppoModelingACAgent(i, hpAC, u[i], env.NUM_ACTIONS)
            elif experiment[i] == 'AComGP':
                agents[i] = UMOMACAgent(i, hpAC, u[i], env.NUM_ACTIONS, hpGP=hpGP)
            elif experiment[i] == 'LOLA':
                if info == '0M':
                    agents[i] = PGDiceBase(i, env, hpL, u[i], mooc, u[i - 1])
                else:
                    agents[i] = PGDice1M(i, env, hpL, u[i], mooc, u[i - 1])
            elif experiment[i] == 'LOLAom':
                agents[i] = PGDiceOM(i, env, hpL, u[i], mooc, hpGP=hpGP)
            elif experiment[i] == 'Q':
                agents[i] = QLearningAgent(i, hpQ, u[i], env.NUM_ACTIONS)

        for update in range(hpL.n_update):
            # rollout actual current policies:
            if update % 100 == 0:
                print(f"Episode {update}...")
            r_s, a_s = step(agents, win_rollout)
            act_probs = [get_act_probs(a_s[0]), get_act_probs(a_s[1])]
            # Single-step interaction used for the learning updates below.
            r, a = step(agents, 1)

            # Pairing-specific cross updates: each side gets a detached copy
            # of the opponent's parameters before its lookahead update.
            if experiment == ['LOLA', 'LOLA']:
                theta1_ = agents[0].theta.clone().detach().requires_grad_(True)
                theta2_ = agents[1].theta.clone().detach().requires_grad_(True)
                LOLA_loop(agents[0], theta2_, n_lookaheads[0])
                LOLA_loop(agents[1], theta1_, n_lookaheads[1])
            if experiment == ['ACoa', 'ACoa']:
                theta1_ = agents[0].policy
                theta2_ = agents[1].policy
                agents[0].set_op_theta(theta2_)
                agents[1].set_op_theta(theta1_)
                agents[0].update(a[0], r[0], a[1])
                agents[1].update(a[1], r[1], a[0])
            if experiment == ['LOLA', 'ACoa']:
                theta1_ = torch.sigmoid(agents[0].theta.clone().detach())
                theta2_ = torch.tensor(agents[1].policy).requires_grad_(True)
                LOLA_loop(agents[0], theta2_, n_lookaheads[0])
                agents[1].set_op_theta(theta1_.numpy())
                agents[1].update(a[1], r[1], a[0])
            if experiment == ['ACoa', 'LOLA']:
                theta1_ = torch.tensor(agents[0].policy).requires_grad_(True)
                theta2_ = torch.sigmoid(agents[1].theta.clone().detach())
                LOLA_loop(agents[1], theta1_, n_lookaheads[1])
                agents[0].set_op_theta(theta2_.numpy())
                agents[0].update(a[0], r[0], a[1])
            if experiment == ['LOLA', 'AC']:
                theta2_ = torch.tensor(agents[1].policy).requires_grad_(True)
                LOLA_loop(agents[0], theta2_, n_lookaheads[0])
                agents[1].update(a[1], r[1])
            if experiment == ['AC', 'LOLA']:
                theta1_ = torch.tensor(agents[0].policy).requires_grad_(True)
                LOLA_loop(agents[1], theta1_, n_lookaheads[1])
                agents[0].update(a[0], r[0])

            # Per-agent updates for algorithms without pairing-specific
            # handling above (index i - 1 is the opponent, via wrap-around).
            for i, exp in enumerate(experiment):
                if exp == 'LOLAom':
                    agents[i].update_logs(np.log(act_probs[i - 1]))
                    if update > 1:
                        LOLAom_loop(agents[i],
                                    torch.tensor(np.log(act_probs[i - 1])),
                                    n_lookaheads[i])
                if exp == 'AC':
                    agents[i].update(a[i], r[i])
                if exp == 'ACom':
                    if update > 1:
                        agents[i].set_op_theta(act_probs[i - 1])
                    agents[i].update(a[i], r[i], a[i - 1])
                if exp == 'AComGP':
                    agents[i].update_logs(act_probs[i - 1])
                    if update > 1:
                        AComGP_loop(agents[i], a[i], r[i], a[i - 1],
                                    act_probs[i - 1], n_lookaheads[i])
                if exp == 'Q':
                    agents[i].update(a[i], r[i])

            a1, a2 = a_s
            r1, r2 = r_s
            # Only accumulate the joint-action distribution after the first
            # 10% of updates (burn-in).
            if update >= (0.1 * hpL.n_update):
                for rol_a in range(len(a1)):
                    for batch_a in range(len(a1[rol_a])):
                        state_distribution_log[a1[rol_a][batch_a],
                                               a2[rol_a][batch_a]] += 1

            ret1, score1 = get_return(r1, u1, mooc)
            ret2, score2 = get_return(r2, u2, mooc)

            if env.NUM_ACTIONS == 2:
                for i in range(len(act_hist_log)):
                    act_hist_log[i].append([
                        update, trial, n_lookaheads[i], act_probs[i][0],
                        act_probs[i][1]
                    ])
            else:
                for i in range(len(act_hist_log)):
                    act_hist_log[i].append([
                        update, trial, n_lookaheads[i], act_probs[i][0],
                        act_probs[i][1], act_probs[i][2]
                    ])

            payoff_episode_log1.append(
                [update, trial, n_lookaheads[0], score1])
            payoff_episode_log2.append(
                [update, trial, n_lookaheads[1], score2])

        # Intermediate dump of all logs every 5 trials.
        if trial % 5 == 0:
            columns = ['Episode', 'Trial', 'Lookahead', 'Payoff']
            df1 = pd.DataFrame(payoff_episode_log1, columns=columns)
            df2 = pd.DataFrame(payoff_episode_log2, columns=columns)
            path_data = f'results/tour_{experiment}_{game}_l{n_lookaheads[0]}_{n_lookaheads[1]}'  # /{mooc}/{hp.use_baseline}'
            mkdir_p(path_data)
            df1.to_csv(f'{path_data}/agent1_payoff_{info}.csv', index=False)
            df2.to_csv(f'{path_data}/agent2_payoff_{info}.csv', index=False)
            # Normalise by the number of counted joint actions so far.
            state_distribution = state_distribution_log / (
                hpL.batch_size * (0.9 * hpL.n_update) * (trial + 1) * win_rollout)
            df = pd.DataFrame(state_distribution)
            print(np.sum(state_distribution))
            df.to_csv(
                f'{path_data}/states_{info}_{n_lookaheads[0]}_{n_lookaheads[1]}.csv',
                index=False, header=None)
            if env.NUM_ACTIONS == 3:
                columns = [
                    'Episode', 'Trial', 'Lookahead', 'Action 1', 'Action 2',
                    'Action 3'
                ]
            else:
                columns = [
                    'Episode', 'Trial', 'Lookahead', 'Action 1', 'Action 2'
                ]
            df1 = pd.DataFrame(act_hist_log[0], columns=columns)
            df2 = pd.DataFrame(act_hist_log[1], columns=columns)
            df1.to_csv(f'{path_data}/agent1_probs_{info}.csv', index=False)
            df2.to_csv(f'{path_data}/agent2_probs_{info}.csv', index=False)
            del df1, df2, df

    # Final dump after all trials, into the local results folder.
    columns = ['Episode', 'Trial', 'Lookahead', 'Payoff']
    df1 = pd.DataFrame(payoff_episode_log1, columns=columns)
    df2 = pd.DataFrame(payoff_episode_log2, columns=columns)
    path_data = f'results_local/tour_{experiment}_{game}_l{n_lookaheads[0]}_{n_lookaheads[1]}'
    mkdir_p(path_data)
    df1.to_csv(f'{path_data}/agent1_payoff_{info}.csv', index=False)
    df2.to_csv(f'{path_data}/agent2_payoff_{info}.csv', index=False)
    state_distribution = state_distribution_log / (hpL.batch_size *
                                                   (0.9 * hpL.n_update) *
                                                   trials * win_rollout)
    df = pd.DataFrame(state_distribution)
    print(np.sum(state_distribution))
    df.to_csv(
        f'{path_data}/states_{info}_{n_lookaheads[0]}_{n_lookaheads[1]}.csv',
        index=False, header=None)
    if env.NUM_ACTIONS == 3:
        columns = [
            'Episode', 'Trial', 'Lookahead', 'Action 1', 'Action 2',
            'Action 3'
        ]
    else:
        columns = ['Episode', 'Trial', 'Lookahead', 'Action 1', 'Action 2']
    df1 = pd.DataFrame(act_hist_log[0], columns=columns)
    df2 = pd.DataFrame(act_hist_log[1], columns=columns)
    df1.to_csv(f'{path_data}/agent1_probs_{info}.csv', index=False)
    df2.to_csv(f'{path_data}/agent2_probs_{info}.csv', index=False)
    del df1, df2, df
with open(swift_log_filename) as swift_log_file: result = parse_lines(swift_log_file.readlines(), start_timestamp=start_timestamp, end_timestamp=end_timestamp) print("# Swift logs: " + str(result)) total_request_received = 0 for server in result: for method in result[server]["Request"]: total_request_received += result[server]["Request"][method]["total"] print("# Total Swift Requests Received: {}".format(total_request_received)) return result if __name__ == '__main__': LOGS_PATH = os.path.normpath(config.LOGS_PATH) PLOT_DIR = os.path.join(os.path.normpath(config.PLOT_DIR), os.path.basename(LOGS_PATH)) OUTPUT_FILE = utils.mkdir_p(os.path.join(PLOT_DIR, 'output.txt')) sys.stdout = Logger(OUTPUT_FILE) data = {} if not config.PLOTS_ONLY: for root, subdirs, files in os.walk(LOGS_PATH): scenario_alias = os.path.basename(os.path.dirname(root)) if scenario_alias in config.SCENARIOS: scenario = config.SCENARIOS[scenario_alias] if scenario not in data: data[scenario] = {} workload = os.path.basename(root) if workload in config.WORKLOADS: print "# Parsing Workload {} for Scenario {}".format(config.WORKLOADS[workload], scenario_alias) data[scenario][config.WORKLOADS[workload]] = parse_app(root, scenario, workload) with open(os.path.join(LOGS_PATH, "parsed_data.dat"), "w") as parsed_data_file:
def tasks_multi_figure(dataset, filename):
    # Plot per-task-type and aggregate ("total") CDFs of task runtimes, one
    # figure per task type per workload, comparing all scenarios on each figure.
    #
    # dataset:  nested dict — assumes the shape
    #           dataset[scenario][workload]['app']['tasks']['runtimes'][task_type]
    #           maps to a list of runtimes in seconds (TODO confirm against parser).
    # filename: output directory; plots are written as
    #           <filename>/cdf_task_<type>_<workload>.<config.PLOT_FORMAT>
    #           (utils.mkdir_p presumably creates the directory and returns the
    #           path — verify against utils).
    #
    # NOTE(review): this source was recovered from whitespace-collapsed text;
    # the indentation below is a faithful reconstruction but the exact nesting
    # of a few statements (marked inline) should be confirmed against history.
    workloads = []
    scenarios = []
    colors = {}
    color = None
    # First pass: assign each scenario a distinct color/marker set and collect
    # the union of workloads seen across all scenarios.
    for scenario in dataset:
        color = utils.getNewColor(color)
        colors[scenario] = color
        scenarios.append(scenario)
        for workload in dataset[scenario]:
            if workload not in workloads:
                workloads.append(workload)
    # `figures` doubles as a registry of matplotlib figure ids: the list index
    # of a task type is reused as its figure number via plt.figure(index).
    figures = ['total']
    for workload in workloads:
        print "# --- Workload {} ---".format(workload)
        for scenario in scenarios:
            print "# Scenario {}".format(scenario)
            try:
                tasks_runtimes = dataset[scenario][workload]['app']['tasks']['runtimes']
                total_tasks_runtimes = []
                for task_type in tasks_runtimes:
                    # Skip task types with no data or all-zero runtimes.
                    if tasks_runtimes[task_type] is not None and len(tasks_runtimes[task_type]) > 0 and sum(
                            tasks_runtimes[task_type]) > 0:
                        if task_type not in figures:
                            figures.append(task_type)
                        total_tasks_runtimes += tasks_runtimes[task_type]
                        # In-place sort: runtimes must be ascending for a CDF.
                        tasks_runtimes[task_type].sort()
                        print "# Task type {} composed by {} tasks".format(task_type, len(tasks_runtimes[task_type]))
                        # Empirical CDF: i-th sorted value maps to i/N.
                        cdf = []
                        for i, value in enumerate(tasks_runtimes[task_type]):
                            cdf.append(i / float(len(tasks_runtimes[task_type])))
                        x = np.array(tasks_runtimes[task_type])
                        cy = np.array(cdf)
                        # Python 2 integer division: ~5 markers along the curve.
                        markevery = cy.size / 5
                        plt.figure(figures.index(task_type))
                        plt.grid(True, which='both')
                        plt.plot(x, cy, color=colors[scenario]["line_color"],
                                 dashes=colors[scenario]["dash"],
                                 marker=colors[scenario]["marker"],
                                 markersize=colors[scenario]["markerSize"],
                                 markevery=markevery)
                # Aggregate curve over every task type for this scenario.
                total_tasks_runtimes.sort()
                print "# Total Tasks number is {}".format(len(total_tasks_runtimes))
                cdf = []
                for i, value in enumerate(total_tasks_runtimes):
                    cdf.append(i / float(len(total_tasks_runtimes)))
                x = np.array(total_tasks_runtimes)
                cy = np.array(cdf)
                markevery = cy.size / 5
                plt.figure(figures.index('total'))
                plt.grid(True)  # , which='both')
                plt.plot(x, cy, color=colors[scenario]["line_color"],
                         dashes=colors[scenario]["dash"],
                         marker=colors[scenario]["marker"],
                         markersize=colors[scenario]["markerSize"],
                         markeredgewidth=colors[scenario]["markeredgewidth"],
                         markeredgecolor=colors[scenario]["line_color"],
                         linewidth=3.0,
                         markevery=markevery)
            except KeyError:
                # Missing scenario/workload/task data: silently skip — this is
                # a deliberate best-effort over sparse result sets.
                pass
        # Finalize and save every figure accumulated for this workload.
        for task_type in figures:
            plt.figure(figures.index(task_type))
            plt.ylim((0, 1.05))
            plt.xscale('log')
            xlabel = plt.xlabel('Seconds')
            # title = plt.title('CDF')
            # title = plt.title(workload)
            plot_filename = utils.mkdir_p(os.path.join(filename, 'cdf_task_' + task_type + '_' + workload + '.' + config.PLOT_FORMAT))
            if task_type == "total":
                # For the aggregate figure: save a legend-free copy first, then
                # retarget the path so the legend version gets a '_legend' name.
                # NOTE(review): nesting of these two lines under the `if` is
                # reconstructed — confirm both belong to the branch.
                plt.savefig(plot_filename, bbox_inches='tight')
                plot_filename = utils.mkdir_p(os.path.join(filename, 'cdf_task_' + task_type + '_' + workload + '_legend.' + config.PLOT_FORMAT))
            # lgd = plt.legend(scenarios, loc='upper right',
            #                  bbox_to_anchor=(1.02, -0.1000), ncol=3)
            # Legend placed outside the axes on the right.
            lgd = plt.legend(scenarios, loc=2, bbox_to_anchor=(1.01, 1), ncol=1, borderaxespad=0.)
            # plt.savefig(plot_filename, bbox_extra_artists=(lgd, xlabel, title), bbox_inches='tight')
            # bbox_extra_artists keeps the out-of-axes legend inside the crop.
            plt.savefig(plot_filename, bbox_extra_artists=(lgd, xlabel), bbox_inches='tight')
            plt.close()
        print "# ------------------------------------"
if __name__ == '__main__':
    import argparse
    import json

    # Command-line entry point: read hyperparameters from a JSON config,
    # lay out the run/log/checkpoint directory tree, and launch training.
    parser = argparse.ArgumentParser(description='set input arguments')
    parser.add_argument('--config', action="store", type=str, default='config/config.json')
    parser.add_argument('--run_dir', action="store", type=str, default='RUN')
    args = parser.parse_args()

    # Validate inputs with explicit raises instead of `assert`: asserts are
    # stripped under `python -O`, which would silently skip these checks.
    if not os.path.isdir(args.run_dir):
        os.makedirs(args.run_dir)
    if not os.path.isfile(args.config):
        raise IOError('config file not found: {}'.format(args.config))

    # Use a context manager so the config file handle is closed promptly
    # (the original `json.load(open(...))` leaked the handle).
    with open(args.config, 'r') as config_file:
        hypes = json.load(config_file)

    hypes_train = hypes['train']
    project_name = hypes_train['project_name']
    # All run artifacts live under <run_dir>/<project_name>/.
    hypes_train['run_dir'] = os.path.join(args.run_dir, project_name)
    hypes_train['log_dir'] = os.path.join(args.run_dir, project_name, 'logs')
    hypes_train['ckpt_dir'] = os.path.join(args.run_dir, project_name, 'checkpoints')
    mkdir_p(hypes_train['run_dir'])
    mkdir_p(hypes_train['log_dir'])
    mkdir_p(hypes_train['ckpt_dir'])

    # kick off training
    main(hypes)