def testSharing_withOtherSessionBasedFileWriters(self):
    logdir = self.get_temp_dir()
    with session.Session() as sess:
        # Initial file writer.
        writer1 = writer.FileWriter(session=sess, logdir=logdir)
        writer1.add_summary(self._createTaggedSummary("one"), 1)
        writer1.flush()

        # Second writer in the same session: should share writer1's file.
        writer2 = writer.FileWriter(session=sess, logdir=logdir)
        writer2.add_summary(self._createTaggedSummary("two"), 2)
        writer2.flush()

        # Writer with a different logdir: must not appear in `logdir` at all.
        writer3 = writer.FileWriter(session=sess, logdir=logdir + "-other")
        writer3.add_summary(self._createTaggedSummary("three"), 3)
        writer3.flush()

        # Writer in a different session: should land in a separate file.
        time.sleep(1.1)  # Ensure filename has a different timestamp
        with session.Session() as other_sess:
            writer4 = writer.FileWriter(session=other_sess, logdir=logdir)
            writer4.add_summary(self._createTaggedSummary("four"), 4)
            writer4.flush()

        # Back in the first session: shares writer1's file again.
        writer5 = writer.FileWriter(session=sess, logdir=logdir)
        writer5.add_summary(self._createTaggedSummary("five"), 5)
        writer5.flush()

        event_paths = iter(sorted(glob.glob(os.path.join(logdir, "event*"))))

        # First file should have tags "one", "two", and "five".
        events = summary_iterator.summary_iterator(next(event_paths))
        self.assertEqual("brain.Event:2", next(events).file_version)
        for expected_tag in ("one", "two", "five"):
            self.assertEqual(expected_tag, next(events).summary.value[0].tag)
        self.assertRaises(StopIteration, lambda: next(events))

        # Second file should have just "four".
        events = summary_iterator.summary_iterator(next(event_paths))
        self.assertEqual("brain.Event:2", next(events).file_version)
        self.assertEqual("four", next(events).summary.value[0].tag)
        self.assertRaises(StopIteration, lambda: next(events))

        # No more files
        self.assertRaises(StopIteration, lambda: next(event_paths))

        # Just check that the other logdir file exists to be sure we wrote it
        self.assertTrue(glob.glob(os.path.join(logdir + "-other", "event*")))
def get_array_from_event_multi_episode(event_path, tag, rollout_indices, m):
    """Sum the per-rollout scalar series for `tag` over rollouts, then smooth.

    Args:
      event_path: path to a TF events file.
      tag: tag prefix; rollout `n` is logged under `tag + str(n)`.
      rollout_indices: sequence of rollout indices to collect.
      m: window passed to `moving_average`.

    Returns:
      Tuple `(arr, steps)`: the smoothed element-wise sum over rollouts
      (truncated to the shortest series), and the steps recorded for the
      first rollout.
    """
    num_rollouts = len(rollout_indices)
    per_rollout = [[] for _ in range(num_rollouts)]
    steps = []
    try:
        for event in summary_iterator(event_path):
            if hasattr(event.summary, 'value') and len(event.summary.value) > 0:
                first = event.summary.value[0]
                for i, n in enumerate(rollout_indices):
                    if first.tag == tag + '{}'.format(n):
                        per_rollout[i].append(first.simple_value)
                        if i == 0:
                            # Record steps once, keyed to the first rollout.
                            steps.append(event.step)
    except Exception:
        # Event files can be truncated mid-write (DataLossError); keep what
        # was read so far. Was a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit — narrowed to Exception.
        pass

    lengths = [len(r) for r in per_rollout]
    if len(np.unique(lengths)) > 1:
        print('warning: different lengths found')
    min_len = min(lengths)
    arr = np.array([np.array(r)[:min_len] for r in per_rollout]).sum(axis=0)  # sum over all rollouts
    steps = np.array(steps)
    arr = moving_average(arr, m, only_past=True)
    return arr, steps
def extract_values(logdir, filename, nave=100):
    """Extract (step, value) pairs from matching event files and build a
    TGraphErrors of block averages.

    Args:
      logdir: log directory (NOTE(review): immediately overwritten with
          "runs" below — preserved from the original; confirm intended).
      filename: substring used to grep the found event files.
      nave: number of points averaged per graph point.

    Returns:
      Tuple `(dat, g)`: the sorted list of (step, value) pairs and a ROOT
      TGraphErrors of per-block means with standard deviations as errors.
    """
    logdir = "runs"  # NOTE(review): hard-coded override of the argument.
    dat = []
    # Modernized from Python 2 (`print` statements, `xrange`) to Python 3,
    # matching the rest of the file (which uses f-strings).
    with os.popen("find %s/ -name *.out.* | grep %s" % (logdir, filename)) as pipe:
        for line in pipe:
            logpath = line.strip()
            print("parsing: ", logpath)
            for summary in summary_iterator(logpath):
                for v in summary.summary.value:
                    dat.append((summary.step, v.simple_value))
    print("extracted ", len(dat), " points")
    dat.sort()

    npts = len(dat) // nave
    g = rt.TGraphErrors(npts)
    for n in range(npts):
        block = dat[n * nave:(n + 1) * nave]
        # Mean and standard deviation of each block of `nave` values.
        mean = sum(val for _, val in block) / float(nave)
        mean_sq = sum(val * val for _, val in block) / float(nave)
        sig = sqrt(mean_sq - mean * mean)
        g.SetPoint(n, n * nave, mean)
        g.SetPointError(n, 0.0, sig)
    return dat, g
def run(args: argparse.Namespace):
    """Scan run directories under `args.path`, collect per-task training
    rewards from each run's tensorboard file, and drop into pdb for
    interactive inspection of the averaged rewards."""
    runs = {}
    for path in glob.glob(args.path + '/*'):
        basename = os.path.basename(path)
        # Names ending in `_<number>` start a new run entry; otherwise strip
        # the trailing `_suffix` from the name.
        if re.match('^.*_[0-9]+$', basename) is not None:
            runs[basename] = []
        else:
            basename = basename[:basename.rindex('_')]

        events_file = glob.glob(path + '/tb/*')[0]
        rewards = []
        for event in summary_iterator(events_file):
            try:
                tag = event.summary.value[0].tag
                value = event.summary.value[0].simple_value
                if tag.startswith('Reward_Train/Task_'):
                    task_idx = int(tag[tag.rindex('_') + 1:])
                    # Grow the per-task list on demand.
                    while task_idx >= len(rewards):
                        rewards.append([])
                    rewards[task_idx].append(value)
            except Exception as e:
                print(e)

        # Align all tasks to the shortest series, then average across tasks.
        min_length = min([len(r) for r in rewards])
        rewards = np.array([r[:min_length] for r in rewards])
        reduced_rewards = np.mean(rewards, 0)
        import pdb
        pdb.set_trace()
def testTrainReplicated(self):
    if ipu_utils.running_on_ipu_model():
        self.skipTest("Replicated top level graphs are not supported on the "
                      "IPU_MODEL target")

    def my_model_fn(features, labels, mode):  # pylint: disable=unused-argument
        self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)
        # Sum the input across the 4 replicas; identity makes it the train op.
        loss = ipu.ops.cross_replica_ops.cross_replica_sum(features, name="loss")
        train_op = array_ops.identity(loss)
        return model_fn_lib.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    def my_input_fn():
        ds = tu.create_dual_increasing_dataset(10, data_shape=[1],
                                               label_shape=[1])
        return ds.batch(batch_size=1, drop_remainder=True)

    ipu_options = ipu_utils.auto_select_ipus(ipu_utils.create_ipu_config(), 4)
    config = ipu_run_config.RunConfig(
        ipu_run_config=ipu_run_config.IPURunConfig(iterations_per_loop=2,
                                                   num_replicas=4,
                                                   ipu_options=ipu_options),
        log_step_count_steps=1,
        save_summary_steps=1)

    estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn, config=config)

    session_run_counter = _SessionRunCounter()
    num_steps = 6
    estimator.train(input_fn=my_input_fn, steps=num_steps,
                    hooks=[session_run_counter])

    # One session run per `iterations_per_loop` steps.
    self.assertEqual(session_run_counter.num_session_runs,
                     num_steps // config.ipu_run_config.iterations_per_loop)

    events_files = glob.glob(estimator.model_dir + "/*tfevents*")
    assert len(events_files) == 1
    loss_output = [
        v.simple_value
        for e in summary_iterator.summary_iterator(events_files[0])
        for v in e.summary.value if "loss" in v.tag
    ]

    # loss is averaged across iterations per loop
    self.assertEqual(loss_output, [14.0, 16.0, 18.0])
def _get_best_eval_result(self, event_files):
    """Get the best eval result from event files.

    Args:
      event_files: Absolute pattern of event files.

    Returns:
      The best eval result (a tag->simple_value dict containing a 'loss'
      entry), or None if no such event was found.
    """
    if not event_files:
        return None

    best_eval_result = None
    # `os.path.join(event_files)` with a single argument was a no-op; pass
    # the pattern to Glob directly. Debug prints removed.
    for event_file in gfile.Glob(event_files):
        for event in summary_iterator.summary_iterator(event_file):
            if not event.HasField('summary'):
                continue
            event_eval_result = {
                value.tag: value.simple_value
                for value in event.summary.value
                if value.HasField('simple_value')
            }
            # Only events that report a loss are eligible.
            if 'loss' in event_eval_result and (
                    best_eval_result is None
                    or self._compare_fn(best_eval_result, event_eval_result)):
                best_eval_result = event_eval_result
    return best_eval_result
def extract_macaw(path, terminate: int = None):
    """Read the 'Eval_Reward/Mean' scalar series from the first events file
    under `path`.

    Returns (steps / 1000 as float32, Gaussian-smoothed reward values).
    """
    event_files = [f for f in os.listdir(path) if 'events' in f]
    path = f'{path}/{event_files[0]}'
    print(path)
    steps = []
    rewards = []
    try:
        for entry in summary_iterator(path):
            try:
                if len(entry.summary.value):
                    first = entry.summary.value[0]
                    step, tag, value = entry.step, first.tag, first.simple_value
                    # Stop once past the requested step horizon.
                    if terminate and step > terminate:
                        break
                    if tag != 'Eval_Reward/Mean':
                        continue
                    rewards.append(value)
                    steps.append(step)
            except Exception as e:
                print(entry)
                raise e
    except Exception as e:
        # Truncated event files end with a read error; keep what we have.
        print(e)
    rewards = gaussian_filter1d(rewards, sigma=4)
    return np.array(steps).astype(np.float32) / 1000, np.array(rewards)
def __retrieve_summaries_for_event_file(team_uuid, model_uuid, folder,
                                        event_file_path, retrieve_scalars,
                                        retrieve_images):
    """Collect scalar and/or image summaries from a single event file.

    Returns a tuple `(sorted tags, sorted steps, summaries)` where each
    summary is a dict with 'step' and 'values' keys.
    """
    steps_seen = set()
    tags_seen = set()
    summaries = []
    for event in summary_iterator(event_file_path):
        values = {}
        for value in event.summary.value:
            if retrieve_scalars and value.HasField('simple_value'):
                tags_seen.add(value.tag)
                values[value.tag] = value.simple_value
            elif retrieve_images and value.HasField('image'):
                # Resolve a download URL for the stored image blob.
                exists, image_url = blob_storage.get_event_summary_image_download_url(
                    team_uuid, model_uuid, folder, event.step, value.tag,
                    value.image.encoded_image_string)
                if exists:
                    tags_seen.add(value.tag)
                    values[value.tag] = {
                        'width': value.image.width,
                        'height': value.image.height,
                        'image_url': image_url,
                    }
        if len(values) > 0:
            steps_seen.add(event.step)
            summaries.append({'step': event.step, 'values': values})
    return sorted(tags_seen), sorted(steps_seen), summaries
def extract_macaw(path, terminate: int = None):
    """Read the mean-reward series from the events file at `path`, handling
    both MACAW ('Eval_Reward/Mean') and PEARL
    ('test_tasks_mean_reward/mean_return') logging conventions.

    Returns (steps / 1000 as float32, Gaussian-smoothed reward values).
    """
    values = []
    steps = []
    pearl = False
    try:
        for entry in summary_iterator(path):
            try:
                if len(entry.summary.value):
                    first = entry.summary.value[0]
                    step, tag, value = entry.step, first.tag, first.simple_value
                    if terminate and step > terminate:
                        break
                    if tag not in ('Eval_Reward/Mean',
                                   'test_tasks_mean_reward/mean_return'):
                        continue
                    if tag == 'test_tasks_mean_reward/mean_return':
                        pearl = True
                        step *= 2000
                    values.append(value)
                    steps.append(step)
            except Exception as e:
                print(entry)
                raise e
    except Exception as e:
        # Truncated event files end with a read error; keep what we have.
        print(e)
    sigma = 5
    # PEARL logs far fewer points, so smooth less aggressively.
    values = gaussian_filter1d(values, sigma=sigma if not pearl else sigma / 8.)
    return np.array(steps).astype(np.float32) / 1000, np.array(values)
def read_eventfile(self, logdir):
    # read the oldest eventfile from logdir and return its "loss" events
    event_paths = glob.glob(os.path.join(logdir, "event*"))
    if len(event_paths) == 0:
        # no eventfiles in local directory, try to read from hdfs via the
        # `hadoop fs -ls` command-line interface
        hdfs_paths = os.path.join(logdir, "event*")
        output = subprocess.Popen(['hadoop', 'fs', '-ls', hdfs_paths],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        search_results = []
        for line in output.stdout:
            search_results.append(line)
        if len(search_results) == 0:
            return []
        # `hadoop fs -ls` lines look like:
        #   perms repl owner group size date time path
        # so fields 5 and 6 are the modification date and time; sort by them.
        # NOTE(review): on Python 3, `output.stdout` yields bytes, so
        # `" ".join([...])` over bytes fields would raise TypeError — confirm
        # this path runs under Python 2 or that stdout is decoded upstream.
        search_results = sorted(
            search_results,
            key=lambda x: " ".join([x.split()[5], x.split()[6]]))
        # Last whitespace-separated field of each line is the file path.
        event_paths = [x.split()[-1] for x in search_results]
    else:
        # local files: oldest first, by creation time
        event_paths = sorted(event_paths, key=lambda x: os.path.getctime(x))
    events = summary_iterator(event_paths[0])
    # keep only events whose first value is tagged "loss"
    valid_events = [
        e for e in events
        if e.summary.value and e.summary.value[0].tag == "loss"
    ]
    return valid_events
def get_hyperparameter(path):
    """
    Reads the tf.Event files generated by `hp.hparams` in order to retrieve
    model hyperparameters

    Args:
        path (str): Path to the `events.out.tfevents.*.v2` file

    Returns:
        Dict: A dict. with keys given by the names of the hyperparameters
        and their values; False when no session-start info is present.
    """
    for event in summary_iterator(path):
        for value in event.summary.value:
            payload = plugin_data_pb2.HParamsPluginData.FromString(
                value.metadata.plugin_data.content)
            if not payload.HasField("session_start_info"):
                continue
            hparams = payload.session_start_info.hparams
            # Each entry is a protobuf value wrapper; unwrap to a plain value.
            return {name: list(protobuf_to_dict(hparams[name]).values())[0]
                    for name in hparams}
    return False
def parse(path):
    """Takes an events file and outputs a dictionary mapping a metric to a
    list of values"""
    metrics = defaultdict(list)
    for event in summary_iterator(path):
        for value in event.summary.value:
            metrics[value.tag].append(value.simple_value)
    return metrics
def testTrain(self):
    shutil.rmtree("testlogs", True)

    tu.configure_ipu_system()

    run_cfg = run_config.RunConfig()
    classifier = estimator.Estimator(model_fn=model_fn,
                                     config=run_cfg,
                                     model_dir="testlogs")
    classifier.train(input_fn=input_fn, steps=16)

    event_file = glob.glob("testlogs/event*")
    # assertEqual over assertTrue(len(...) == 1): failures report the count.
    self.assertEqual(len(event_file), 1)

    compile_for_ipu_count = 0
    for summary in summary_iterator.summary_iterator(event_file[0]):
        for val in summary.summary.value:
            if val.tag == "compile_summary":
                for evt_str in val.tensor.string_val:
                    evt = IpuTraceEvent.FromString(evt_str)
                    # BUG FIX: the original wrote
                    #   if (cond and len(report)) > 0:
                    # which only worked by accident (`False > 0` is False and
                    # `True and n` is `n`). Put the comparison where it
                    # belongs.
                    if (evt.type == IpuTraceEvent.COMPILE_END
                            and len(evt.compile_end.compilation_report) > 0):
                        compile_for_ipu_count += 1

    # Initialization graph and main graph
    self.assertEqual(compile_for_ipu_count, 2)
def sum_log(path, blocking=('adj',)):
    """Read tensor summaries from the event file at `path`.

    Args:
      path: path to a TF events file.
      blocking: substrings; tags containing any of them are skipped.
          (Changed from a mutable list default to a tuple; never mutated,
          so behavior is identical.)

    Returns:
      Dict mapping each kept tag to a list of ndarrays decoded from the
      summary tensors.
    """
    tags = get_keys(path)
    vals = {}
    for t in tags:
        # Keep only tags that contain none of the blocked substrings.
        if not any(str(t).find(b) >= 0 for b in blocking):
            vals[t] = []
    # (Removed unused `steps` dict from the original.)
    try:
        for e in summary_iterator(path):
            for v in e.summary.value:
                if v.tag in vals:
                    vals[v.tag].append(tensor_util.MakeNdarray(v.tensor))
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); still a deliberate best-effort
        # catch for DataLossError from truncated files.
        print('Event file possibly corrupt: {}'.format(path))
    return vals
def _get_best_eval_result(self, event_files):
    """Get the best eval result from event files.

    Args:
      event_files: Absolute pattern of event files.

    Returns:
      The best eval result.
    """
    if not event_files:
        return None

    best = None
    for event_file in gfile.Glob(os.path.join(event_files)):
        for event in summary_iterator.summary_iterator(event_file):
            if not event.HasField('summary'):
                continue
            candidate = {
                value.tag: value.simple_value
                for value in event.summary.value
                if value.HasField('simple_value')
            }
            # Adopt the candidate if it is the first one, or if the
            # comparison function prefers it over the current best.
            if candidate and (best is None
                              or self._compare_fn(best, candidate)):
                best = candidate
    return best
def _validate_tbx_result(self, params=None, excluded_params=None):
    """Check episode-reward scalars and hparams plugin content in the
    TBX events file written under `self.test_dir`."""
    try:
        from tensorflow.python.summary.summary_iterator \
            import summary_iterator
    except ImportError:
        print("Skipping rest of test as tensorflow is not installed.")
        return

    events_file = list(glob.glob(f"{self.test_dir}/events*"))[0]
    excluded_params = excluded_params or []
    results = []
    hparam_tags = ("_hparams_/experiment", "_hparams_/session_start_info")
    for event in summary_iterator(events_file):
        for v in event.summary.value:
            if v.tag == "ray/tune/episode_reward_mean":
                results.append(v.simple_value)
            elif v.tag in hparam_tags and params:
                # Included params must appear in the plugin payload,
                # excluded ones must not.
                content = v.metadata.plugin_data.content
                for key in params:
                    self.assertIn(key, content)
                for key in excluded_params:
                    self.assertNotIn(key, content)

    self.assertEqual(len(results), 3)
    self.assertSequenceEqual([int(res) for res in results], [4, 5, 6])
def _verify_events(self, output_dir, names_to_values):
    """Verifies that the given `names_to_values` are found in the summaries.

    Also checks that a GraphDef was written out to the events file.

    Args:
      output_dir: An existing directory where summaries are found.
      names_to_values: A dictionary of strings to values.
    """
    # Check that the results were saved. The events file may have additional
    # entries, e.g. the event version stamp, so have to parse things a bit.
    output_filepath = glob.glob(os.path.join(output_dir, '*'))
    self.assertEqual(len(output_filepath), 1)

    saved_results = {}
    graph_def = None
    for event in summary_iterator.summary_iterator(output_filepath[0]):
        if event.summary.value:
            for value in event.summary.value:
                saved_results[value.tag] = value.simple_value
        elif event.graph_def:
            graph_def = event.graph_def

    for name in names_to_values:
        self.assertAlmostEqual(names_to_values[name], saved_results[name], 5)
    self.assertIsNotNone(graph_def)
def _get_best_eval_result(self, event_files):
    """Get the best eval result from event files.

    Args:
      event_files: Absolute pattern of event files.

    Returns:
      The best eval result, or None unless the best candidate was updated
      at least twice.
    """
    if not event_files:
        return None

    update_count = 0
    best = None
    for event_file in gfile.Glob(os.path.join(event_files)):
        for event in summary_iterator.summary_iterator(event_file):
            if not event.HasField('summary'):
                continue
            candidate = {}
            for value in event.summary.value:
                if value.HasField('simple_value'):
                    candidate[value.tag] = value.simple_value
            if candidate and (best is None
                              or self._compare_fn(best, candidate)):
                update_count += 1
                best = candidate
    # A single update means there was nothing to compare against.
    if update_count < 2:
        return None
    return best
def get_summary_logs(pattern=None, dataset_type='train', bucket_name='my-bucket', project_name='covid-bert'):
    """Collect summary scalars for all matching runs into one DataFrame.

    Args:
      pattern: optional regex; only runs whose name matches are included.
      dataset_type: summaries subfolder to read ('train', 'eval', ...).
      bucket_name: bucket folder under the project data dir.
      project_name: project folder under the bucket.

    Returns:
      DataFrame indexed by step with one column per tag plus 'run_name';
      empty DataFrame when no files match.
    """
    f_names = glob.glob(
        os.path.join(find_project_root(), 'data', bucket_name, project_name,
                     'pretrain', '*', 'summaries', dataset_type, '*'))
    files = []
    for f_name in f_names:
        run_name = f_name.split('/')[-4]
        if pattern is None or re.search(pattern, run_name):
            files.append(f_name)
    if len(files) == 0:
        return pd.DataFrame()

    frames = []
    for f_name in files:
        run_name = f_name.split('/')[-4]
        summary_data = defaultdict(dict)
        for e in summary_iterator(f_name):
            for v in e.summary.value:
                # BUG FIX: the original tested `if v.simple_value:` — a stored
                # value of exactly 0.0 is falsy and fell through to the tensor
                # branch. Use HasField to test field presence instead.
                if v.HasField('simple_value'):
                    summary_data[v.tag][int(e.step)] = float(v.simple_value)
                else:
                    summary_data[v.tag][int(e.step)] = float(
                        tensor_util.MakeNdarray(v.tensor))
        frame = pd.DataFrame(summary_data)
        frame['run_name'] = run_name
        frames.append(frame)
    # Concatenate once at the end instead of quadratically inside the loop.
    return pd.concat(frames, axis=0)
def _extract_loss_and_global_step(self, event_folder):
    """Returns the loss and global step in last event."""
    event_paths = glob.glob(os.path.join(event_folder, "events*"))
    self.assertNotEmpty(
        event_paths, msg="Event file not found in dir %s" % event_folder)

    loss = None
    global_step_count = None
    for event in summary_iterator.summary_iterator(event_paths[-1]):
        losses = [v.simple_value for v in event.summary.value
                  if v.tag == "loss"]
        if not losses:
            # Without a loss, the step is meaningless for our purposes.
            continue
        if global_step_count is None or event.step > global_step_count:
            global_step_count = event.step
            loss = losses[-1]

    return (loss, global_step_count)
def draw_learning_curve(path_tensorboard_files, architecture_names, fontsize=18):
    """This function draws the learning curve of several trainings on the same graph.

    :param path_tensorboard_files: list of tensorboard files corresponding to the models to plot.
    :param architecture_names: list of the names of the models
    :param fontsize: (optional) fontsize used for the graph.
    """
    # reformat inputs
    path_tensorboard_files = utils.reformat_to_list(path_tensorboard_files)
    architecture_names = utils.reformat_to_list(architecture_names)
    assert len(path_tensorboard_files) == len(architecture_names), \
        'names and tensorboard lists should have same length'

    # loop over architectures
    plt.figure()
    for event_path, name in zip(path_tensorboard_files, architecture_names):
        # Collect the loss recorded at the end of every epoch; silence TF's
        # logger while reading the event file.
        logging.getLogger('tensorflow').disabled = True
        epoch_losses = []
        for event in summary_iterator(event_path):
            for value in event.summary.value:
                if value.tag in ('loss', 'accuracy', 'epoch_loss'):
                    epoch_losses.append(value.simple_value)
        plt.plot(1 - np.array(epoch_losses), label=name, linewidth=2)

    # finalise plot
    plt.grid()
    plt.legend(fontsize=fontsize)
    plt.xlabel('Epochs', fontsize=fontsize)
    plt.ylabel('Soft Dice scores', fontsize=fontsize)
    plt.tick_params(axis='both', labelsize=fontsize)
    plt.title('Validation curves', fontsize=fontsize)
    plt.tight_layout(pad=0)
    plt.show()
def read_tensorboard(logdir: str) -> Dict[Text, Tuple[float, int, float]]:
    r""" Read Tensorboard event files from a `logdir`

    Return:
      a dictionary mapping from `tag` (string) to list of tuple
      `(wall_time, step, value)`
    """
    all_log = defaultdict(list)
    # Event files are ordered by the timestamp embedded in the filename
    # (its fourth dot-separated component).
    event_files = sorted(glob.glob(f"{logdir}/event*"),
                         key=lambda p: int(os.path.basename(p).split('.')[3]))
    for f in event_files:
        for event in summary_iterator(f):
            t = event.wall_time
            step = event.step
            summary = event.summary
            for value in event.summary.value:
                tag = value.tag
                plugin_name = value.metadata.plugin_data.plugin_name
                data = tf.make_ndarray(value.tensor)
                if plugin_name == "scalars":
                    pass  # scalar ndarray is usable as-is
                elif plugin_name == "text":
                    # Scalar text tensors decode to one string; higher-rank
                    # tensors decode element-wise.
                    if len(value.tensor.tensor_shape.dim) == 0:
                        data = str(data.tolist(), 'utf-8')
                    else:
                        data = np.array([str(i, 'utf-8') for i in data])
                else:
                    raise NotImplementedError(f"Unknown data type: {summary}")
                all_log[tag].append((t, step, data))
    # Sort each tag's records by step.
    return {tag: sorted(records, key=lambda r: r[1])
            for tag, records in all_log.items()}
def tensorboard_data(
    fname: Union[str, Path], tag: str, max_step: Optional[int] = None) \
        -> Tuple[List[int], List[float]]:
    """Return (steps, values) for `tag` from the events file `fname`,
    stopping once an event's step exceeds `max_step` (when given)."""
    values = []
    step_nums = []
    seen_tags = set()
    for event in summary_iterator(str(fname)):
        if max_step and event.step > max_step:
            break
        for v in event.summary.value:
            seen_tags.add(v.tag)
            if v.tag == tag:
                step_nums.append(event.step)
                values.append(v.simple_value)

    if not values:
        # Help the caller spot a typo'd tag name.
        suggestions = difflib.get_close_matches(tag, seen_tags, n=3)
        if not suggestions:
            suggestions = seen_tags  # type: ignore
        warnings.warn("no data found in {}, you may have meant {}".format(
            fname, suggestions))
    return step_nums, values
def __iter__(self) -> Iterator[Record]:
    """`summary_iterator` yields a structured record that can be accessed by
    first calling `MessageToDict`. Afterwards, it can be accessed like a
    normal dict with a structure as follows:

    wallTime: float
    (optional) fileVersion: str
    (optional) step: int
    (optional) summary:
        value: [
            tag: str
            simpleValue: float
        ]

    Brackets mean it can have multiple values (like a list).
    """
    # Fallback per-tag step counters for events that carry no 'step' field.
    default_step = Counter()
    for e in summary_iterator(str(self.path)):
        e = MessageToDict(e)
        wall_time = e['wallTime']
        # Events without a summary (e.g. the file-version stamp) raise
        # KeyError inside this block and are skipped by the handler below.
        try:
            v = e['summary']['value']
            assert len(v) == 1  # expect exactly one value per event
            v = v[0]
            tag = v['tag']
            value = float(v['simpleValue'])
            # NOTE(review): special-case remap of best_score ~2.0 to 1.0 —
            # presumably corrects a known logging artifact; confirm intent.
            if abs(value - 2.0) < 1e-6 and tag == 'best_score':
                value = 1.0
            try:
                epoch = int(e['step'])
            except KeyError:
                # No step recorded: synthesize a monotonically increasing
                # per-tag step instead.
                epoch = default_step[tag]
                default_step[tag] += 1
            yield Record(wall_time, tag, value, epoch=epoch)
        except KeyError:
            pass
def testSummaryIteratorEventsAddedAfterEndOfFile(self):
    test_dir = os.path.join(self.get_temp_dir(), "events")
    with writer.FileWriter(test_dir) as w:
        session_log_start = event_pb2.SessionLog.START
        w.add_session_log(event_pb2.SessionLog(status=session_log_start), 1)
        w.flush()

        path = glob.glob(os.path.join(test_dir, "event*"))[0]
        rr = summary_iterator.summary_iterator(path)

        # The file opens with the version stamp...
        ev = next(rr)
        self.assertEqual("brain.Event:2", ev.file_version)
        # ...followed by the START message.
        ev = next(rr)
        self.assertEqual(1, ev.step)
        self.assertEqual(session_log_start, ev.session_log.status)
        # Nothing further has been written: EOF.
        self.assertRaises(StopIteration, lambda: next(rr))

        # Append another event after the iterator already hit EOF.
        w.add_session_log(event_pb2.SessionLog(status=session_log_start), 2)
        w.flush()

        # The same iterator picks up the newly appended event...
        ev = next(rr)
        self.assertEqual(2, ev.step)
        self.assertEqual(session_log_start, ev.session_log.status)
        # ...and then reports EOF again.
        self.assertRaises(StopIteration, lambda: next(rr))
def get_events(fname, x_axis='step'):
    """Returns event dictionary for given run, has form
    {tag1: {step1: val1}, tag2: ..}

    If x_axis is set to "time", step is replaced by timestamp
    """
    result = {}
    try:
        for event in summary_iterator.summary_iterator(fname):
            if x_axis == 'step':
                x_val = event.step
            elif x_axis == 'time':
                x_val = event.wall_time
            else:
                assert False, f"Unknown x_axis ({x_axis})"

            vals = {val.tag: val.simple_value for val in event.summary.value}
            # step_time: value
            for tag, value in vals.items():
                event_dict = result.setdefault(tag, {})
                if x_val in event_dict:
                    # Duplicate x coordinate for this tag: keep the newest
                    # value, but warn about the collision.
                    print(f"Warning, overwriting {tag} for {x_axis}={x_val}")
                    print(f"old val={event_dict[x_val]}")
                    print(f"new val={vals[tag]}")
                event_dict[x_val] = value
    except Exception as e:
        # Event files can be truncated mid-write; return what we have.
        print(e)
    return result
def delete_old_empty_logs(
        logs_directory: Path,
        timedelta: datetime.timedelta = datetime.timedelta(days=1)):
    """
    Removes logs which are more than 24 hours old, and contain less than 2
    epochs worth of data. This delete cases that crashed immediately, but
    doesn't delete ones that just started running.

    :param logs_directory: The root logs directory containing folder.
    :param timedelta: The time frame to consider an old file.
    """
    logs_directory = Path(logs_directory)
    log_directories = [
        path for path in logs_directory.glob('*') if path.is_dir()
    ]
    for log_directory in log_directories:
        match = re.search(r'(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})',
                          str(log_directory))
        # BUG FIX: a directory whose name carries no timestamp previously
        # crashed with AttributeError on `match.group(1)`. Leave such
        # directories alone instead.
        if match is None:
            continue
        log_datetime = datetime.datetime.strptime(match.group(1),
                                                  '%Y-%m-%d-%H-%M-%S')
        if log_datetime > (datetime.datetime.now() - timedelta):
            continue  # Too recent; might still be starting up.
        keep_event_file = False
        for event_path in log_directory.glob('**/events.out.tfevents.*'):
            for summary in summary_iterator(str(event_path)):
                if summary.step > 0:
                    # One non-zeroth step is enough evidence of progress;
                    # stop scanning instead of reading every event.
                    keep_event_file = True
                    break
            if keep_event_file:
                break
        if not keep_event_file:
            shutil.rmtree(log_directory)
def test_summaries_in_tf_function(self):
    if not context.executing_eagerly():
        return

    class MyLayer(keras.layers.Layer):

        def call(self, inputs):
            # Record the mean of the layer input as a scalar summary.
            summary_ops_v2.scalar('mean', math_ops.reduce_mean(inputs))
            return inputs

    tmp_dir = self.get_temp_dir()
    file_writer = summary_ops_v2.create_file_writer_v2(tmp_dir)
    with file_writer.as_default(), summary_ops_v2.always_record_summaries():
        my_layer = MyLayer()
        x = array_ops.ones((10, 10))

        def my_fn(x):
            return my_layer(x)

        _ = my_fn(x)

    event_file = gfile.Glob(os.path.join(tmp_dir, 'events*'))
    self.assertLen(event_file, 1)
    # Collect every tag written to the single event file.
    tags = {
        val.tag
        for e in summary_iterator.summary_iterator(event_file[0])
        for val in e.summary.value
    }
    self.assertEqual(set(['my_layer/mean']), tags)
def assertSummaryEventsWritten(self, log_dir):
    # Asserts summary files do get written when log_dir is provided.
    summary_files = file_io.list_directory_v2(log_dir)
    self.assertNotEmpty(
        summary_files, 'Summary should have been written and '
        'log_dir should not be empty.')

    # Asserts the content of the summary file.
    event_pb_written = False
    event_tags = []
    for summary_file in summary_files:
        event_file_path = os.path.join(log_dir, summary_file)
        for event_pb in summary_iterator.summary_iterator(event_file_path):
            if event_pb.step <= 0:
                continue
            self.assertEqual(event_pb.step, 32)
            event_tags.append(event_pb.summary.value[0].tag)
            event_pb_written = True
    self.assertCountEqual(event_tags, [
        'evaluation_categorical_accuracy_vs_iterations',
        'evaluation_loss_vs_iterations',
        'evaluation_mean_squared_error_1_vs_iterations',
        'evaluation_mean_squared_error_2_vs_iterations',
    ])

    # Verifying at least one non-zeroth step is written to summary.
    self.assertTrue(event_pb_written)
def testSesssionArgument_callableProvider(self):
    logdir = self.get_temp_dir()
    setup_writer = summary_ops_v2.create_file_writer(logdir=logdir)
    with summary_ops_v2.always_record_summaries(), setup_writer.as_default():
        summary1 = summary_ops_v2.scalar("one", 0.0, step=0)
        summary2 = summary_ops_v2.scalar("two", 0.0, step=0)

    # Run the first summary in one session...
    sess1 = session.Session()
    sess1.run(setup_writer.init())
    sess1.run(summary1)
    sess1.run(setup_writer.flush())
    time.sleep(1.1)  # Ensure filename has a different timestamp
    # ...and the second in another, producing a second event file.
    sess2 = session.Session()
    sess2.run(setup_writer.init())
    sess2.run(summary2)
    sess2.run(setup_writer.flush())

    # Using get_default_session as session provider should make this FileWriter
    # send its summaries to the current default session's shared summary writer
    # resource (initializing it as needed).
    test_writer = writer.FileWriter(session=ops.get_default_session,
                                    logdir=logdir)
    with sess1.as_default():
        test_writer.add_summary(self._createTaggedSummary("won"), 1)
        test_writer.flush()
    with sess2.as_default():
        test_writer.add_summary(self._createTaggedSummary("too"), 1)
        test_writer.flush()

    event_paths = iter(sorted(glob.glob(os.path.join(logdir, "event*"))))

    def check_tags(path, expected_tags):
        # Each file starts with the version stamp, then the tagged summaries,
        # then EOF.
        events = summary_iterator.summary_iterator(path)
        self.assertEqual("brain.Event:2", next(events).file_version)
        for expected in expected_tags:
            self.assertEqual(expected, next(events).summary.value[0].tag)
        self.assertRaises(StopIteration, lambda: next(events))

    # First file should have tags "one", "won"
    check_tags(next(event_paths), ("one", "won"))
    # Second file should have tags "two", "too"
    check_tags(next(event_paths), ("two", "too"))
    # No more files
    self.assertRaises(StopIteration, lambda: next(event_paths))
def testTrainWithAutomaticSharding(self):
    if ipu_utils.running_on_ipu_model():
        self.skipTest("Replicated top level graphs are not supported on the "
                      "IPU_MODEL target")

    def my_model_fn(features, labels, mode):
        self.assertEqual(model_fn_lib.ModeKeys.TRAIN, mode)

        # Single dense unit as a linear regressor.
        with variable_scope.variable_scope("vs", use_resource=True):
            predictions = layers.Dense(units=1)(features)

        loss = losses.mean_squared_error(labels=labels,
                                         predictions=predictions)
        sharded_optimizer_obj = sharded_optimizer.ShardedOptimizer(
            gradient_descent.GradientDescentOptimizer(0.1))
        train_op = sharded_optimizer_obj.minimize(loss)

        return model_fn_lib.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)

    def my_input_fn():
        dataset = dataset_ops.Dataset.from_tensor_slices(
            _create_regression_dataset(num_samples=1000, num_features=5))
        return dataset.batch(batch_size=2, drop_remainder=True).repeat()

    ipu_options = ipu_utils.auto_select_ipus(ipu_utils.create_ipu_config(), 4)
    config = ipu_run_config.RunConfig(
        ipu_run_config=ipu_run_config.IPURunConfig(iterations_per_loop=2,
                                                   num_shards=4,
                                                   autosharding=True,
                                                   ipu_options=ipu_options),
        log_step_count_steps=1,
        save_summary_steps=1)

    estimator = ipu_estimator.IPUEstimator(model_fn=my_model_fn, config=config)
    estimator.train(input_fn=my_input_fn, steps=10)

    events_files = glob.glob(estimator.model_dir + "/*tfevents*")
    assert len(events_files) == 1
    loss_output = [
        v.simple_value
        for e in summary_iterator.summary_iterator(events_files[0])
        for v in e.summary.value if "loss" in v.tag
    ]

    # Training should reduce the loss overall.
    self.assertTrue(loss_output[0] > loss_output[-1])
def testCloseAndReopen(self):
    test_dir = self._CleanTestDir("close_and_reopen")
    sw = self._FileWriter(test_dir)
    sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1)
    sw.close()
    # Sleep at least one second to make sure we get a new event file name.
    time.sleep(1.2)
    sw.reopen()
    sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 2)
    sw.close()

    # We should now have 2 events files.
    event_paths = sorted(glob.glob(os.path.join(test_dir, "event*")))
    # assertEquals is a deprecated alias; use assertEqual throughout.
    self.assertEqual(2, len(event_paths))

    def check_file(path, expected_step):
        # Each file holds the version stamp followed by one START message
        # at `expected_step`, then EOF.
        rr = summary_iterator.summary_iterator(path)
        ev = next(rr)
        self._assertRecent(ev.wall_time)
        self.assertEqual("brain.Event:2", ev.file_version)
        ev = next(rr)
        self._assertRecent(ev.wall_time)
        self.assertEqual(expected_step, ev.step)
        self.assertEqual(SessionLog.START, ev.session_log.status)
        self.assertRaises(StopIteration, lambda: next(rr))

    # First file: the event written before close(); second file: the event
    # written after reopen().
    check_file(event_paths[0], 1)
    check_file(event_paths[1], 2)
def _summary_iterator(test_dir):
    """Reads events from test_dir/events.

    Args:
      test_dir: Name of the test directory.

    Returns:
      A summary_iterator
    """
    # Iterate the lexicographically last (most recent) event file.
    latest = sorted(glob.glob(os.path.join(test_dir, "event*")))[-1]
    return summary_iterator.summary_iterator(latest)
def get_summary_value(dir_, step, keyword):
    """Get summary value for given step and keyword.

    Args:
      dir_: directory containing event files.
      step: the event step to look at.
      keyword: substring matched against value tags.

    Returns:
      The first matching simple_value, or None when no event file exists or
      nothing matches.
    """
    writer_cache.FileWriterCache.clear()
    # Get last Event written.  (Leftover debug print removed.)
    event_paths = glob.glob(os.path.join(dir_, 'events*'))
    if not event_paths:
        # Previously this raised IndexError when no events were written yet.
        return None
    for last_event in summary_iterator.summary_iterator(event_paths[-1]):
        if last_event.step != step:
            continue
        for value in last_event.summary.value:
            if keyword in value.tag:
                return value.simple_value
    return None
def _summaries(eval_dir):
    """Yields `tensorflow.Event` protos from event files in the eval dir.

    Args:
      eval_dir: Directory containing summary files with eval metrics.

    Yields:
      `tensorflow.Event` object read from the event files.
    """
    pattern = os.path.join(eval_dir, _EVENT_FILE_GLOB_PATTERN)
    for event_file in gfile.Glob(pattern):
        yield from summary_iterator.summary_iterator(event_file)
def testManagedSessionDoNotKeepSummaryWriter(self):
  """Two managed sessions with close_summary_writer=True write two files.

  Fix: `assertEquals` is a deprecated alias of `assertEqual` (removed in
  Python 3.12); replaced throughout.
  """
  logdir = self._test_dir("managed_not_keep_summary_writer")
  with ops.Graph().as_default():
    summary.scalar("c1", constant_op.constant(1))
    summary.scalar("c2", constant_op.constant(2))
    summary.scalar("c3", constant_op.constant(3))
    summ = summary.merge_all()
    sv = supervisor.Supervisor(logdir=logdir, summary_op=None)
    with sv.managed_session(
        "", close_summary_writer=True, start_standard_services=False) as sess:
      sv.summary_computed(sess, sess.run(summ))
    # Sleep 1.2s to make sure that the next event file has a different name
    # than the current one.
    time.sleep(1.2)
    with sv.managed_session(
        "", close_summary_writer=True, start_standard_services=False) as sess:
      sv.summary_computed(sess, sess.run(summ))
  event_paths = sorted(glob.glob(os.path.join(logdir, "event*")))
  self.assertEqual(2, len(event_paths))
  # The two event files should have the same contents.
  for path in event_paths:
    # The summary iterator should report the summary once as we closed the
    # summary writer across the 2 sessions.
    rr = summary_iterator.summary_iterator(path)
    # The first event should list the file_version.
    ev = next(rr)
    self.assertEqual("brain.Event:2", ev.file_version)
    # The next one has the graph and metagraph.
    ev = next(rr)
    self.assertTrue(ev.graph_def)
    ev = next(rr)
    self.assertTrue(ev.meta_graph_def)
    # The next one should have the values from the summary.
    # But only once.
    ev = next(rr)
    self.assertProtoEquals("""
      value { tag: 'c1' simple_value: 1.0 }
      value { tag: 'c2' simple_value: 2.0 }
      value { tag: 'c3' simple_value: 3.0 }
      """, ev.summary)
    # The next one should be a stop message if we closed cleanly.
    ev = next(rr)
    self.assertEqual(event_pb2.SessionLog.STOP, ev.session_log.status)
    # We should be done.
    with self.assertRaises(StopIteration):
      next(rr)
def summary_step_keyword_to_value_mapping(dir_):
  """Builds a {step: {tag: simple_value}} mapping from the last events file.

  Args:
    dir_: Directory containing the events files.

  Returns:
    A dict mapping each recorded step to a dict of tag -> simple_value.
  """
  writer_cache.FileWriterCache.clear()
  # Get last Event written.
  event_paths = glob.glob(os.path.join(dir_, 'events*'))
  step_keyword_to_value = {}
  for event in summary_iterator.summary_iterator(event_paths[-1]):
    per_step = step_keyword_to_value.setdefault(event.step, {})
    if event.summary is not None:
      for value in event.summary.value:
        per_step[value.tag] = value.simple_value
  return step_keyword_to_value
def assertLoggedMessagesAre(self, expected_messages):
  """Asserts the last events file contains exactly `expected_messages`.

  Args:
    expected_messages: Iterable of (level, message) pairs expected to have
      been logged, in order.

  Fix: the comment below says we want the *last* event file, but
  `glob.glob` returns paths in arbitrary (filesystem-dependent) order, so
  `[-1]` was not guaranteed to be the newest file. Sort the paths; event
  file names embed a timestamp, so lexicographic order is chronological.
  """
  self._sw.close()
  # If the tests runs multiple time in the same directory we can have
  # more than one matching event file. We only want to read the last one.
  event_paths = sorted(glob.glob(os.path.join(self._work_dir, "event*")))
  self.assertTrue(event_paths)
  event_reader = summary_iterator.summary_iterator(event_paths[-1])
  # Skip over the version event.
  next(event_reader)
  for level, message in expected_messages:
    event = next(event_reader)
    # NOTE(review): exact equality with time.time() can only hold if the
    # enclosing fixture stubs out time.time — confirm before relying on it.
    self.assertEqual(event.wall_time, time.time())
    self.assertEqual(event.log_message.level, level)
    self.assertEqual(event.log_message.message, message)
def list_summaries(logdir):
  """Read all summaries under the logdir into a `_SummaryFile`.

  Args:
    logdir: A path to a directory that contains zero or more event files,
      either as direct children or in transitive subdirectories. Summaries
      in these events must only contain old-style scalars, images, and
      histograms. Non-summary events, like `graph_def`s, are ignored.

  Returns:
    A `_SummaryFile` object reflecting all summaries written to any event
    files in the logdir or any of its descendant directories.

  Raises:
    ValueError: If an event file contains an summary of unexpected kind.
  """
  result = _SummaryFile()
  # Dispatch table from the summary value's oneof kind to the container it
  # belongs in; hoisted out of the loops since it never changes.
  containers_by_kind = {
      'simple_value': result.scalars,
      'image': result.images,
      'histo': result.histograms,
      'tensor': result.tensors,
  }
  for dirpath, _, filenames in os.walk(logdir):
    for filename in filenames:
      if not filename.startswith('events.out.'):
        continue
      path = os.path.join(dirpath, filename)
      for event in summary_iterator.summary_iterator(path):
        if not event.summary:  # (e.g., it's a `graph_def` event)
          continue
        for value in event.summary.value:
          # Case on the `value` rather than the summary metadata because
          # the Keras callback uses `summary_ops_v2` to emit old-style
          # summaries. See b/124535134.
          kind = value.WhichOneof('value')
          container = containers_by_kind.get(kind)
          if container is None:
            raise ValueError(
                'Unexpected summary kind %r in event file %s:\n%r'
                % (kind, path, event))
          container.add(_ObservedSummary(logdir=dirpath, tag=value.tag))
  return result
def _get_kept_steps(self, event_files):
  """Get the steps that the model was evaluated at, from event files.

  Args:
    event_files: Absolute pattern of event files.

  Returns:
    steps_kept: A list of steps in which the model was evaluated.

  Fixes:
    - `os.path.join(event_files)` with a single argument was a no-op;
      the pattern is now globbed directly.
    - Membership testing against the growing list was O(n) per event
      (O(n^2) overall); a side `set` makes it O(1) while the returned
      list still preserves first-seen order.
  """
  if not event_files:
    return None

  steps_kept = []
  seen_steps = set()
  for event_file in gfile.Glob(event_files):
    for event in summary_iterator.summary_iterator(event_file):
      if event.step not in seen_steps:
        seen_steps.add(event.step)
        steps_kept.append(event.step)
  return steps_kept
def _verify_summaries(self, output_dir, names_to_values):
  """Verifies that the given `names_to_values` are found in the summaries.

  Args:
    output_dir: An existing directory where summaries are found.
    names_to_values: A dictionary of strings to values.
  """
  # Check that the results were saved. The events file may have additional
  # entries, e.g. the event version stamp, so have to parse things a bit.
  output_filepath = glob.glob(os.path.join(output_dir, '*'))
  self.assertEqual(len(output_filepath), 1)

  events = summary_iterator.summary_iterator(output_filepath[0])
  # Collapse every summary value into a tag -> simple_value mapping.
  saved_results = {
      value.tag: value.simple_value
      for event in events if event.summary.value
      for value in event.summary.value
  }
  for name, expected in names_to_values.items():
    self.assertAlmostEqual(expected, saved_results[name], 5)
def assert_scalar_summary(self, output_dir, names_to_values):
  """Asserts that the given output directory contains written summaries.

  Args:
    output_dir: The output directory in which to look for event files.
    names_to_values: A dictionary of summary names to values.
  """
  # The events file may have additional entries, e.g. the event version
  # stamp, so have to parse things a bit.
  output_filepath = glob.glob(os.path.join(output_dir, '*'))
  self.assertEqual(len(output_filepath), 1)

  events = summary_iterator.summary_iterator(output_filepath[0])
  saved_results = {}
  for event in events:
    if not event.summary.value:
      continue
    for value in event.summary.value:
      saved_results[value.tag] = value.simple_value
  for name, expected in names_to_values.items():
    self.assertAlmostEqual(expected, saved_results[name])
def testSharing_withExplicitSummaryFileWriters(self):
  """File sharing between FileWriter(session=...) and create_file_writer().

  Writers created against the same session and logdir (with the default
  shared name) are expected to append to one events file; a writer with a
  different shared name, or one bound to a different session, gets its own
  file. The tail of the test reads the event files back and checks which
  tags landed where.
  """
  logdir = self.get_temp_dir()
  with session.Session() as sess:
    # Initial file writer via FileWriter(session=?)
    writer1 = writer.FileWriter(session=sess, logdir=logdir)
    writer1.add_summary(self._createTaggedSummary("one"), 1)
    writer1.flush()

    # Next one via create_file_writer(), should use same file
    writer2 = summary_ops_v2.create_file_writer(logdir=logdir)
    with summary_ops_v2.always_record_summaries(), writer2.as_default():
      summary2 = summary_ops_v2.scalar("two", 2.0, step=2)
    sess.run(writer2.init())
    sess.run(summary2)
    sess.run(writer2.flush())

    # Next has different shared name, should be in separate file
    time.sleep(1.1)  # Ensure filename has a different timestamp
    writer3 = summary_ops_v2.create_file_writer(logdir=logdir, name="other")
    with summary_ops_v2.always_record_summaries(), writer3.as_default():
      summary3 = summary_ops_v2.scalar("three", 3.0, step=3)
    sess.run(writer3.init())
    sess.run(summary3)
    sess.run(writer3.flush())

    # Next uses a second session, should be in separate file
    time.sleep(1.1)  # Ensure filename has a different timestamp
    with session.Session() as other_sess:
      writer4 = summary_ops_v2.create_file_writer(logdir=logdir)
      with summary_ops_v2.always_record_summaries(), writer4.as_default():
        summary4 = summary_ops_v2.scalar("four", 4.0, step=4)
      other_sess.run(writer4.init())
      other_sess.run(summary4)
      other_sess.run(writer4.flush())

      # Next via FileWriter(session=?) uses same second session, should be in
      # same separate file. (This checks sharing in the other direction)
      writer5 = writer.FileWriter(session=other_sess, logdir=logdir)
      writer5.add_summary(self._createTaggedSummary("five"), 5)
      writer5.flush()

    # One more via create_file_writer(), should use same file
    writer6 = summary_ops_v2.create_file_writer(logdir=logdir)
    with summary_ops_v2.always_record_summaries(), writer6.as_default():
      summary6 = summary_ops_v2.scalar("six", 6.0, step=6)
    sess.run(writer6.init())
    sess.run(summary6)
    sess.run(writer6.flush())

  event_paths = iter(sorted(glob.glob(os.path.join(logdir, "event*"))))

  # First file should have tags "one", "two", and "six"
  events = summary_iterator.summary_iterator(next(event_paths))
  self.assertEqual("brain.Event:2", next(events).file_version)
  self.assertEqual("one", next(events).summary.value[0].tag)
  self.assertEqual("two", next(events).summary.value[0].tag)
  self.assertEqual("six", next(events).summary.value[0].tag)
  self.assertRaises(StopIteration, lambda: next(events))

  # Second file should have just "three"
  events = summary_iterator.summary_iterator(next(event_paths))
  self.assertEqual("brain.Event:2", next(events).file_version)
  self.assertEqual("three", next(events).summary.value[0].tag)
  self.assertRaises(StopIteration, lambda: next(events))

  # Third file should have "four" and "five"
  events = summary_iterator.summary_iterator(next(event_paths))
  self.assertEqual("brain.Event:2", next(events).file_version)
  self.assertEqual("four", next(events).summary.value[0].tag)
  self.assertEqual("five", next(events).summary.value[0].tag)
  self.assertRaises(StopIteration, lambda: next(events))

  # No more files
  self.assertRaises(StopIteration, lambda: next(event_paths))
def _EventsReader(self, test_dir):
  """Returns a summary_iterator over the newest events file in `test_dir`.

  Fix: the comment below says we want the *last* matching file, but
  `glob.glob` returns paths in arbitrary (filesystem-dependent) order, so
  `[-1]` was not guaranteed to pick the newest file. Sort the paths; event
  file names embed a timestamp, so lexicographic order is chronological.
  """
  # If the tests runs multiple times in the same directory we can have
  # more than one matching event file. We only want to read the last one.
  event_paths = sorted(glob.glob(os.path.join(test_dir, "event*")))
  self.assertTrue(event_paths)
  return summary_iterator.summary_iterator(event_paths[-1])