def stream_tfevents(path, file_api, run, step=0, namespace=""):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    global_step_key = namespaced_tag("global_step", namespace)
    try:
        for summary in tf.train.summary_iterator(path):
            parsed = tf_summary_to_dict(summary, namespace=namespace)
            if last_step != parsed[global_step_key]:
                last_step = parsed[global_step_key]
                # Must have more than _timestamp, _step, and global_step
                if len(row) > 3:
                    step += 1
                    row["_step"] = step
                    last_row = history_dict_to_json(run, deepcopy(row))
                    file_api.push("wandb-history.jsonl",
                                  util.json_dumps_safer_history(last_row))
                    row = {}
            row.update(parsed)
    except tf.errors.DataLossError:
        wandb.termwarn("Found a truncated record in tfevents file, stopping parse")
    step += 1
    row["_step"] = step
    last_row = history_dict_to_json(run, deepcopy(row))
    file_api.push("wandb-history.jsonl",
                  util.json_dumps_safer_history(last_row))
    return last_row

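# `namespaced_tag` is not shown above. A minimal sketch of what it is assumed
# to do: prefix a tag with its namespace, so ("global_step", "train") becomes
# "train/global_step". The real wandb helper may handle more edge cases.
def namespaced_tag(tag, namespace=""):
    return namespace + "/" + tag if namespace else tag
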
def _write(self):
    self._current_timestamp = self._current_timestamp or time.time()
    # Jupyter closes files between cell executions, this reopens the file
    # for resuming.
    if self._file is None:
        self._file = open(self.fname, 'a')
    if self.row:
        self._lock.acquire()
        # Jupyter starts logging the first time wandb.log is called in a cell.
        # This will resume the run and potentially update self._steps
        self.ensure_jupyter_started()
        try:
            self.row['_runtime'] = self._current_timestamp - self._start_time
            self.row['_timestamp'] = self._current_timestamp
            self.row['_step'] = self._steps
            if self.stream_name != "default":
                self.row["_stream"] = self.stream_name
            self._transform()
            self._file.write(util.json_dumps_safer_history(self.row))
            self._file.write('\n')
            self._file.flush()
            os.fsync(self._file.fileno())
            if self._add_callback:
                self._add_callback(self.row)
            self._index(self.row)
            self.row = {}
        finally:
            self._lock.release()
        return True
    else:
        return False

def _write(self):
    self._current_timestamp = self._current_timestamp or time.time()
    # Saw a race in tests where we closed history and another log was called
    # so we check if self._file is set to ensure we don't bomb out
    if self.row and self._file:
        self._lock.acquire()
        # Jupyter starts logging the first time wandb.log is called in a cell.
        # This will resume the run and potentially update self._steps
        self.ensure_jupyter_started()
        try:
            self.row['_runtime'] = self._current_timestamp - self._start_time
            self.row['_timestamp'] = self._current_timestamp
            self.row['_step'] = self._steps
            if self.stream_name != "default":
                self.row["_stream"] = self.stream_name
            self._transform()
            self._file.write(util.json_dumps_safer_history(self.row))
            self._file.write('\n')
            self._file.flush()
            os.fsync(self._file.fileno())
            if self._add_callback:
                self._add_callback(self.row)
            self._index(self.row)
            self.row = {}
        finally:
            self._lock.release()
        return True
    else:
        return False

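# Both _write variants rely on the same durability pattern: append one JSON
# object per line, flush Python's buffer, then fsync so the row survives a
# hard crash. A minimal standalone sketch of that pattern using only the
# stdlib; `append_jsonl_durably` is a hypothetical helper, not part of wandb.
import json
import os

def append_jsonl_durably(fname, row):
    with open(fname, 'a') as f:
        f.write(json.dumps(row))
        f.write('\n')
        f.flush()
        os.fsync(f.fileno())
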
def publish_history(self, data, step=None, run=None):
    run = run or self._run
    data = data_types.history_dict_to_json(run, data, step=step)
    history = wandb_internal_pb2.HistoryRecord()
    for k, v in six.iteritems(data):
        item = history.item.add()
        item.key = k
        item.value_json = json_dumps_safer_history(v)
    self._publish_history(history)

def _save_row(self, row):
    data = {}
    for k, v in six.iteritems(row):
        if v is None:
            continue
        if isinstance(v, data_types.Histogram):
            v = v.to_json()
        elif isinstance(v, data_types.WBValue):
            # TODO(jhr): support more wandb data types
            continue
        data[k] = json.loads(json_dumps_safer_history(v))
    self._tbwatcher._sender._save_history(data)

def publish_history(self, data, step=None, run=None, publish_step=True):
    run = run or self._run
    data = data_types.history_dict_to_json(run, data, step=step)
    history = pb.HistoryRecord()
    if publish_step:
        assert step is not None
        history.step.num = step
        data.pop("_step", None)
    for k, v in six.iteritems(data):
        item = history.item.add()
        item.key = k
        item.value_json = json_dumps_safer_history(v)  # type: ignore
    self._publish_history(history)

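# Both publish_history variants flatten a history dict into repeated
# (key, value_json) items on a HistoryRecord protobuf. The sketch below
# mirrors that loop with a plain list standing in for the protobuf and
# json.dumps standing in for json_dumps_safer_history; `flatten_history`
# is illustrative only, not wandb's internal API.
import json

def flatten_history(data, step=None):
    if step is not None:
        data.pop("_step", None)
    return step, [(k, json.dumps(v)) for k, v in data.items()]
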
def stream_tfevents(path, file_api, step=0):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    for summary in tf.train.summary_iterator(path):
        parsed = tf_summary_to_dict(summary)
        if last_step != parsed["global_step"]:
            step += 1
            last_step = parsed["global_step"]
            # TODO: handle time
            if len(row) > 0:
                last_row = to_json(row)
                buffer.append(Chunk("wandb-history.jsonl",
                                    util.json_dumps_safer_history(to_json(row))))
        row.update(parsed)
    file_api._send(buffer)
    return last_row

def stream_tfevents(path, file_api, step=0, namespace=""):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    global_step_key = namespaced_tag("global_step", namespace)
    for summary in tf.train.summary_iterator(path):
        parsed = tf_summary_to_dict(summary, namespace=namespace)
        if last_step != parsed[global_step_key]:
            step += 1
            row["_step"] = step
            last_step = parsed[global_step_key]
            # TODO: handle time
            if len(row) > 0:
                last_row = to_json(row)
                file_api.push("wandb-history.jsonl",
                              util.json_dumps_safer_history(to_json(row)))
        row.update(parsed)
    return last_row

def stream_tfevents(path, file_api, run, step=0, namespace=""):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    global_step_key = namespaced_tag("global_step", namespace)
    for summary in tf.train.summary_iterator(path):
        parsed = tf_summary_to_dict(summary, namespace=namespace)
        if last_step != parsed[global_step_key]:
            step += 1
            row["_step"] = step
            last_step = parsed[global_step_key]
            # TODO: handle time
            if len(row) > 0:
                last_row = history_dict_to_json(run, row)
                file_api.push("wandb-history.jsonl",
                              util.json_dumps_safer_history(last_row))
        row.update(parsed)
    # TODO: It's not clear to me why we still have wandb.data_types in last_row
    # here, but we do so we convert again
    return history_dict_to_json(run, last_row)
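
# The newer stream_tfevents variants only touch one file_api surface:
# push(filename, line). A hypothetical stub is therefore enough to exercise
# them offline; StubFileApi and the event-file path below are assumptions
# for illustration, not part of wandb.
class StubFileApi:
    def __init__(self):
        self.pushed = []

    def push(self, filename, line):
        # Collect each serialized history row instead of sending it.
        self.pushed.append((filename, line))

# Example (assumes a real tfevents file and an initialized run):
# file_api = StubFileApi()
# last_row = stream_tfevents("events.out.tfevents.example", file_api, run)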