def _history_update(self, record, history_dict):
    """Assign a step when missing and back-fill synced step metrics.

    Every key of ``history_dict`` is matched against ``self._metric_defines``.
    Keys without a definition are offered to ``_history_define_metric``; a
    definition produced that way is forwarded to ``_handle_defined_metric``
    wrapped in a record flagged ``control.local``.  For metrics with
    ``options.step_sync`` and a ``step_metric`` that is absent from
    ``history_dict``, the value held in ``self._metric_track`` (if any) is
    added to both ``history_dict`` and ``record.history``.

    Args:
        record: Record whose ``history.item`` list is extended in place.
        history_dict: History key/value mapping; updated in place.
    """
    # if syncing an old run, we can skip this logic
    if history_dict.get("_step") is None:
        self._history_assign_step(record, history_dict)

    synced_steps = {}

    # Look for metric matches
    for key in history_dict:
        metric = self._metric_defines.get(key)
        if not metric:
            metric = self._history_define_metric(key)
            if not metric:
                continue
            local_record = wandb_internal_pb2.Record()
            local_record.metric.CopyFrom(metric)
            local_record.control.local = True  # Dont store this, just send it
            self._handle_defined_metric(local_record)

        if not (metric.options.step_sync and metric.step_metric):
            continue
        if metric.step_metric in history_dict:
            continue
        step = self._metric_track.get(metric.step_metric)
        if step is not None:
            synced_steps[metric.step_metric] = step

    if not synced_steps:
        return

    history_dict.update(synced_steps)
    for step_key, step_value in synced_steps.items():
        entry = record.history.item.add()
        entry.key = step_key
        entry.value_json = json.dumps(step_value)
def _parse_pb(self, data, exit_pb=None):
    """Parse one serialized record and apply sync-time overrides.

    Args:
        data: Serialized bytes of a ``wandb_internal_pb2.Record``.
        exit_pb: An "exit" record held back from an earlier call, if any.

    Returns:
        A ``(pb, exit_pb, flag)`` tuple.  ``flag`` is True when the record was
        only printed (view mode) or is an "exit" record that is being held
        back, and False otherwise.  Presumably callers skip sending when the
        flag is True -- confirm against the caller.

    Raises:
        AssertionError: If a "final" record is parsed while no "exit" record
            is being held.
    """
    pb = wandb_internal_pb2.Record()
    pb.ParseFromString(data)
    record_type = pb.WhichOneof("record_type")

    if self._view:
        if self._verbose:
            print("Record:", pb)
        else:
            print("Record:", record_type)
        return pb, exit_pb, True

    if record_type == "run":
        # Apply the overrides configured on this object, then request a response.
        if self._run_id:
            pb.run.run_id = self._run_id
        if self._project:
            pb.run.project = self._project
        if self._entity:
            pb.run.entity = self._entity
        pb.control.req_resp = True
    elif record_type == "exit":
        # Hold the exit record back; it is substituted for the "final" record.
        exit_pb = pb
        return pb, exit_pb, True
    elif record_type == "final":
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O`` and ``None`` is never returned as ``pb``.
        if not exit_pb:
            raise AssertionError("final seen without exit")
        pb = exit_pb
        exit_pb = None
    return pb, exit_pb, False
def run(self):
    """Replay every item in ``self._sync_list`` through a ``SendManager``.

    Each item is opened with ``DataStore.open_for_scan``; every scanned chunk
    is parsed into a ``Record`` and sent.  When a record has
    ``control.req_resp`` set, a response is awaited on the response queue for
    up to 20 seconds.

    Raises:
        Exception: If no response arrives within the timeout.
    """
    for sync_item in self._sync_list:
        run_dir = os.path.dirname(sync_item)
        settings = settings_static.SettingsStatic(
            {"files_dir": os.path.join(run_dir, "files"), "_start_time": 0}
        )
        responses = queue.Queue()
        send_manager = sender.SendManager(settings=settings, resp_q=responses)
        store = datastore.DataStore()
        store.open_for_scan(sync_item)

        # scan_data() returning None marks the end of the stream.
        for chunk in iter(store.scan_data, None):
            record = wandb_internal_pb2.Record()
            record.ParseFromString(chunk)
            send_manager.send(record)
            if not record.control.req_resp:
                continue
            try:
                responses.get(timeout=20)
            except queue.Empty:
                raise Exception("timeout?")
        send_manager.finish()
def _make_record(
    self,
    run=None,
    config=None,
    files=None,
    summary=None,
    history=None,
    stats=None,
    exit=None,
    artifact=None,
    tbrecord=None,
    alert=None,
    final=None,
    metric=None,
    header=None,
    footer=None,
    request=None,
    telemetry=None,
    preempting=None,
):
    """Wrap a single sub-message in a new ``pb.Record``.

    The arguments are examined in a fixed priority order and only the first
    truthy one is copied into the record field of the same name.

    Returns:
        The populated ``pb.Record``.

    Raises:
        Exception: If no argument is truthy.
    """
    record = pb.Record()
    # Priority order matters: only the first truthy entry is used.
    candidates = (
        ("run", run),
        ("config", config),
        ("summary", summary),
        ("history", history),
        ("files", files),
        ("stats", stats),
        ("exit", exit),
        ("artifact", artifact),
        ("tbrecord", tbrecord),
        ("alert", alert),
        ("final", final),
        ("header", header),
        ("footer", footer),
        ("request", request),
        ("telemetry", telemetry),
        ("metric", metric),
        ("preempting", preempting),
    )
    for field_name, message in candidates:
        if message:
            getattr(record, field_name).CopyFrom(message)
            return record
    raise Exception("Invalid record")
def _make_record(
    self,
    run: pb.RunRecord = None,
    config: pb.ConfigRecord = None,
    files: pb.FilesRecord = None,
    summary: pb.SummaryRecord = None,
    history: pb.HistoryRecord = None,
    stats: pb.StatsRecord = None,
    exit: pb.RunExitRecord = None,
    artifact: pb.ArtifactRecord = None,
    tbrecord: pb.TBRecord = None,
    alert: pb.AlertRecord = None,
    final: pb.FinalRecord = None,
    metric: pb.MetricRecord = None,
    header: pb.HeaderRecord = None,
    footer: pb.FooterRecord = None,
    request: pb.Request = None,
    telemetry: tpb.TelemetryRecord = None,
    preempting: pb.RunPreemptingRecord = None,
) -> pb.Record:
    """Wrap a single sub-message in a new ``pb.Record``.

    The arguments are examined in a fixed priority order and only the first
    truthy one is copied into the record field of the same name.

    Returns:
        The populated record.

    Raises:
        Exception: If no argument is truthy.
    """
    record = pb.Record()
    # Priority order matters: only the first truthy entry is used.
    candidates = (
        ("run", run),
        ("config", config),
        ("summary", summary),
        ("history", history),
        ("files", files),
        ("stats", stats),
        ("exit", exit),
        ("artifact", artifact),
        ("tbrecord", tbrecord),
        ("alert", alert),
        ("final", final),
        ("header", header),
        ("footer", footer),
        ("request", request),
        ("telemetry", telemetry),
        ("metric", metric),
        ("preempting", preempting),
    )
    for field_name, message in candidates:
        if message:
            getattr(record, field_name).CopyFrom(message)
            return record
    raise Exception("Invalid record")
def send_log(self, data):
    """Serialize ``data`` to JSON and enqueue it as a log record.

    ``data`` is first passed through ``data_types.history_dict_to_json`` and
    then ``json_dumps_safer_history``.  The resulting record is put on
    ``self.process_queue`` and ``constants.NOTIFY_PROCESS`` is put on
    ``self.notify_queue``.
    """
    json_ready = data_types.history_dict_to_json(self._run, data)
    log_data = wandb_internal_pb2.LogData(
        json=json_dumps_safer_history(json_ready)
    )
    record = wandb_internal_pb2.Record()
    record.log.CopyFrom(log_data)
    self.process_queue.put(record)
    self.notify_queue.put(constants.NOTIFY_PROCESS)
def send_history(self, data):
    """Build a history record from ``data`` and hand it to ``_queue_process``.

    ``data`` is converted with ``data_types.history_dict_to_json``; each
    resulting key becomes one history item whose ``value_json`` is produced
    by ``json_dumps_safer_history``.
    """
    record = wandb_internal_pb2.Record()
    json_ready = data_types.history_dict_to_json(self._run, data)
    for key, value in json_ready.items():
        entry = record.history.item.add()
        entry.key = key
        entry.value_json = json_dumps_safer_history(value)
    self._queue_process(record)
def _save_summary(self, summary_dict: SummaryDict, flush: bool = False) -> None:
    """Encode ``summary_dict`` as a summary record and forward it.

    Args:
        summary_dict: Mapping of summary keys to JSON-serializable values.
        flush: When True the record goes through ``_dispatch_record``;
            otherwise it is put on ``_sender_q`` unless
            ``self._settings._offline`` is set.
    """
    summary_record = wandb_internal_pb2.SummaryRecord()
    for key, value in summary_dict.items():
        entry = summary_record.update.add()
        entry.key = key
        entry.value_json = json.dumps(value)
    record = wandb_internal_pb2.Record(summary=summary_record)

    if flush:
        self._dispatch_record(record)
        return
    if not self._settings._offline:
        self._sender_q.put(record)
def _make_record(self, run=None, config=None, files=None, summary=None, stats=None, exit=None):
    """Create a ``Record`` and copy in every truthy argument.

    Unlike a first-match chain, each argument is checked independently, in
    the order run, config, summary, files, stats, exit.

    Returns:
        The new record (left empty when no argument is truthy).
    """
    rec = wandb_internal_pb2.Record()
    ordered_fields = (
        ("run", run),
        ("config", config),
        ("summary", summary),
        ("files", files),
        ("stats", stats),
        ("exit", exit),
    )
    for field_name, message in ordered_fields:
        if message:
            getattr(rec, field_name).CopyFrom(message)
    return rec
def send_output(self, name, data):
    """Queue one piece of captured console output for processing.

    Args:
        name: Stream the output came from; must be "stdout" or "stderr".
        data: Output text, stored in the ``line`` field of ``OutputData``.

    Raises:
        ValueError: If ``name`` is neither "stdout" nor "stderr".
    """
    if name == "stdout":
        otype = wandb_internal_pb2.OutputData.OutputType.STDOUT
    elif name == "stderr":
        otype = wandb_internal_pb2.OutputData.OutputType.STDERR
    else:
        # This branch used to print "unknown type" and fall through, which
        # then failed with UnboundLocalError because ``otype`` was never set.
        raise ValueError("unknown output type: {!r}".format(name))

    output = wandb_internal_pb2.OutputData(output_type=otype, line=data)
    output.timestamp.GetCurrentTime()
    rec = wandb_internal_pb2.Record()
    rec.output.CopyFrom(output)
    self._queue_process(rec)
def test_proto_write_partial():
    """Write a small history record through ``DataStore`` as a partial block."""
    history = wandb_internal_pb2.HistoryRecord()
    for key, value in {"this": 2, "that": 4}.items():
        entry = history.item.add()
        entry.key = key
        entry.value_json = json.dumps(value)

    record = wandb_internal_pb2.Record()
    record.history.CopyFrom(history)

    wandb._set_internal_process()
    store = datastore.DataStore()
    store.open_for_write(FNAME)
    store.write(record)
    store.close()
def _make_record(
    self,
    run=None,
    config=None,
    files=None,
    summary=None,
    history=None,
    stats=None,
    exit=None,
    artifact=None,
    tbrecord=None,
    final=None,
    header=None,
    footer=None,
    request=None,
):
    """Wrap a single sub-message in a new ``wandb_internal_pb2.Record``.

    The arguments are examined in a fixed priority order and only the first
    truthy one is copied into the record field of the same name.

    Returns:
        The populated record.

    Raises:
        Exception: If no argument is truthy.
    """
    record = wandb_internal_pb2.Record()
    # Priority order matters: only the first truthy entry is used.
    candidates = (
        ("run", run),
        ("config", config),
        ("summary", summary),
        ("history", history),
        ("files", files),
        ("stats", stats),
        ("exit", exit),
        ("artifact", artifact),
        ("tbrecord", tbrecord),
        ("final", final),
        ("header", header),
        ("footer", footer),
        ("request", request),
    )
    for field_name, message in candidates:
        if message:
            getattr(record, field_name).CopyFrom(message)
            return record
    raise Exception("Invalid record")
def _history_update_leaf(self, kl, v, history_dict, update_history):
    """Resolve the metric for one history key path and record its synced step.

    The elements of ``kl`` are joined with "." (literal dots inside an element
    are escaped as ``\\.``) to form the metric key.  A metric newly produced
    by ``_history_define_metric`` is forwarded to ``_handle_defined_metric``
    in a record flagged ``control.local``.  If the metric has
    ``options.step_sync`` and a ``step_metric`` missing from ``history_dict``,
    the value stored in ``self._metric_copy`` under the one-element tuple key
    is written to ``update_history``.

    Args:
        kl: Sequence of key components.
        v: Not used by this method.
        history_dict: Current history mapping (read only here).
        update_history: Mapping that receives the step value, in place.
    """
    escaped_parts = (part.replace(".", "\\.") for part in kl)
    hkey = ".".join(escaped_parts)

    metric = self._metric_defines.get(hkey)
    if not metric:
        metric = self._history_define_metric(hkey)
        if not metric:
            return
        local_record = wandb_internal_pb2.Record()
        local_record.metric.CopyFrom(metric)
        local_record.control.local = True  # Dont store this, just send it
        self._handle_defined_metric(local_record)

    if not (metric.options.step_sync and metric.step_metric):
        return
    if metric.step_metric in history_dict:
        return
    step = self._metric_copy.get((metric.step_metric,))
    if step is not None:
        update_history[metric.step_metric] = step
def send_output(self, name, data):
    """Enqueue one piece of captured console output.

    The record is put on ``self.process_queue`` and
    ``constants.NOTIFY_PROCESS`` is put on ``self.notify_queue``.

    Args:
        name: Stream the output came from; must be "stdout" or "stderr".
        data: Output text, stored in the ``str`` field of ``OutputData``.

    Raises:
        ValueError: If ``name`` is neither "stdout" nor "stderr".
    """
    if name == "stdout":
        otype = wandb_internal_pb2.OutputData.OutputType.STDOUT
    elif name == "stderr":
        otype = wandb_internal_pb2.OutputData.OutputType.STDERR
    else:
        # This branch used to print "unknown type" and fall through, which
        # then failed with UnboundLocalError because ``otype`` was never set.
        raise ValueError("unknown output type: {!r}".format(name))

    output = wandb_internal_pb2.OutputData(output_type=otype, str=data)
    output.timestamp.GetCurrentTime()
    rec = wandb_internal_pb2.Record()
    rec.output.CopyFrom(output)
    self.process_queue.put(rec)
    self.notify_queue.put(constants.NOTIFY_PROCESS)
def send_metric(self, data: wandb_internal_pb2.Record) -> None:
    """Merge a metric definition into the config metric tables.

    Metrics carrying a ``glob_name`` are logged and ignored.  Otherwise the
    incoming metric is merged into (or, with ``_control.overwrite``, copied
    over) the entry in ``self._config_metric_dict``.  A ``step_metric`` that
    already has an index is replaced, on a copy, by ``step_metric_index``
    (stored index + 1).  The encoded metric is then stored in
    ``self._config_metric_pbdict_list`` and ``_update_config`` is called.
    """
    incoming = data.metric
    if incoming.glob_name:
        logger.warning("Seen metric with glob (shouldnt happen)")
        return

    # merge or overwrite
    stored = self._config_metric_dict.get(
        incoming.name, wandb_internal_pb2.MetricRecord()
    )
    combine = stored.CopyFrom if incoming._control.overwrite else stored.MergeFrom
    combine(incoming)
    self._config_metric_dict[incoming.name] = stored
    metric = stored

    # TODO(jhr): remove this code before shipping (only for prototype UI)
    if metric.step_metric and metric.step_metric != self._config_default_xaxis:
        self._config_default_xaxis = metric.step_metric
        self._update_config()

    # convert step_metric to index
    if metric.step_metric:
        step_idx = self._config_metric_index_dict.get(metric.step_metric)
        if step_idx is not None:
            # make a copy of this metric as we will be modifying it
            scratch = wandb_internal_pb2.Record()
            scratch.metric.CopyFrom(metric)
            metric = scratch.metric
            metric.ClearField("step_metric")
            metric.step_metric_index = step_idx + 1

    encoded: Dict[int, Any] = proto_util.proto_encode_to_dict(metric)
    slot = self._config_metric_index_dict.get(metric.name)
    if slot is None:
        # First time this metric is seen: append and remember its position.
        self._config_metric_index_dict[metric.name] = len(
            self._config_metric_pbdict_list
        )
        self._config_metric_pbdict_list.append(encoded)
    else:
        self._config_metric_pbdict_list[slot] = encoded
    self._update_config()
def fn(write_function, logdir="./", save=True, root_dir="./"):
    """Start a run on the internal handler, watch ``logdir``, then run ``write_function``.

    Args:
        write_function: Zero-argument callable invoked after the watcher delay.
        logdir: Directory passed to ``_tb_watcher.add``.
        save: Flag passed to ``_tb_watcher.add``.
        root_dir: Root directory passed to ``_tb_watcher.add``.

    Returns:
        The result of ``parse_ctx(mock_server.ctx)``.
    """
    with backend_interface():
        run_proto = pb.RunRecord()
        mocked_run._make_proto_run(run_proto)

        start_request = pb.RunStartRequest()
        start_request.run.CopyFrom(run_proto)

        wrapped_request = pb.Request()
        wrapped_request.run_start.CopyFrom(start_request)

        start_record = pb.Record()
        start_record.request.CopyFrom(wrapped_request)

        internal_hm.handle_request_run_start(start_record)
        internal_hm._tb_watcher.add(logdir, save, root_dir)

        # need to sleep to give time for the tb_watcher delay
        time.sleep(15)
        write_function()

    return parse_ctx(mock_server.ctx)
def _handle_defined_metric(self, record: wandb_internal_pb2.Record) -> None:
    """Store a metric definition and dispatch it, defining its step metric first.

    The metric in ``record`` is merged into (or, with ``_control.overwrite``,
    copied over) the entry in ``self._metric_defines``.  If the stored metric
    names a ``step_metric`` with no definition yet, a bare definition for it
    is stored and dispatched in a record flagged ``control.local`` before
    ``record`` itself is dispatched.
    """
    incoming = record.metric
    overwrite = incoming._control.overwrite
    stored = self._metric_defines.setdefault(
        incoming.name, wandb_internal_pb2.MetricRecord()
    )
    if overwrite:
        stored.CopyFrom(incoming)
    else:
        stored.MergeFrom(incoming)

    # before dispatching, make sure step_metric is defined, if not define it and
    # dispatch it locally first
    step_name = stored.step_metric
    if step_name and step_name not in self._metric_defines:
        step_definition = wandb_internal_pb2.MetricRecord(name=step_name)
        self._metric_defines[step_name] = step_definition
        step_record = wandb_internal_pb2.Record()
        step_record.metric.CopyFrom(step_definition)
        step_record.control.local = True  # Dont store this, just send it
        self._dispatch_record(step_record)

    self._dispatch_record(record)
def _publish_output(self, outdata: pb.OutputRecord) -> None:
    """Copy ``outdata`` into the ``output`` field of a new record and publish it."""
    record = pb.Record()
    record.output.CopyFrom(outdata)
    self._publish(record)
def _publish_output(self, outdata):
    """Copy ``outdata`` into the ``output`` field of a new record and publish it."""
    record = pb.Record()
    record.output.CopyFrom(outdata)
    self._publish(record)
def _publish_output(self, outdata):
    """Copy ``outdata`` into the ``output`` field of a new record and publish it."""
    record = wandb_internal_pb2.Record()
    record.output.CopyFrom(outdata)
    self._publish(record)
def run(self):
    """Replay every item in ``self._sync_list`` through a ``SendManager``.

    A directory item is resolved to the single file inside it ending in
    ``WANDB_SUFFIX``; directories flagged by ``check_and_warn_old`` or holding
    anything other than exactly one such file are skipped.  Each scanned
    record is then either printed (view mode) or sent.  "run" records receive
    the run id / project / entity overrides set on this object, and "exit"
    records are held back and sent in place of the later "final" record.  The
    run URL is printed once, on the first ``run_result`` response.  When
    ``self._mark_synced`` is set, an empty marker file named after the item
    plus ``SYNCED_SUFFIX`` is created.

    Raises:
        AssertionError: If a "final" record is read while no "exit" record is
            being held.
    """
    for sync_item in self._sync_list:
        if os.path.isdir(sync_item):
            files = os.listdir(sync_item)
            filtered_files = [f for f in files if f.endswith(WANDB_SUFFIX)]
            if check_and_warn_old(files) or len(filtered_files) != 1:
                print("Skipping directory: {}".format(sync_item))
                continue
            sync_item = os.path.join(sync_item, filtered_files[0])

        dirname = os.path.dirname(sync_item)
        files_dir = os.path.join(dirname, "files")
        sd = dict(
            files_dir=files_dir,
            _start_time=0,
            git_remote=None,
            resume=None,
            program=None,
            ignore_globs=(),
            run_id=None,
            entity=None,
            project=None,
            run_group=None,
            job_type=None,
            run_tags=None,
            run_name=None,
            run_notes=None,
            save_code=None,
        )
        settings = settings_static.SettingsStatic(sd)
        record_q = queue.Queue()
        result_q = queue.Queue()
        publish_interface = interface.BackendSender(record_q=record_q)
        sm = sender.SendManager(
            settings=settings,
            record_q=record_q,
            result_q=result_q,
            interface=publish_interface,
        )
        ds = datastore.DataStore()
        ds.open_for_scan(sync_item)

        # save exit for final send
        exit_pb = None
        shown = False

        while True:
            data = ds.scan_data()
            if data is None:
                break
            record = wandb_internal_pb2.Record()
            record.ParseFromString(data)
            record_type = record.WhichOneof("record_type")

            if self._view:
                if self._verbose:
                    print("Record:", record)
                else:
                    print("Record:", record_type)
                continue

            if record_type == "run":
                if self._run_id:
                    record.run.run_id = self._run_id
                if self._project:
                    record.run.project = self._project
                if self._entity:
                    record.run.entity = self._entity
                record.control.req_resp = True
            elif record_type == "exit":
                exit_pb = record
                continue
            elif record_type == "final":
                # Raised explicitly (rather than via ``assert``) so the check
                # is not stripped under ``python -O`` and ``None`` is never
                # passed to ``sm.send``.
                if not exit_pb:
                    raise AssertionError("final seen without exit")
                record = exit_pb
                exit_pb = None

            sm.send(record)
            # send any records that were added in previous send
            while not record_q.empty():
                queued = record_q.get(block=True)
                sm.send(queued)

            if record.control.req_resp:
                result = result_q.get(block=True)
                result_type = result.WhichOneof("result_type")
                if not shown and result_type == "run_result":
                    r = result.run_result.run
                    # TODO(jhr): hardcode until we have settings in sync
                    url = "{}/{}/{}/runs/{}".format(
                        self._app_url,
                        url_quote(r.entity),
                        url_quote(r.project),
                        url_quote(r.run_id),
                    )
                    print("Syncing: %s ..." % url, end="")
                    sys.stdout.flush()
                    shown = True

        sm.finish()
        if self._mark_synced:
            synced_file = "{}{}".format(sync_item, SYNCED_SUFFIX)
            # Create (or truncate) an empty marker file.
            with open(synced_file, "w"):
                pass
        print("done.")
def publish_defer(self, state=0):
    """Publish a locally handled defer request carrying ``state``.

    Args:
        state: Value placed in the ``state`` field of the ``DeferRequest``.
    """
    record = wandb_internal_pb2.Record()
    defer_request = wandb_internal_pb2.DeferRequest(state=state)
    record.request.defer.CopyFrom(defer_request)
    self._publish(record, local=True)