def run(self):
    """Sync every entry of ``self._sync_list``.

    Each entry is either streamed as tensorboard data (when
    ``self._setup_tensorboard`` returns a truthy value) or replayed record
    by record from a single ``.wandb`` file through a ``SendManager``.

    A directory entry is resolved to the first file in it whose name ends
    with ``WANDB_SUFFIX``.  When no tensorboard root was found, a directory
    is skipped if ``check_and_warn_old`` flags it or if it does not contain
    exactly one such file.

    After a replay, an empty ``<file><SYNCED_SUFFIX>`` marker is created
    only when marking is enabled, view mode is off, and ``_parse_pb``
    handed back a non-None exit record at least once.
    """
    for sync_path in self._sync_list:
        tb_event_files, tb_logdirs, tb_root = self._find_tfevent_files(sync_path)

        if os.path.isdir(sync_path):
            entries = os.listdir(sync_path)
            wandb_files = [
                name for name in entries if name.endswith(WANDB_SUFFIX)
            ]
            # The old-format check is only consulted when there is no
            # tensorboard root to fall back on.
            if tb_root is None and (
                check_and_warn_old(entries) or len(wandb_files) != 1
            ):
                print("Skipping directory: {}".format(sync_path))
                continue
            if wandb_files:
                sync_path = os.path.join(sync_path, wandb_files[0])

        sync_tb = self._setup_tensorboard(
            tb_root, tb_logdirs, tb_event_files, sync_path
        )
        if sync_tb:
            # If we're syncing tensorboard, let's use a tmp dir for images etc.
            sm = sender.SendManager.setup(TMPDIR.name)
            self._send_tensorboard(tb_root, tb_logdirs, sm)
            continue
        sm = sender.SendManager.setup(os.path.dirname(sync_path))

        ds = datastore.DataStore()
        try:
            ds.open_for_scan(sync_path)
        except AssertionError as err:
            print(
                ".wandb file is empty ({}), skipping: {}".format(err, sync_path)
            )
            continue

        # The exit record is threaded through _parse_pb so it can be saved
        # for the final send.
        exit_pb = None
        finished = False
        url_shown = False
        while True:
            raw = self._robust_scan(ds)
            if raw is None:
                break
            pb, exit_pb, skip = self._parse_pb(raw, exit_pb)
            finished = finished or exit_pb is not None
            if skip:
                continue

            sm.send(pb)
            # Send any records that were added by the previous send.
            while not sm._record_q.empty():
                queued = sm._record_q.get(block=True)
                sm.send(queued)

            if not pb.control.req_resp:
                continue
            result = sm._result_q.get(block=True)
            result_type = result.WhichOneof("result_type")
            if url_shown or result_type != "run_result":
                continue

            # First run result: announce where the run is being synced to.
            run_pb = result.run_result.run
            # TODO(jhr): hardcode until we have settings in sync
            url = "{}/{}/{}/runs/{}".format(
                self._app_url,
                url_quote(run_pb.entity),
                url_quote(run_pb.project),
                url_quote(run_pb.run_id),
            )
            print("Syncing: %s ..." % url, end="")
            sys.stdout.flush()
            url_shown = True

        sm.finish()
        # Only mark synced if the run actually finished
        if self._mark_synced and not self._view and finished:
            marker = "{}{}".format(sync_path, SYNCED_SUFFIX)
            with open(marker, "w"):
                pass
        print("done.")
def run(self):
    """Replay each entry of ``self._sync_list`` through a ``SendManager``.

    A directory entry is resolved to the single file in it whose name ends
    with ``WANDB_SUFFIX``; it is skipped when ``check_and_warn_old`` flags
    it or when it does not contain exactly one such file.

    Records are read from the file one at a time.  In view mode
    (``self._view``) they are only printed, never sent.  Otherwise:

    * ``run`` records get ``run_id`` / ``project`` / ``entity`` overridden
      from the instance when those are set, and request a response;
    * the ``exit`` record is held back and sent in place of the later
      ``final`` record.

    After a real (non-view) replay an empty ``<file><SYNCED_SUFFIX>`` marker
    is created when ``self._mark_synced`` is set.

    Raises:
        AssertionError: if a ``final`` record is seen before any ``exit``
            record.
    """
    for sync_item in self._sync_list:
        if os.path.isdir(sync_item):
            files = os.listdir(sync_item)
            filtered_files = [f for f in files if f.endswith(WANDB_SUFFIX)]
            if check_and_warn_old(files) or len(filtered_files) != 1:
                print("Skipping directory: {}".format(sync_item))
                continue
            sync_item = os.path.join(sync_item, filtered_files[0])

        # Build a standalone SendManager whose files dir sits next to the
        # .wandb file being synced.
        dirname = os.path.dirname(sync_item)
        files_dir = os.path.join(dirname, "files")
        sd = dict(
            files_dir=files_dir,
            _start_time=0,
            git_remote=None,
            resume=None,
            program=None,
            ignore_globs=(),
            run_id=None,
            entity=None,
            project=None,
            run_group=None,
            job_type=None,
            run_tags=None,
            run_name=None,
            run_notes=None,
            save_code=None,
        )
        settings = settings_static.SettingsStatic(sd)
        record_q = queue.Queue()
        result_q = queue.Queue()
        publish_interface = interface.BackendSender(record_q=record_q)
        sm = sender.SendManager(
            settings=settings,
            record_q=record_q,
            result_q=result_q,
            interface=publish_interface,
        )

        ds = datastore.DataStore()
        ds.open_for_scan(sync_item)

        # save exit for final send
        exit_pb = None
        shown = False
        while True:
            data = ds.scan_data()
            if data is None:
                break
            pb = wandb_internal_pb2.Record()
            pb.ParseFromString(data)
            record_type = pb.WhichOneof("record_type")

            if self._view:
                # View mode only inspects the file; nothing is sent.
                if self._verbose:
                    print("Record:", pb)
                else:
                    print("Record:", record_type)
                continue

            if record_type == "run":
                if self._run_id:
                    pb.run.run_id = self._run_id
                if self._project:
                    pb.run.project = self._project
                if self._entity:
                    pb.run.entity = self._entity
                # Ask for a result so the run URL can be printed below.
                pb.control.req_resp = True
            elif record_type == "exit":
                exit_pb = pb
                continue
            elif record_type == "final":
                # Explicit raise instead of `assert`: the check validates
                # data read from disk and must survive `python -O`.
                if not exit_pb:
                    raise AssertionError("final seen without exit")
                pb = exit_pb
                exit_pb = None

            sm.send(pb)
            # send any records that were added in previous send
            while not record_q.empty():
                data = record_q.get(block=True)
                sm.send(data)

            if pb.control.req_resp:
                result = result_q.get(block=True)
                result_type = result.WhichOneof("result_type")
                if not shown and result_type == "run_result":
                    r = result.run_result.run
                    # TODO(jhr): hardcode until we have settings in sync
                    url = "{}/{}/{}/runs/{}".format(
                        self._app_url,
                        url_quote(r.entity),
                        url_quote(r.project),
                        url_quote(r.run_id),
                    )
                    print("Syncing: %s ..." % url, end="")
                    sys.stdout.flush()
                    shown = True

        sm.finish()
        # View mode sends nothing, so it must not flag the run as synced.
        if self._mark_synced and not self._view:
            synced_file = "{}{}".format(sync_item, SYNCED_SUFFIX)
            with open(synced_file, "w"):
                pass
        print("done.")
def run(self):
    """Sync every entry of ``self._sync_list``.

    For each entry the method first looks for tfevent files via
    ``self._find_tfevent_files``.  A directory entry is resolved to the
    first file in it whose name ends with ``WANDB_SUFFIX``; when no
    tensorboard root was found, the directory is skipped if
    ``check_and_warn_old`` flags it or if it does not contain exactly one
    such file.

    If a tensorboard root exists and the entry is not a ``.wandb`` file,
    the tfevent data is handed to ``self._send_tensorboard`` and the loop
    moves on.  If a ``.wandb`` file is present it takes precedence: a
    warning is emitted and the file's records are replayed through the
    ``SendManager`` instead.

    After a replay, an empty ``<file><SYNCED_SUFFIX>`` marker is created
    when ``self._mark_synced`` is set and view mode is off.
    """
    for sync_item in self._sync_list:
        tb_event_files, tb_logdirs, tb_root = self._find_tfevent_files(sync_item)
        if os.path.isdir(sync_item):
            files = os.listdir(sync_item)
            filtered_files = [f for f in files if f.endswith(WANDB_SUFFIX)]
            if tb_root is None and (
                check_and_warn_old(files) or len(filtered_files) != 1
            ):
                print("Skipping directory: {}".format(sync_item))
                continue
            if filtered_files:
                sync_item = os.path.join(sync_item, filtered_files[0])

        root_dir = os.path.dirname(sync_item)
        # If we're syncing tensorboard, let's use a tmpdir
        if tb_event_files > 0 and not sync_item.endswith(WANDB_SUFFIX):
            root_dir = TMPDIR.name
        sm = sender.SendManager.setup(root_dir)

        if tb_root is not None:
            if tb_event_files > 0 and sync_item.endswith(WANDB_SUFFIX):
                # Fall through: the .wandb file is replayed below.
                wandb.termwarn(
                    "Found .wandb file, not streaming tensorboard metrics."
                )
            else:
                print(
                    "Found {} tfevent files in {}".format(tb_event_files, tb_root)
                )
                if len(tb_logdirs) > 3:
                    # The placeholder was previously never filled in, so
                    # the warning showed a literal "{}".
                    wandb.termwarn(
                        "Found {} directories containing tfevent files. "
                        "If these represent multiple experiments, sync them "
                        "individually or pass a list of paths.".format(
                            len(tb_logdirs)
                        )
                    )
                self._send_tensorboard(tb_root, tb_logdirs, sm)
                continue

        ds = datastore.DataStore()
        ds.open_for_scan(sync_item)

        # save exit for final send
        exit_pb = None
        shown = False
        while True:
            data = ds.scan_data()
            if data is None:
                break
            pb, exit_pb, cont = self._parse_pb(data, exit_pb)
            if cont:
                continue
            sm.send(pb)
            # send any records that were added in previous send
            while not sm._record_q.empty():
                data = sm._record_q.get(block=True)
                sm.send(data)

            if pb.control.req_resp:
                result = sm._result_q.get(block=True)
                result_type = result.WhichOneof("result_type")
                if not shown and result_type == "run_result":
                    r = result.run_result.run
                    # TODO(jhr): hardcode until we have settings in sync
                    url = "{}/{}/{}/runs/{}".format(
                        self._app_url,
                        url_quote(r.entity),
                        url_quote(r.project),
                        url_quote(r.run_id),
                    )
                    print("Syncing: %s ..." % url, end="")
                    sys.stdout.flush()
                    shown = True

        sm.finish()
        if self._mark_synced and not self._view:
            synced_file = "{}{}".format(sync_item, SYNCED_SUFFIX)
            with open(synced_file, "w"):
                pass
        print("done.")