def __init__(self, name, type, description=None, metadata=None):
    if not re.match(r"^[a-zA-Z0-9_\-.]+$", name):
        raise ValueError(
            'Artifact name may only contain alphanumeric characters, '
            'dashes, underscores, and dots. Invalid name: "%s"' % name
        )
    # TODO: this shouldn't be a property of the artifact. It's more like an
    # argument to log_artifact.
    storage_layout = StorageLayout.V2
    if env.get_use_v1_artifacts():
        storage_layout = StorageLayout.V1
    self._storage_policy = WandbStoragePolicy(
        config={
            "storageLayout": storage_layout,
            # TODO: storage region
        }
    )
    self._api = InternalApi()
    self._final = False
    self._digest = None
    self._file_entries = None
    self._manifest = ArtifactManifestV1(self, self._storage_policy)
    self._cache = get_artifacts_cache()
    self._added_new = False
    self._added_objs = {}
    # You can write into this directory when creating artifact files
    self._artifact_dir = compat_tempfile.TemporaryDirectory(
        missing_ok_on_cleanup=True
    )
    self.type = type
    self.name = name
    self.description = description
    self.metadata = metadata
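# Usage sketch (an assumption, not part of this module): this constructor is
# normally reached through the public wandb.Artifact API. Assuming a
# configured wandb environment, creating and logging an artifact looks
# roughly like this; the file path is illustrative.
#
#     import wandb
#
#     artifact = wandb.Artifact("my-dataset", type="dataset",
#                               description="raw training split")
#     artifact.add_file("data/train.csv")
#     with wandb.init(project="artifact-demo") as run:
#         run.log_artifact(artifact)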
def __init__(self, api):
    self._api = api
    self._tempdir = tempfile.TemporaryDirectory("wandb")
    self._stats = stats.Stats()
    self._incoming_queue = queue.Queue()
    self._event_queue = queue.Queue()
    self._step_checksum = step_checksum.StepChecksum(
        self._api,
        self._tempdir,
        self._incoming_queue,
        self._event_queue,
        self._stats,
    )
    self._step_checksum.start()
    self._step_upload = step_upload.StepUpload(
        self._api, self._stats, self._event_queue, self.MAX_UPLOAD_JOBS
    )
    self._step_upload.start()
    # Holds refs to tempfiles if users need to make a temporary file that
    # stays around long enough for file pusher to sync
    # TODO(artifacts): maybe don't do this
    self._temp_file_refs = []
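# Usage sketch (hedged): callers construct the pusher, hand it files, and
# later block on completion. finish() is described in the FilePusher class
# docstring; file_changed() below is a hypothetical name for the call that
# enqueues a file, shown only to illustrate the flow.
#
#     pusher = FilePusher(api)
#     pusher.file_changed("config.yaml", "/tmp/run/config.yaml")  # hypothetical
#     pusher.finish()  # blocks until all queued uploads complete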
def __init__(self, name, type, description=None, metadata=None):
    if not re.match(r"^[a-zA-Z0-9_\-.]+$", name):
        raise ValueError(
            'Artifact name may only contain alphanumeric characters, '
            'dashes, underscores, and dots. Invalid name: "%s"' % name)
    if type is None:
        raise ValueError(
            'type is required when logging artifacts, specify "dataset", '
            '"model", or a custom type'
        )
    # TODO: this shouldn't be a property of the artifact. It's more like an
    # argument to log_artifact.
    self._storage_policy = WandbStoragePolicy()
    self._file_specs = {}
    self._api = InternalApi()  # TODO: persist project in settings?
    self._final = False
    self._digest = None
    self._file_entries = None
    self._manifest = ArtifactManifestV1(self, self._storage_policy)
    self._cache = artifacts_cache.get_artifacts_cache()
    self._added_new = False
    # You can write into this directory when creating artifact files
    self._artifact_dir = compat_tempfile.TemporaryDirectory(
        missing_ok_on_cleanup=True)
    self.server_manifest = None
    self.type = type
    self.name = name
    self.description = description
    self.metadata = metadata
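# Sketch of the validation above: the regex admits only letters, digits,
# underscores, dashes, and dots.
import re

assert re.match(r"^[a-zA-Z0-9_\-.]+$", "mnist-v1.2_raw")        # accepted
assert not re.match(r"^[a-zA-Z0-9_\-.]+$", "my artifact/name")  # space and slash rejected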
def main(argv):
    args = parser.parse_args()
    print('Load test starting')

    project_name = args.project
    if project_name is None:
        project_name = 'artifacts-load-test-%s' % str(
            datetime.now()).replace(' ', '-').replace(':', '-').replace('.', '-')

    env_project = os.environ.get('WANDB_PROJECT')

    sweep_id = os.environ.get('WANDB_SWEEP_ID')
    if sweep_id:
        del os.environ['WANDB_SWEEP_ID']
    wandb_config_paths = os.environ.get('WANDB_CONFIG_PATHS')
    if wandb_config_paths:
        del os.environ['WANDB_CONFIG_PATHS']
    wandb_run_id = os.environ.get('WANDB_RUN_ID')
    if wandb_run_id:
        del os.environ['WANDB_RUN_ID']

    # set global entity and project before chdir'ing
    from wandb.apis import InternalApi
    api = InternalApi()
    settings_entity = api.settings('entity')
    settings_base_url = api.settings('base_url')
    os.environ['WANDB_ENTITY'] = (
        os.environ.get('LOAD_TEST_ENTITY') or settings_entity)
    os.environ['WANDB_PROJECT'] = project_name
    os.environ['WANDB_BASE_URL'] = (
        os.environ.get('LOAD_TEST_BASE_URL') or settings_base_url)

    # Change dir to avoid littering the code directory
    pwd = os.getcwd()
    tempdir = tempfile.TemporaryDirectory()
    os.chdir(tempdir.name)

    artifact_name = 'load-artifact-' + ''.join(
        random.choices(string.ascii_lowercase + string.digits, k=10))

    print('Generating source data')
    source_file_names = gen_files(
        args.gen_n_files, args.gen_max_small_size, args.gen_max_large_size)
    print('Done generating source data')

    procs = []
    stop_queue = multiprocessing.Queue()
    stats_queue = multiprocessing.Queue()

    # start all processes

    # writers
    for i in range(args.num_writers):
        file_names = source_file_names
        if args.non_overlapping_writers:
            chunk_size = int(len(source_file_names) / args.num_writers)
            file_names = source_file_names[i * chunk_size: (i + 1) * chunk_size]
        if args.distributed_fanout > 1:
            p = multiprocessing.Process(
                target=proc_version_writer_distributed,
                args=(
                    stop_queue,
                    stats_queue,
                    project_name,
                    file_names,
                    artifact_name,
                    args.files_per_version_min,
                    args.files_per_version_max,
                    args.distributed_fanout,
                    args.blocking))
        else:
            p = multiprocessing.Process(
                target=proc_version_writer,
                args=(
                    stop_queue,
                    stats_queue,
                    project_name,
                    file_names,
                    artifact_name,
                    args.files_per_version_min,
                    args.files_per_version_max,
                    args.blocking))
        p.start()
        procs.append(p)

    # readers
    for i in range(args.num_readers):
        p = multiprocessing.Process(
            target=proc_version_reader,
            args=(stop_queue, stats_queue, project_name, artifact_name, i))
        p.start()
        procs.append(p)

    # deleters
    for i in range(args.num_deleters):
        p = multiprocessing.Process(
            target=proc_version_deleter,
            args=(stop_queue, stats_queue, artifact_name,
                  args.min_versions_before_delete, args.delete_period_max))
        p.start()
        procs.append(p)

    # cache garbage collector
    if args.cache_gc_period_max is None:
        print('Testing cache GC process not enabled!')
    else:
        p = multiprocessing.Process(
            target=proc_cache_garbage_collector,
            args=(stop_queue, args.cache_gc_period_max))
        p.start()
        procs.append(p)

    # reset environment
    os.environ['WANDB_ENTITY'] = settings_entity
    os.environ['WANDB_BASE_URL'] = settings_base_url
    if env_project is None:
        del os.environ['WANDB_PROJECT']
    else:
        os.environ['WANDB_PROJECT'] = env_project
    if sweep_id:
        os.environ['WANDB_SWEEP_ID'] = sweep_id
    if wandb_config_paths:
        os.environ['WANDB_CONFIG_PATHS'] = wandb_config_paths
    if wandb_run_id:
        os.environ['WANDB_RUN_ID'] = wandb_run_id
    # go back to original dir
    os.chdir(pwd)

    # test phase
    start_time = time.time()
    stats = defaultdict(int)

    run = wandb.init(job_type='main-test-phase')
    run.config.update(args)

    while time.time() - start_time < args.test_phase_seconds:
        stat_update = None
        try:
            # block up to 5 seconds for a stats update; the timeout is in
            # seconds, and must stay short so the loop re-checks wall time
            stat_update = stats_queue.get(True, 5)
        except queue.Empty:
            pass
        print('** Test time: %s' % (time.time() - start_time))
        if stat_update:
            for k, v in stat_update.items():
                stats[k] += v
        wandb.log(stats)
    print('Test phase time expired')

    # stop all processes and wait til all are done
    for i in range(len(procs)):
        stop_queue.put(True)
    print('Waiting for processes to stop')
    fail = False
    for proc in procs:
        proc.join()
        if proc.exitcode != 0:
            print('FAIL! Test phase failed')
            fail = True

    # drain remaining stats
    while True:
        try:
            stat_update = stats_queue.get_nowait()
        except queue.Empty:
            break
        for k, v in stat_update.items():
            stats[k] += v

    print('Stats')
    import pprint
    pprint.pprint(dict(stats))

    if fail:
        print('FAIL! Test phase failed')
        sys.exit(1)
    else:
        print('Test phase successfully completed')

    print('Starting verification phase')

    os.environ['WANDB_ENTITY'] = (
        os.environ.get('LOAD_TEST_ENTITY') or settings_entity)
    os.environ['WANDB_PROJECT'] = project_name
    os.environ['WANDB_BASE_URL'] = (
        os.environ.get('LOAD_TEST_BASE_URL') or settings_base_url)
    data_api = wandb.Api()
    # we need to list artifacts by walking runs; accessing via
    # project.artifactType.artifacts only returns committed artifacts
    for run in data_api.runs('%s/%s' % (api.settings('entity'), project_name)):
        for v in run.logged_artifacts():
            # TODO: allow deleted once we build deletion support
            if v.state != 'COMMITTED' and v.state != 'DELETED':
                print('FAIL! Artifact version not committed or deleted: %s' % v)
                sys.exit(1)

    print('Verification succeeded')
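# Example invocation (hypothetical flag spellings, inferred from the args.*
# attributes used above; the actual argparse definition is not shown here):
#
#   python artifacts_load.py \
#       --gen_n_files 1000 --gen_max_small_size 10000 --gen_max_large_size 250000 \
#       --num_writers 4 --num_readers 2 --num_deleters 1 \
#       --files_per_version_min 10 --files_per_version_max 100 \
#       --test_phase_seconds 300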
import collections
import threading
import time
import warnings

from six.moves import queue

import wandb
import wandb.util
from wandb.compat import tempfile

# Get rid of cleanup warnings in Python 2.7.
warnings.filterwarnings(
    'ignore', 'Implicitly cleaning up', RuntimeWarning, 'wandb.compat.tempfile')

# Temporary directory for copies we make of some file types to
# reduce the probability that the file gets changed while we're
# uploading it.
TMP_DIR = tempfile.TemporaryDirectory('wandb')

EventFileChanged = collections.namedtuple(
    'EventFileChanged', ('path', 'save_name', 'copy'))
EventJobDone = collections.namedtuple('EventJobDone', ('job'))
EventFinish = collections.namedtuple('EventFinish', ())


class UploadJob(threading.Thread):
    def __init__(self, done_queue, push_function, save_name, path, copy=True):
        """A file upload thread.

        Arguments:
            done_queue: queue.Queue in which to put an EventJobDone event when
                the upload finishes.
            push_function: function(save_name, actual_path) which actually uploads
    try:
        from pathlib import Path
        return str(Path(path).resolve())
    except ImportError:
        # pathlib isn't present for Python versions earlier than 3.3
        return os.path.realpath(path)


# Get rid of cleanup warnings in Python 2.7.
warnings.filterwarnings(
    "ignore", "Implicitly cleaning up", RuntimeWarning, "wandb.compat.tempfile")

# Temporary directory for copies we make of some file types to
# reduce the probability that the file gets changed while we're
# uploading it.
TMP_DIR = tempfile.TemporaryDirectory("wandb")

logger = logging.getLogger(__file__)


class FilePusher(object):
    """Parallel file upload class.

    This manages uploading multiple files in parallel. It will restart a
    given file's upload job if it receives a notification that that file has
    been modified. The finish() method will block until all events have been
    processed and all uploads are complete.
    """

    MAX_UPLOAD_JOBS = 64

    def __init__(self, api):
    from wandb.sdk.internal import datastore
    from wandb.sdk.internal import handler
    from wandb.sdk.internal import sender
    from wandb.sdk.internal import tb_watcher
    from wandb.sdk.interface import interface
else:
    from wandb.sdk_py27.internal import datastore
    from wandb.sdk_py27.internal import handler
    from wandb.sdk_py27.internal import sender
    from wandb.sdk_py27.internal import tb_watcher
    from wandb.sdk_py27.interface import interface

WANDB_SUFFIX = ".wandb"
SYNCED_SUFFIX = ".synced"
TFEVENT_SUBSTRING = ".tfevents."
TMPDIR = tempfile.TemporaryDirectory()


class _LocalRun(object):
    def __init__(self, path, synced=None):
        self.path = path
        self.synced = synced
        self.offline = os.path.basename(path).startswith("offline-")
        self.datetime = datetime.datetime.strptime(
            os.path.basename(path).split("run-")[1].split("-")[0],
            "%Y%m%d_%H%M%S")

    def __str__(self):
        return self.path
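# Usage sketch for _LocalRun (the path is illustrative; the basename must
# contain "run-YYYYMMDD_HHMMSS-<id>" for the timestamp parsing above to work):
lr = _LocalRun("wandb/offline-run-20210301_120000-1a2b3c4d")
assert lr.offline  # basename starts with "offline-"
assert lr.datetime == datetime.datetime(2021, 3, 1, 12, 0)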
        from wandb.sdk import wandb_artifacts
    else:
        from wandb.sdk_py27 import wandb_run
        from wandb.sdk_py27 import wandb_artifacts
    return wandb_run, wandb_artifacts


# Get rid of cleanup warnings in Python 2.7.
warnings.filterwarnings(
    "ignore", "Implicitly cleaning up", RuntimeWarning, "wandb.compat.tempfile"
)

# Staging directory so we can encode raw data into files, then hash them before
# we put them into the Run directory to be uploaded.
MEDIA_TMP = tempfile.TemporaryDirectory("wandb-media")


class Table(Media):
    """This is a table designed to display sets of records.

    Arguments:
        columns: ([str]) Names of the columns in the table.
            Defaults to ["Input", "Output", "Expected"].
        data: (array) 2D array of values that will be displayed as strings.
        dataframe: (pandas.DataFrame) DataFrame object used to create the table.
            When set, the other arguments are ignored.
        optional (Union[bool, List[bool]]): Whether None values are allowed.
            A single bool applies to all columns; a list of bools applies to
            each respective column. Defaults to True.
        allow_mixed_types (bool): Whether columns may contain mixed types
            (disables type validation). Defaults to False.
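# Usage sketch for the public wandb.Table API that wraps this class (assumes
# a configured wandb environment for the logging call):
import wandb

table = wandb.Table(columns=["Input", "Output", "Expected"])
table.add_data("2+2", "4", "4")  # appends a single row
with wandb.init(project="table-demo") as run:
    run.log({"eval_samples": table})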
def main(argv):
    print('Load test starting')
    args = parser.parse_args()

    # set global entity and project before chdir'ing
    from wandb.apis import InternalApi
    api = InternalApi()
    os.environ['WANDB_ENTITY'] = api.settings('entity')
    os.environ['WANDB_PROJECT'] = api.settings('project')
    os.environ['WANDB_BASE_URL'] = api.settings('base_url')

    # Change dir to avoid littering the code directory
    tempdir = tempfile.TemporaryDirectory()
    os.chdir(tempdir.name)

    artifact_name = 'load-artifact-' + ''.join(
        random.choices(string.ascii_lowercase + string.digits, k=10))

    print('Generating source data')
    source_file_names = gen_files(
        args.gen_n_files, args.gen_max_small_size, args.gen_max_large_size)
    print('Done generating source data')

    procs = []
    stop_queue = multiprocessing.Queue()

    # start all processes

    # writers
    for i in range(args.num_writers):
        p = multiprocessing.Process(
            target=proc_version_writer,
            args=(stop_queue, source_file_names, artifact_name,
                  args.files_per_version_min, args.files_per_version_max))
        p.start()
        procs.append(p)

    # readers
    for i in range(args.num_readers):
        p = multiprocessing.Process(
            target=proc_version_reader,
            args=(stop_queue, artifact_name, i))
        p.start()
        procs.append(p)

    # cache garbage collector
    if args.cache_gc_period_max is None:
        print('Testing cache GC process not enabled!')
    else:
        p = multiprocessing.Process(
            target=proc_cache_garbage_collector,
            args=(stop_queue, args.cache_gc_period_max))
        p.start()
        procs.append(p)

    # test phase
    time.sleep(args.test_phase_seconds)
    print('Test phase time expired')

    # stop all processes and wait til all are done
    for i in range(len(procs)):
        stop_queue.put(True)
    print('Waiting for processes to stop')
    for proc in procs:
        proc.join()
        if proc.exitcode != 0:
            print('FAIL! Test phase failed')
            sys.exit(1)
    print('Test phase successfully completed')

    print('Starting verification phase')
    api = wandb.Api()
    versions = api.artifact_versions('dataset', artifact_name)
    for v in versions:
        # TODO: allow deleted once we build deletion support
        if v.state != 'COMMITTED':
            print('FAIL! Artifact version not committed: %s' % v)
            sys.exit(1)
    print('Verification succeeded')