def test_local_download(self, container: AttributeContainer):
    """Track a file (relative path) and a directory (absolute path) as artifacts,
    then download both and check the expected files appear on disk.
    """
    first, second = self.gen_key(), self.gen_key()
    filename, filepath = fake.unique.file_name(), fake.unique.file_path(depth=3).lstrip("/")

    with tmp_context() as tmp:
        # Create a flat file and a nested file to track.
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # Relative path
        container[first].track_files(filename)
        # Absolute path
        container[second].track_files(tmp)
        container.sync()

        # Download into a fresh temporary directory so pre-existing files
        # cannot mask a failed download.
        with tmp_context():
            # FIX: the download target was a literal "(unknown)" (a mangled
            # f-string placeholder); restored to the tracked file's name.
            with with_check_if_file_appears(f"artifacts/{filename}"):
                container[first].download("artifacts/")
            with with_check_if_file_appears(filepath):
                container[second].download()
def test_s3_download(self, container: AttributeContainer, bucket, environment):
    """Track an S3 prefix as an artifact, then download it both to an explicit
    directory and to the current working directory.
    """
    first = self.gen_key()
    prefix = f"{environment.project}/{self.gen_key()}/{type(container).__name__}"
    filename, filepath = fake.unique.file_name(), fake.unique.file_path(depth=3).lstrip("/")
    bucket_name, s3_client = bucket

    with tmp_context():
        # Create a flat file and a nested file locally, then push both to S3.
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # FIX: the first upload key ended in a literal "(unknown)" (a mangled
        # f-string placeholder); restored to the file's own name so the
        # download checks below can find it.
        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")
        s3_client.meta.client.upload_file(filepath, bucket_name, f"{prefix}/{filepath}")

        container[first].track_files(f"s3://{bucket_name}/{prefix}")
        container.sync()

        # Download into an explicit target directory.
        with tempfile.TemporaryDirectory() as tmp:
            with with_check_if_file_appears(f"{tmp}/{filename}"):
                container[first].download(tmp)
        # Download into the (fresh) current working directory.
        with tmp_context():
            with with_check_if_file_appears(filename):
                container[first].download()
def test_sync_run(self, environment):
    """Exercise the generic sync scenario against a run re-openable by custom_run_id."""
    custom_run_id = "-".join(fake.word() for _ in range(3))

    with tmp_context() as tmp:
        # Test values to write before and after the sync.
        key = self.gen_key()
        original_value = fake.word()
        updated_value = fake.word()

        # Initialize the run under test.
        run = neptune.init(
            custom_run_id=custom_run_id,
            project=environment.project,
            **DISABLE_SYSLOG_KWARGS,
        )

        def get_next_run():
            # Re-open the very same run via its custom_run_id.
            return neptune.init(
                custom_run_id=custom_run_id,
                project=environment.project,
                **DISABLE_SYSLOG_KWARGS,
            )

        self._test_sync(
            exp=run,
            get_next_exp=get_next_run,
            path=tmp,
            key=key,
            original_value=original_value,
            updated_value=updated_value,
        )
def test_offline_sync(self, environment):
    """Create a run in offline mode, push it with the sync CLI, and verify
    the stored value is readable from the server afterwards.
    """
    with tmp_context() as tmp:
        # Create run in offline mode.
        run = neptune.init(
            mode="offline",
            project=environment.project,
            **DISABLE_SYSLOG_KWARGS,
        )
        # Assign a value, then stop the run so its data lands on disk.
        key = self.gen_key()
        val = fake.word()
        run[key] = val
        run.stop()

        # Push the offline data through the CLI.
        result = runner.invoke(sync, ["--path", tmp, "-p", environment.project])
        assert result.exit_code == 0

        # Offline mode doesn't support custom_run_id, so the short id of the
        # synchronized run has to be parsed out of the CLI output.
        match = re.search(self.SYNCHRONIZED_SYSID_RE, result.stdout)
        assert len(match.groups()) == 1
        short_id = match.group(1)

        resumed = neptune.init(run=short_id, project=environment.project)
        assert resumed[key].fetch() == val
def test_s3_creation(self, container: AttributeContainer, bucket, environment):
    """Tracking a single S3 object and tracking its whole (single-file) prefix
    must produce identical artifacts.
    """
    first, second, prefix = (
        self.gen_key(),
        self.gen_key(),
        f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
    )
    filename = fake.unique.file_name()
    bucket_name, s3_client = bucket

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        # FIX: the S3 key ended in a literal "(unknown)" (a mangled f-string
        # placeholder); restored to the uploaded file's name so the two
        # track_files calls below reference the same single object.
        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")

    # First artifact: the object directly; second: the enclosing prefix.
    container[first].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
    container[second].track_files(f"s3://{bucket_name}/{prefix}")
    container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_hash_cache(self, container: AttributeContainer):
    """Verify artifact hashing uses a cache: re-tracking an unchanged file is
    much faster, and modifying the file invalidates the cache again.
    """
    key = self.gen_key()
    filename = fake.file_name()

    with tmp_context():
        # Create a 2GB file so hashing takes measurable wall-clock time.
        with open(filename, "wb") as fh:
            fh.write(b"\0" * 2 * 2**30)

        def timed_track():
            # Track the cwd synchronously and return the elapsed seconds.
            begin = time.time()
            container[key].track_files(".", wait=True)
            return time.time() - begin

        initial_duration = timed_track()
        retry_duration = timed_track()
        assert retry_duration * 2 < initial_duration, "Tracking again should be significantly faster"

        # Append a single byte — the file's hash must be recomputed.
        with open(filename, "ab") as fh:
            fh.write(b"\0")

        updated_duration = timed_track()
        assert retry_duration * 2 < updated_duration, "Tracking updated file should take more time - no cache"
def test_local_existing(self, container: AttributeContainer):
    """Adding a file to an existing artifact must make it equal to an artifact
    that tracked all files from the start.
    """
    first, second = self.gen_key(), self.gen_key()
    filename, filepath = fake.file_name(), fake.file_path(depth=3).lstrip("/")

    with tmp_context() as tmp:
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # Track all files - "a" and "b" to first artifact
        container[first].track_files(".")
        # Track only the "a" file to second artifact
        # FIX: the file:// URL ended in a literal "(unknown)" (a mangled
        # f-string placeholder); restored to the flat file's name.
        container[second].track_files(f"file://{tmp}/{filename}")
        container.sync()

        # Add "b" file to existing second artifact
        # so it should be now identical as first
        container[second].track_files(filepath, destination=str(Path(filepath).parent))
        container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_assignment(self, container: AttributeContainer):
    """Assigning a fetched artifact value to another key must produce an
    identical artifact (same hash, same file list).
    """
    source_key, target_key = self.gen_key(), self.gen_key()
    filename = fake.unique.file_name()

    with tmp_context():
        with open(filename, "w", encoding="utf-8") as fh:
            fh.write(fake.paragraph(nb_sentences=5))

        container[source_key].track_files(filename)
        container.wait()

        # Copy the artifact by assigning its fetched value to a second key.
        container[target_key] = container[source_key].fetch()
        container.sync()

        assert container[source_key].fetch_hash() == container[target_key].fetch_hash()
        assert container[source_key].fetch_files_list() == container[target_key].fetch_files_list()
def test_s3_existing(self, container: AttributeContainer, bucket, environment):
    """Adding an S3 object to an existing artifact must make it equal to an
    artifact that tracked the whole prefix from the start.
    """
    first, second, prefix = (
        self.gen_key(),
        self.gen_key(),
        f"{environment.project}/{self.gen_key()}/{type(container).__name__}",
    )
    filename, filepath = fake.file_name(), fake.file_path(depth=3).lstrip("/")
    bucket_name, s3_client = bucket

    with tmp_context():
        # Create a flat file and a nested file locally, then push both to S3.
        with open(filename, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))
        os.makedirs(Path(filepath).parent, exist_ok=True)
        with open(filepath, "w", encoding="utf-8") as handler:
            handler.write(fake.paragraph(nb_sentences=5))

        # FIX: the first upload key ended in a literal "(unknown)" (a mangled
        # f-string placeholder); restored to the file's own name, matching
        # the parallel upload line below.
        s3_client.meta.client.upload_file(filename, bucket_name, f"{prefix}/{filename}")
        s3_client.meta.client.upload_file(filepath, bucket_name, f"{prefix}/{filepath}")

    # Track all files - "a" and "b" to first artifact
    container[first].track_files(f"s3://{bucket_name}/{prefix}/")
    # Track only the "a" file to second artifact
    container[second].track_files(f"s3://{bucket_name}/{prefix}/{filename}")
    container.sync()

    # Add "b" file to existing second artifact
    # so it should be now identical as first
    container[second].track_files(
        f"s3://{bucket_name}/{prefix}/{filepath}",
        destination=str(Path(filepath).parent),
    )
    container.sync()

    assert container[first].fetch_hash() == container[second].fetch_hash()
    assert container[first].fetch_files_list() == container[second].fetch_files_list()
def test_log_images(self, container: AttributeContainer):
    """Log several images (singly and as a batch) and verify both the
    last-item download and the full series download.
    """
    key = self.gen_key()
    # images with size between 200KB - 12MB
    images = [generate_image(size=2**n) for n in range(8, 12)]

    # Log the first image alone, then the rest as one batch.
    container[key].log(images[0])
    container[key].log(images[1:])
    container.sync()

    with tmp_context():
        container[key].download_last("last")
        container[key].download("all")

        # download_last must return the most recently logged image.
        with Image.open("last/3.png") as img:
            assert img == image_to_png(image=images[-1])
        # The full download must contain every logged image, in order.
        for index in range(4):
            with Image.open(f"all/{index}.png") as img:
                assert img == image_to_png(image=images[index])
def test_single_file(self, container: AttributeContainer, file_size: int):
    """Upload one binary file of the given size and verify the downloaded
    copy matches byte-for-byte.
    """
    key = self.gen_key()
    filename = fake.file_name()
    downloaded_filename = fake.file_name()

    with tmp_context():
        # Create a zero-filled file of the requested size.
        with open(filename, "wb") as fh:
            fh.write(b"\0" * file_size)

        container[key].upload(filename)
        container.sync()
        container[key].download(downloaded_filename)

        assert os.path.getsize(downloaded_filename) == file_size
        with open(downloaded_filename, "rb") as fh:
            payload = fh.read()
        assert len(payload) == file_size
        assert payload == b"\0" * file_size
def test_sync_project(self, environment):
    """Exercise the generic sync scenario against a project-level container."""
    with tmp_context() as tmp:
        # Test values to write before and after the sync.
        key = f"{self.gen_key()}-" + "-".join(fake.word() for _ in range(3))
        original_value = fake.word()
        updated_value = fake.word()

        # Initialize the project container under test.
        project = neptune.init_project(name=environment.project)

        def get_next_project():
            # Re-open the same project container.
            return neptune.init_project(name=environment.project)

        self._test_sync(
            exp=project,
            get_next_exp=get_next_project,
            path=tmp,
            key=key,
            original_value=original_value,
            updated_value=updated_value,
        )
def test_fileset(self, container: AttributeContainer):
    """Upload a large (multipart) file plus many tiny files as a fileset,
    verify each downloaded archive's contents, and check file deletion.
    """
    key = self.gen_key()
    large_filesize = 10 * 2**20
    large_filename = fake.file_name()
    small_files = [
        (f"{uuid.uuid4()}.{fake.file_extension()}", fake.sentence().encode("utf-8"))
        for _ in range(100)
    ]

    with tmp_context():
        # Create one large file (forces multipart upload) and many tiny files.
        with open(large_filename, "wb") as fh:
            fh.write(b"\0" * large_filesize)
        for name, contents in small_files:
            with open(name, "wb") as fh:
                fh.write(contents)

        small_filenames = [name for name, _ in small_files]
        # Guard against filename collisions between the generated files.
        assert len({large_filename, *small_filenames}) == len(small_files) + 1

        def check_small_files(archive):
            # Every small file must round-trip with its exact contents.
            for name, expected_content in small_files:
                with archive.open(name, "r") as fh:
                    payload = fh.read()
                assert len(payload) == len(expected_content)
                assert payload == expected_content

        def check_large_file(archive):
            # The large file must round-trip as all zero bytes.
            with archive.open(large_filename, "r") as fh:
                payload = fh.read()
            assert len(payload) == large_filesize
            assert payload == b"\0" * large_filesize

        # When the single large file is uploaded as a fileset...
        container[key].upload_files([large_filename])
        # ...it must come back in the downloaded archive.
        container.sync()
        container[key].download("downloaded1.zip")
        with ZipFile("downloaded1.zip") as zipped:
            assert set(zipped.namelist()) == {large_filename, "/"}
            check_large_file(zipped)

        # When the small files are added to the fileset...
        container[key].upload_files(small_filenames)
        # ...everything must come back.
        container.sync()
        container[key].download("downloaded2.zip")
        with ZipFile("downloaded2.zip") as zipped:
            assert set(zipped.namelist()) == {large_filename, "/", *small_filenames}
            check_large_file(zipped)
            check_small_files(zipped)

        # When the large file is removed from the fileset...
        container[key].delete_files([large_filename])
        # ...only the small files must remain.
        container.sync()
        container[key].download("downloaded3.zip")
        with ZipFile("downloaded3.zip") as zipped:
            assert set(zipped.namelist()) == {"/", *small_filenames}
            check_small_files(zipped)