def setUp(self):
    self.cwd = os.getcwd()
    self.tempdir = TemporaryDirectory()
    bandersnatch.filter.loaded_filter_plugins = defaultdict(list)
    os.chdir(self.tempdir.name)
def _generic_test(func,
                  in_,
                  out,
                  wrapped_eq=eq,
                  pb_mg_eq=eq,
                  pb_bytes_eq=eq,
                  dict_eq=eq,
                  json_eq=eq,
                  preload=None,
                  reqs=None,
                  skip=None):
    '''Reusable wrap test routine with swappable equality functions'''
    model = Model(transform=func)
    model_name = 'my-model'

    with TemporaryDirectory() as tdir:
        with _dump_model(model, model_name, reqs) as dump_dir:
            _copy_dir(dump_dir, tdir, model_name)

        if preload is not None:
            preload()

        copied_dump_dir = path_join(tdir, model_name)
        wrapped_model = load_model(copied_dump_dir)

        TransIn = model.transform.input_type
        TransOut = model.transform.output_type

        trans_in = TransIn(*in_)
        trans_out = TransOut(*out)
        trans_in_pb = _pack_pb_msg(trans_in, wrapped_model.transform._module)
        trans_out_pb = _pack_pb_msg(trans_out, wrapped_model.transform._module)
        trans_in_pb_bytes = trans_in_pb.SerializeToString()
        trans_out_pb_bytes = trans_out_pb.SerializeToString()
        trans_in_dict = MessageToDict(trans_in_pb)
        trans_out_dict = MessageToDict(trans_out_pb)
        trans_in_json = MessageToJson(trans_in_pb, indent=0)
        trans_out_json = MessageToJson(trans_out_pb, indent=0)

        # test all from / as combinations
        for as_method_name, as_data_expected, eq_func in (
                ('as_wrapped', trans_out, wrapped_eq),
                ('as_pb_msg', trans_out_pb, pb_mg_eq),
                ('as_pb_bytes', trans_out_pb_bytes, pb_bytes_eq),
                ('as_dict', trans_out_dict, dict_eq),
                ('as_json', trans_out_json, json_eq)):
            for from_method_name, from_data in (
                    ('from_wrapped', trans_in),
                    ('from_pb_msg', trans_in_pb),
                    ('from_pb_bytes', trans_in_pb_bytes),
                    ('from_dict', trans_in_dict),
                    ('from_json', trans_in_json)):
                if skip is not None and skip(as_method_name, from_method_name):
                    logger.info("Skipping {} -> {}".format(from_method_name,
                                                           as_method_name))
                    continue
                from_method = getattr(wrapped_model.transform, from_method_name)
                resp = from_method(from_data)
                as_data_method = getattr(resp, as_method_name)
                as_data = as_data_method()
                assert eq_func(as_data, as_data_expected)
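# A hypothetical invocation of the helper above (the `add` transform and the
# structural JSON comparator are illustrative, not from the source): the
# swappable equality hooks let a caller compare parsed JSON instead of raw
# strings, which is useful when key order is not guaranteed.
import json

def _json_struct_eq(a, b):
    # compare parsed JSON structures rather than serialized strings
    return json.loads(a) == json.loads(b)

def add(x: int, y: int) -> int:
    return x + y

# _generic_test(add, in_=(1, 2), out=(3,), json_eq=_json_struct_eq)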
def __init__(self, training_path, epoch, tokenizer, num_data_epochs,
             reduce_memory=False):
    self.vocab = tokenizer.vocab
    self.tokenizer = tokenizer
    self.epoch = epoch
    self.data_epoch = epoch % num_data_epochs
    data_file = training_path / f"epoch_{self.data_epoch}.json"
    metrics_file = training_path / f"epoch_{self.data_epoch}_metrics.json"
    assert data_file.is_file() and metrics_file.is_file()
    metrics = json.loads(metrics_file.read_text())
    num_samples = metrics['num_training_examples']
    seq_len = metrics['max_seq_len']
    self.temp_dir = None
    self.working_dir = None
    if reduce_memory:
        self.temp_dir = TemporaryDirectory()
        self.working_dir = Path(self.temp_dir.name)
        # note: the builtin bool dtype is used here (np.bool was deprecated
        # and later removed from NumPy)
        input_ids = np.memmap(filename=self.working_dir / 'input_ids.memmap',
                              mode='w+', dtype=np.int32,
                              shape=(num_samples, seq_len))
        input_masks = np.memmap(filename=self.working_dir / 'input_masks.memmap',
                                shape=(num_samples, seq_len),
                                mode='w+', dtype=bool)
        segment_ids = np.memmap(filename=self.working_dir / 'segment_ids.memmap',
                                shape=(num_samples, seq_len),
                                mode='w+', dtype=bool)
        lm_label_ids = np.memmap(filename=self.working_dir / 'lm_label_ids.memmap',
                                 shape=(num_samples, seq_len),
                                 mode='w+', dtype=np.int32)
        lm_label_ids[:] = -1
        is_nexts = np.memmap(filename=self.working_dir / 'is_nexts.memmap',
                             shape=(num_samples,), mode='w+', dtype=bool)
    else:
        input_ids = np.zeros(shape=(num_samples, seq_len), dtype=np.int32)
        input_masks = np.zeros(shape=(num_samples, seq_len), dtype=bool)
        segment_ids = np.zeros(shape=(num_samples, seq_len), dtype=bool)
        lm_label_ids = np.full(shape=(num_samples, seq_len), dtype=np.int32,
                               fill_value=-1)
        is_nexts = np.zeros(shape=(num_samples,), dtype=bool)
    logging.info(f"Loading training examples for epoch {epoch}")
    with data_file.open() as f:
        for i, line in enumerate(
                tqdm(f, total=num_samples, desc="Training examples")):
            line = line.strip()
            example = json.loads(line)
            features = convert_example_to_features(example, tokenizer, seq_len)
            input_ids[i] = features.input_ids
            segment_ids[i] = features.segment_ids
            input_masks[i] = features.input_mask
            lm_label_ids[i] = features.lm_label_ids
            is_nexts[i] = features.is_next
    assert i == num_samples - 1  # Assert that the sample count metric was true
    logging.info("Loading complete!")
    self.num_samples = num_samples
    self.seq_len = seq_len
    self.input_ids = input_ids
    self.input_masks = input_masks
    self.segment_ids = segment_ids
    self.lm_label_ids = lm_label_ids
    self.is_nexts = is_nexts
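# The source stops at __init__. A dataset like this normally also provides
# __len__ and __getitem__; a minimal sketch of what those could look like
# here (an assumption, not from the source; it presumes torch is imported
# in this module and that consumers want int64 tensors):
def __len__(self):
    return self.num_samples

def __getitem__(self, item):
    return (torch.tensor(self.input_ids[item].astype(np.int64)),
            torch.tensor(self.input_masks[item].astype(np.int64)),
            torch.tensor(self.segment_ids[item].astype(np.int64)),
            torch.tensor(self.lm_label_ids[item].astype(np.int64)),
            torch.tensor(int(self.is_nexts[item])))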
def shapefile(gdf_with_data):
    with TemporaryDirectory() as d:
        filepath = pathlib.Path(d) / "temp.shp"
        filename = str(filepath.absolute())
        gdf_with_data.to_file(filename)
        yield filename
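# A minimal sketch of a test consuming the fixture above (hypothetical, not
# from the source; assumes geopandas is installed and `gdf_with_data` is a
# GeoDataFrame fixture defined elsewhere in the suite):
import geopandas

def test_shapefile_roundtrip(shapefile, gdf_with_data):
    # the written shapefile should load back with the same number of rows
    loaded = geopandas.read_file(shapefile)
    assert len(loaded) == len(gdf_with_data)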
def setUpClass(cls):
    cls.pdf_path = pdf_path
    cls.temp = TemporaryDirectory()
import subprocess as sp

# Disable etelemetry during doc builds
os.environ["NIPYPE_NO_ET"] = "1"

conf_py = Path(__file__)

example_dir = conf_py.parent / 'users' / 'examples'
shutil.rmtree(example_dir, ignore_errors=True)
example_dir.mkdir(parents=True)
python_dir = conf_py.parent / "_static" / "python"
shutil.rmtree(python_dir, ignore_errors=True)

ex2rst = str(conf_py.parent.parent / "tools" / "ex2rst")

with TemporaryDirectory() as tmpdir:
    sp.run(["git", "clone", "--depth", "1",
            "https://github.com/niflows/nipype1-examples.git", tmpdir],
           check=True)
    source_dir = Path(tmpdir) / "package" / "niflow" / "nipype1" / "examples"
    shutil.copytree(source_dir, python_dir)

sp.run(["python", ex2rst, "--outdir", str(example_dir), str(python_dir),
        "-x", str(python_dir / "test_spm.py"),
        "-x", str(python_dir / "__init__.py"),
        "-x", str(python_dir / "cli.py")],
       check=True)
def __init__(self, url, config_context):
    self._tempdir = TemporaryDirectory()
    self._path = Path(self._tempdir.name)
    # url[4:] presumably strips a four-character scheme prefix
    # (such as "git+") before cloning
    self._repo = GitRepo.clone_from(url[4:], self._tempdir.name)
    super().__init__(url, config_context)
def tmp_path(request) -> Path:
    with TemporaryDirectory(prefix=request.node.name) as d:
        yield Path(d)
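# A minimal usage sketch (hypothetical test, not from the source): pytest
# injects the fixture by parameter name, and the directory is removed once
# the yield returns.
def test_writes_file(tmp_path):
    out = tmp_path / "result.txt"
    out.write_text("hello")
    assert out.read_text() == "hello"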
def _open_openml_url(openml_path: str, data_home: Optional[str],
                     n_retries: int = 3, delay: float = 1.0):
    """
    Returns a resource from OpenML.org. Caches it to data_home if required.

    Parameters
    ----------
    openml_path : str
        OpenML URL that will be accessed. This will be prefixed with
        _OPENML_PREFIX.

    data_home : str
        Directory to which the files will be cached. If None, no caching will
        be applied.

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered. Errors with
        status code 412 won't be retried as they represent OpenML generic
        errors.

    delay : float, default=1.0
        Number of seconds between retries.

    Returns
    -------
    result : stream
        A stream to the OpenML resource.
    """
    def is_gzip_encoded(_fsrc):
        return _fsrc.info().get("Content-Encoding", "") == "gzip"

    req = Request(_OPENML_PREFIX + openml_path)
    req.add_header("Accept-encoding", "gzip")

    if data_home is None:
        fsrc = _retry_on_network_error(n_retries, delay, req.full_url)(urlopen)(
            req, timeout=delay)
        if is_gzip_encoded(fsrc):
            return gzip.GzipFile(fileobj=fsrc, mode="rb")
        return fsrc

    local_path = _get_local_path(openml_path, data_home)
    dir_name, file_name = os.path.split(local_path)
    if not os.path.exists(local_path):
        os.makedirs(dir_name, exist_ok=True)
        try:
            # Create a tmpdir as a subfolder of dir_name where the final file
            # will be moved to if the download is successful. This guarantees
            # that the renaming operation to the final location is atomic to
            # ensure the concurrence safety of the dataset caching mechanism.
            with TemporaryDirectory(dir=dir_name) as tmpdir:
                with closing(
                        _retry_on_network_error(n_retries, delay, req.full_url)(
                            urlopen)(req, timeout=delay)) as fsrc:
                    opener: Callable
                    if is_gzip_encoded(fsrc):
                        opener = open
                    else:
                        opener = gzip.GzipFile
                    with opener(os.path.join(tmpdir, file_name), "wb") as fdst:
                        shutil.copyfileobj(fsrc, fdst)
                shutil.move(fdst.name, local_path)
        except Exception:
            if os.path.exists(local_path):
                os.unlink(local_path)
            raise

    # XXX: First time, decompression will not be necessary (by using fsrc),
    # but it will happen nonetheless
    return gzip.GzipFile(local_path, "rb")
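# A hedged usage sketch (the OpenML path below is illustrative, not from the
# source): with data_home=None the function streams the resource directly,
# while a real directory would cache it to disk first.
from contextlib import closing

def _fetch_raw(openml_path="api/v1/json/data/61"):  # hypothetical path
    with closing(_open_openml_url(openml_path, data_home=None)) as stream:
        return stream.read()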
def testdoDownloadPackage(self):
    """Local download tests"""
    archive = self.__getArchiveInstance({})
    archive.wantDownload(True)
    self.assertTrue(archive.canDownloadLocal())

    with TemporaryDirectory() as tmp:
        audit = os.path.join(tmp, "audit.json.gz")
        content = os.path.join(tmp, "workspace")
        self.assertTrue(run(archive.downloadPackage(
            DummyStep(), DOWNLOAD_ARITFACT, audit, content)))
        self.__testWorkspace(audit, content)
        self.assertEqual(run(archive.downloadLocalLiveBuildId(
            DummyStep(), DOWNLOAD_ARITFACT)), b'\x00' * 20)

    # non-existent and error cases
    with TemporaryDirectory() as tmp:
        audit = os.path.join(tmp, "audit.json.gz")
        content = os.path.join(tmp, "workspace")
        self.assertFalse(run(archive.downloadPackage(
            DummyStep(), NOT_EXISTS_ARTIFACT, audit, content)))
        self.assertFalse(run(archive.downloadPackage(
            DummyStep(), ERROR_DOWNLOAD_ARTIFACT, audit, content)))
        self.assertFalse(run(archive.downloadPackage(
            DummyStep(), ERROR_UPLOAD_ARTIFACT, audit, content)))
        self.assertEqual(run(archive.downloadLocalLiveBuildId(
            DummyStep(), NOT_EXISTS_ARTIFACT)), None)
        self.assertEqual(run(archive.downloadLocalLiveBuildId(
            DummyStep(), ERROR_DOWNLOAD_ARTIFACT)), None)
        self.assertEqual(run(archive.downloadLocalLiveBuildId(
            DummyStep(), ERROR_UPLOAD_ARTIFACT)), None)
        with self.assertRaises(BuildError):
            run(archive.downloadPackage(
                DummyStep(), BROKEN_ARTIFACT, audit, content))
        with self.assertRaises(BuildError):
            run(archive.downloadPackage(
                DummyStep(), WRONG_VERSION_ARTIFACT, audit, content))
def train_experiment(engine=None):
    with TemporaryDirectory() as logdir:
        # sample data
        num_samples, num_features, num_classes1, num_classes2 = \
            int(1e4), int(1e1), 4, 10
        X = torch.rand(num_samples, num_features)
        y1 = (torch.rand(num_samples) * num_classes1).to(torch.int64)
        y2 = (torch.rand(num_samples) * num_classes2).to(torch.int64)

        # pytorch loaders
        dataset = TensorDataset(X, y1, y2)
        loader = DataLoader(dataset, batch_size=32, num_workers=1)
        loaders = {"train": loader, "valid": loader}

        # model, criterion, optimizer, scheduler
        model = CustomModule(num_features, num_classes1, num_classes2)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters())
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [2])

        callbacks = [
            dl.CriterionCallback(metric_key="loss1", input_key="logits1",
                                 target_key="targets1"),
            dl.CriterionCallback(metric_key="loss2", input_key="logits2",
                                 target_key="targets2"),
            dl.MetricAggregationCallback(metric_key="loss",
                                         metrics=["loss1", "loss2"],
                                         mode="mean"),
            dl.BackwardCallback(metric_key="loss"),
            dl.OptimizerCallback(metric_key="loss"),
            dl.SchedulerCallback(),
            dl.AccuracyCallback(
                input_key="logits1",
                target_key="targets1",
                num_classes=num_classes1,
                prefix="one_",
            ),
            dl.AccuracyCallback(
                input_key="logits2",
                target_key="targets2",
                num_classes=num_classes2,
                prefix="two_",
            ),
            dl.CheckpointCallback(
                "./logs/one",
                loader_key="valid",
                metric_key="one_accuracy01",
                minimize=False,
                topk=1,
            ),
            dl.CheckpointCallback(
                "./logs/two",
                loader_key="valid",
                metric_key="two_accuracy03",
                minimize=False,
                topk=3,
            ),
        ]
        if SETTINGS.ml_required:
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits1",
                    target_key="targets1",
                    num_classes=num_classes1,
                    prefix="one_cm",
                ))
            # catalyst[ml] required
            callbacks.append(
                dl.ConfusionMatrixCallback(
                    input_key="logits2",
                    target_key="targets2",
                    num_classes=num_classes2,
                    prefix="two_cm",
                ))

        # model training
        runner = CustomRunner()
        runner.train(
            engine=engine,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            num_epochs=1,
            verbose=False,
            callbacks=callbacks,
            loggers={
                "console": dl.ConsoleLogger(),
                "tb": dl.TensorboardLogger("./logs/tb"),
            },
        )
def test_filter_json(self):
    """
    Test that the ``json`` filter can be used.
    """
    # We are going to overwrite the template file, so we have to disable the
    # cache in order to avoid problems.
    engine = JinjaEngine({'cache_enabled': False})
    with TemporaryDirectory() as tmpdir:
        # We have to generate a template file that can be read by the
        # template engine.
        tmpdir_path = pathlib.Path(tmpdir)
        template_path = tmpdir_path / 'test.jinja'
        _write_file(
            template_path,
            """
            {{ value | json }}
            """)
        value = OrderedDict()
        value['def'] = 456
        value['abc'] = 123
        self.assertEqual(
            '{"def": 456, "abc": 123}',
            engine.render(str(template_path), {'value': value}))
        # We also want to test the sort_keys option.
        _write_file(
            template_path,
            """
            {{ value | json(sort_keys=False) }}
            """)
        self.assertEqual(
            '{"def": 456, "abc": 123}',
            engine.render(str(template_path), {'value': value}))
        _write_file(
            template_path,
            """
            {{ value | json(sort_keys=True) }}
            """)
        self.assertEqual(
            '{"abc": 123, "def": 456}',
            engine.render(str(template_path), {'value': value}))
        # And we want to test the indent option (indent=None is the default).
        _write_file(
            template_path,
            """
            {{ value | json }}
            """)
        self.assertEqual(
            '[1, 2]',
            engine.render(str(template_path), {'value': [1, 2]}))
        _write_file(
            template_path,
            """
            {{ value | json(indent=None) }}
            """)
        self.assertEqual(
            '[1, 2]',
            engine.render(str(template_path), {'value': [1, 2]}))
        _write_file(
            template_path,
            """
            {{ value | json(indent=0) }}
            """)
        self.assertEqual(
            '[\n1,\n2\n]',
            engine.render(str(template_path), {'value': [1, 2]}))
        _write_file(
            template_path,
            """
            {{ value | json(indent=2) }}
            """)
        self.assertEqual(
            '[\n  1,\n  2\n]',
            engine.render(str(template_path), {'value': [1, 2]}))
def test_config_provide_transform_functions(self):
    """
    Test the ``provide_transform_functions`` configuration option.
    """
    with TemporaryDirectory() as tmpdir:
        # We have to generate a template file that can be read by the
        # template engine.
        tmpdir_path = pathlib.Path(tmpdir)
        template_path = tmpdir_path / 'test.jinja'
        _write_file(
            template_path,
            """
            {{ transform['string.to_upper']('Some text') }}
            """)
        # We disable the cache for this test because it causes problems when
        # we rapidly change files.
        engine = JinjaEngine({'cache_enabled': False})
        self.assertEqual('SOME TEXT', engine.render(str(template_path), {}))
        # Explicitly setting provide_transform_functions should not make a
        # difference.
        engine = JinjaEngine({
            'cache_enabled': False,
            'provide_transform_functions': True
        })
        self.assertEqual('SOME TEXT', engine.render(str(template_path), {}))
        # If we provide our own transform object in the context, this should
        # hide the transform object provided by the template engine because
        # context objects override globals.
        _write_file(
            template_path,
            """
            {{ transform }}
            """)
        self.assertEqual(
            'text from context',
            engine.render(str(template_path),
                          {'transform': 'text from context'}))
        # The "is defined" check should succeed if there is a transform
        # object, and fail if there is none.
        _write_file(
            template_path,
            """
            {{ transform is defined }}
            """)
        self.assertEqual('True', engine.render(str(template_path), {}))
        # Now, we set provide_transform_functions to False, which should
        # remove the transform object from the context.
        engine = JinjaEngine({
            'cache_enabled': False,
            'provide_transform_functions': False
        })
        self.assertEqual('False', engine.render(str(template_path), {}))
        # If we provide our own transform object, that object should be
        # available.
        _write_file(
            template_path,
            """
            {{ transform }}
            """)
        self.assertEqual(
            'text from context',
            engine.render(str(template_path),
                          {'transform': 'text from context'}))
def setUp(self):
    self._temp_dir = TemporaryDirectory()
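# The source shows only setUp; a typical counterpart (an assumption, not
# shown in the original) releases the directory explicitly instead of
# relying on garbage collection:
def tearDown(self):
    self._temp_dir.cleanup()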
def __call__(self) -> bool:
    resolver = get_resolver(reqs=self.args.name)
    name = next(iter(resolver.graph.get_layer(0))).dependencies[0].name

    command = self.config.get('command')
    if not command:
        command = 'python'
    if isinstance(command, str):
        command = shlex.split(command)

    with TemporaryDirectory() as base_path:
        base_path = Path(base_path)

        # make venv
        venv = VEnv(path=base_path)
        if venv.exists():
            self.logger.error('already installed', extra=dict(package=name))
            return False
        python = get_python(self.config)
        self.logger.info('creating venv...', extra=dict(
            venv=str(venv.path),
            python=str(python.path),
        ))
        venv.create(python_path=python.path)

        # install
        ok = self._install(resolver=resolver, python_path=venv.python_path)
        if not ok:
            return False

        # install executable
        executable = venv.bin_path / command[0]
        if not executable.exists():
            self.logger.warning(
                'executable is not found in venv, trying to install...',
                extra=dict(executable=command[0]))
            ok = self._install(
                resolver=get_resolver(reqs=command[:1]),
                python_path=venv.python_path,
            )
            if not ok:
                return False
        if not executable.exists():
            self.logger.error('package installed, but executable is not found')
            return False

        # make startup script to import installed packages
        startup_path = base_path / '_startup.py'
        packages = self._get_startup_packages(lib_path=venv.lib_path,
                                              packages=self.args.name)
        if not packages:
            self.logger.error('cannot find any packages')
            return False
        startup_path.write_text('import ' + ', '.join(sorted(packages)))

        # run
        self.logger.info('running...')
        with override_env_vars({'PYTHONSTARTUP': str(startup_path)}):
            result = subprocess.run([str(executable)] + command[1:])
        if result.returncode != 0:
            self.logger.error('command failed',
                              extra=dict(code=result.returncode))
            return False
        return True
def setUp(self):
    self.tempdir = TemporaryDirectory()
    self.config_path = path_join(self.tempdir.name, 'config.yaml')
def main(
    output_file: str,
    entry_point: Optional[str],
    console_script: Optional[str],
    python: Optional[str],
    site_packages: Optional[str],
    compressed: bool,
    compile_pyc: bool,
    extend_pythonpath: bool,
    reproducible: bool,
    no_modify: bool,
    preamble: Optional[str],
    pip_args: List[str],
) -> None:
    """
    Shiv is a command line utility for building fully self-contained Python
    zipapps as outlined in PEP 441, but with all their dependencies included!
    """
    if not pip_args and not site_packages:
        sys.exit(NO_PIP_ARGS_OR_SITE_PACKAGES)

    if output_file is None:
        sys.exit(NO_OUTFILE)

    # check for disallowed pip arguments
    for disallowed in DISALLOWED_ARGS:
        for supplied_arg in pip_args:
            if supplied_arg in disallowed:
                sys.exit(DISALLOWED_PIP_ARGS.format(
                    arg=supplied_arg, reason=DISALLOWED_ARGS[disallowed]))

    sources: List[Path] = []

    with TemporaryDirectory() as tmp_site_packages:

        # If both site_packages and pip_args are present, we need to copy the
        # site_packages dir into our staging area (tmp_site_packages) as pip
        # may modify the contents.
        if site_packages:
            if pip_args:
                for sp in site_packages:
                    copytree(Path(sp), Path(tmp_site_packages))
            else:
                sources.extend([Path(p).expanduser() for p in site_packages])

        if pip_args:
            # Install dependencies into staged site-packages.
            pip.install(["--target", tmp_site_packages] + list(pip_args))

        if preamble:
            bin_dir = Path(tmp_site_packages, "bin")
            bin_dir.mkdir(exist_ok=True)
            shutil.copy(Path(preamble).absolute(),
                        bin_dir / Path(preamble).name)

        sources.append(Path(tmp_site_packages).absolute())

        if no_modify:
            # if no_modify is specified, we need to build a map of source
            # files and their sha256 hashes, to be checked at runtime:
            hashes = {}

            for source in sources:
                for path in source.rglob("**/*.py"):
                    hashes[str(path.relative_to(source))] = hashlib.sha256(
                        path.read_bytes()).hexdigest()

        # if entry_point is a console script, get the callable
        if entry_point is None and console_script is not None:
            try:
                entry_point = find_entry_point(sources, console_script)
            except KeyError:
                if not console_script_exists(sources, console_script):
                    sys.exit(NO_ENTRY_POINT.format(entry_point=console_script))

        # Some projects need reproducible artifacts, so they can use the
        # SOURCE_DATE_EPOCH environment variable to specify the timestamps
        # in the zipapp.
        timestamp = int(os.environ.get(
            SOURCE_DATE_EPOCH_ENV,
            SOURCE_DATE_EPOCH_DEFAULT if reproducible else time.time()))

        # create runtime environment metadata
        env = Environment(
            built_at=datetime.utcfromtimestamp(timestamp).strftime(
                BUILD_AT_TIMESTAMP_FORMAT),
            entry_point=entry_point,
            script=console_script,
            compile_pyc=compile_pyc,
            extend_pythonpath=extend_pythonpath,
            shiv_version=__version__,
            no_modify=no_modify,
            reproducible=reproducible,
            preamble=Path(preamble).name if preamble else None,
        )

        if no_modify:
            env.hashes = hashes

        # create the zip
        builder.create_archive(
            sources,
            target=Path(output_file).expanduser(),
            interpreter=python or get_interpreter_path(),
            main="_bootstrap:bootstrap",
            env=env,
            compressed=compressed,
        )
def start_python_dataflow(  # pylint: disable=too-many-arguments
    self,
    job_name: str,
    variables: dict,
    dataflow: str,
    py_options: List[str],
    project_id: str,
    py_interpreter: str = "python3",
    py_requirements: Optional[List[str]] = None,
    py_system_site_packages: bool = False,
    append_job_name: bool = True,
    on_new_job_id_callback: Optional[Callable[[str], None]] = None,
    location: str = DEFAULT_DATAFLOW_LOCATION,
):
    """
    Starts Dataflow job.

    :param job_name: The name of the job.
    :type job_name: str
    :param variables: Variables passed to the job.
    :type variables: Dict
    :param dataflow: Name of the Dataflow process.
    :type dataflow: str
    :param py_options: Additional options.
    :type py_options: List[str]
    :param project_id: The ID of the GCP project that owns the job.
        If set to ``None`` or missing, the default project_id from the
        GCP connection is used.
    :type project_id: Optional[str]
    :param py_interpreter: Python version of the beam pipeline.
        If None, this defaults to python3.
        To track python versions supported by beam and related issues
        check: https://issues.apache.org/jira/browse/BEAM-1251
    :type py_interpreter: str
    :param py_requirements: Additional python package(s) to install.
        If a value is passed to this parameter, a new virtual environment
        will be created with the additional packages installed.

        You could also install the apache-beam package if it is not
        installed on your system, or if you want to use a different version.
    :type py_requirements: List[str]
    :param py_system_site_packages: Whether to include system_site_packages
        in your virtualenv. See virtualenv documentation for more
        information.

        This option is only relevant if the ``py_requirements`` parameter
        is not None.
    :type py_system_site_packages: bool
    :param append_job_name: True if a unique suffix has to be appended to
        the job name.
    :type append_job_name: bool
    :param on_new_job_id_callback: Callback called when the job ID is known.
    :type on_new_job_id_callback: callable
    :param location: Job location.
    :type location: str
    """
    name = self._build_dataflow_job_name(job_name, append_job_name)
    variables['job_name'] = name
    variables['region'] = location

    def label_formatter(labels_dict):
        return [f'--labels={key}={value}'
                for key, value in labels_dict.items()]

    if py_requirements is not None:
        if not py_requirements and not py_system_site_packages:
            warning_invalid_environment = textwrap.dedent(
                """\
                Invalid method invocation. You have disabled inclusion of
                system packages and passed an empty list of required
                packages, so it is not possible to create a valid virtual
                environment. In the virtual environment, the apache-beam
                package must be installed for your job to be executed.

                To fix this problem:
                * install apache-beam on the system, then set parameter
                  py_system_site_packages to True,
                * add apache-beam to the list of required packages in
                  parameter py_requirements.
                """
            )
            raise AirflowException(warning_invalid_environment)

        with TemporaryDirectory(prefix='dataflow-venv') as tmp_dir:
            py_interpreter = prepare_virtualenv(
                venv_directory=tmp_dir,
                python_bin=py_interpreter,
                system_site_packages=py_system_site_packages,
                requirements=py_requirements,
            )
            command_prefix = [py_interpreter] + py_options + [dataflow]

            self._start_dataflow(
                variables=variables,
                name=name,
                command_prefix=command_prefix,
                label_formatter=label_formatter,
                project_id=project_id,
                on_new_job_id_callback=on_new_job_id_callback,
                location=location,
            )
    else:
        command_prefix = [py_interpreter] + py_options + [dataflow]

        self._start_dataflow(
            variables=variables,
            name=name,
            command_prefix=command_prefix,
            label_formatter=label_formatter,
            project_id=project_id,
            on_new_job_id_callback=on_new_job_id_callback,
            location=location,
        )
def execute(self, fcrepo, args):
    start_time = datetime.now().timestamp()
    count = 0
    errors = 0
    total = len(args.uris)
    try:
        serializer_class = SERIALIZER_CLASSES[args.format]
    except KeyError:
        logger.error(f'Unknown format: {args.format}')
        raise FailureException()

    if args.export_binaries and args.binary_types is not None:
        # filter files by their MIME type
        def mime_type_filter(file):
            return str(file.mimetype) in args.binary_types.split(',')
    else:
        # default filter is None; in this case filter() will return
        # all items that evaluate to true
        mime_type_filter = None

    logger.info(f'Export destination: {args.output_dest}')

    # create a bag in a temporary directory to hold exported items
    temp_dir = TemporaryDirectory()
    bag = make_bag(temp_dir.name)

    export_dir = os.path.join(temp_dir.name, 'data')
    serializer = serializer_class(directory=export_dir,
                                  public_uri_template=args.uri_template)
    for uri in args.uris:
        try:
            logger.info(f'Exporting item {count + 1}/{total}: {uri}')

            # derive an item-level directory name from the URI
            # currently this is hard-coded to look for a UUID
            # TODO: expand to other types of unique ids?
            match = UUID_REGEX.search(uri)
            if match is None:
                raise DataReadException(f'No UUID found in {uri}')
            item_dir = match[0]

            graph = fcrepo.get_graph(uri)
            model_class = detect_resource_class(graph, uri, fallback=Item)
            obj = model_class.from_graph(graph, uri)

            if args.export_binaries:
                logger.info(f'Gathering binaries for {uri}')
                binaries = list(filter(mime_type_filter,
                                       obj.gather_files(fcrepo)))
                total_size = sum(int(file.size[0]) for file in binaries)
                size, unit = format_size(total_size)
                logger.info(f'Total size of binaries: {round(size, 2)} {unit}')
            else:
                binaries = None

            serializer.write(obj, files=binaries, binaries_dir=item_dir)

            if binaries is not None:
                binaries_dir = os.path.join(export_dir, item_dir)
                os.makedirs(binaries_dir, exist_ok=True)
                for file in binaries:
                    response = fcrepo.head(file.uri)
                    accessed = parsedate(response.headers['Date'])
                    modified = parsedate(response.headers['Last-Modified'])
                    binary_filename = os.path.join(binaries_dir,
                                                   str(file.filename))
                    with open(binary_filename, mode='wb') as binary:
                        with file.source as stream:
                            for chunk in stream:
                                binary.write(chunk)

                    # update the atime and mtime of the file to reflect the
                    # time of the HTTP request and the resource's
                    # last-modified time in the repo
                    os.utime(binary_filename,
                             times=(mktime(accessed), mktime(modified)))
                    logger.debug(f'Copied {file.uri} to {binary.name}')

            count += 1

        except DataReadException as e:
            # log the failure, but continue to attempt to export the rest
            # of the URIs
            logger.error(f'Export of {uri} failed: {e}')
            errors += 1
        except (RESTAPIException, ConnectionError) as e:
            # log the failure, but continue to attempt to export the rest
            # of the URIs
            logger.error(f'Unable to retrieve {uri}: {e}')
            errors += 1

        # update the status
        now = datetime.now().timestamp()
        yield {
            'time': {
                'started': start_time,
                'now': now,
                'elapsed': now - start_time
            },
            'count': {
                'total': total,
                'exported': count,
                'errors': errors
            }
        }

    try:
        serializer.finish()
    except EmptyItemListError:
        logger.error("No items could be exported; skipping writing file")

    logger.info(f'Exported {count} of {total} items')

    # save the BagIt bag to send to the output destination
    bag.save(manifests=True)

    # parse the output destination to determine where to send the export
    if args.output_dest.startswith('sftp:'):
        # send over SFTP to a remote host
        sftp_uri = urlsplit(args.output_dest)
        ssh_client = get_ssh_client(sftp_uri, key_filename=args.key)
        try:
            sftp_client = SFTPClient.from_transport(ssh_client.get_transport())
            root, ext = splitext(basename(sftp_uri.path))
            destination = sftp_client.open(sftp_uri.path, mode='w')
        except SSHException as e:
            raise FailureException(str(e)) from e
    else:
        # send to a local file
        zip_filename = args.output_dest
        root, ext = splitext(basename(zip_filename))
        destination = zip_filename

    # write out a single ZIP file of the whole bag
    compress_bag(bag, destination, root)

    self.result = {
        'type': 'export_complete' if count == total else 'partial_export',
        'content_type': serializer.content_type,
        'file_extension': serializer.file_extension,
        'count': {
            'total': total,
            'exported': count,
            'errors': errors
        }
    }
def setUp(self):
    super(RestoreTestCase, self).setUp()
    self.include = TemporaryDirectory()
    generic.create_test_files(self.sid.pathBackup(self.include.name))
def setUp(self):
    self.tempdir = TemporaryDirectory()
def compile_2k_merge(path):
    vcf = setup(path)
    vcfs = [comb.transform_gvcf(vcf)] * COMBINE_GVCF_MAX
    combined = [comb.combine_gvcfs(vcfs)] * 20
    with TemporaryDirectory() as tmpdir:
        hl.experimental.write_matrix_tables(
            combined, os.path.join(tmpdir, 'combiner-multi-write'),
            overwrite=True)
def target_file():
    with TemporaryDirectory() as d:
        filepath = pathlib.Path(d) / "temp.shp"
        filename = str(filepath.absolute())
        yield filename
def graph(self):
    """
    Returns a graphviz.Digraph for the directed graph the inheriting
    strategy represents.

    The graph can be rendered with:
    ``mystrategy.graph.render("filename")  # renders to filename.png``
    """
    from graphviz import Digraph

    if (self._graph_cache['graph']
            and self._graph_cache['path'] == self.path):
        return self._graph_cache['graph']

    if not self._graph_cache['tempdir']:
        self._graph_cache['tempdir'] = TemporaryDirectory()

    dg = Digraph(
        filename=os.path.join(self._graph_cache['tempdir'].name, 'graph'),
        format='png',
    )

    edges = []
    dg.attr('node', style='filled', fillcolor='lightblue2', penwidth='1')
    dg.attr('edge', style='solid')
    for index, node_name in enumerate(self.path):
        attrs = {}
        if node_name == self.path[-1]:
            attrs = {'penwidth': '2'}
        dg.node(node_name, **attrs)
        if index < len(self.path) - 1:
            edges.append((node_name, self.path[index + 1]))
            dg.edge(*edges[-1])

    dg.attr('node', style='filled', color='lightgrey',
            fillcolor='lightgrey')
    dg.attr('edge', style='dashed', arrowhead='empty')
    for node_name in self.states:
        if node_name not in self.path:
            dg.node(node_name)
        for edge in self.states[node_name]['dependencies']:
            if (edge, node_name) in edges:
                continue
            dg.edge(edge, node_name)

    self._graph_cache['graph'] = dg
    return dg
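# A short usage sketch: because `filename` was fixed when the Digraph was
# built, render() with no arguments writes graph.png into the cached
# temporary directory and returns the path of the rendered file (assumes
# the graphviz binaries are on PATH; `mystrategy` is illustrative).
png_path = mystrategy.graph.render()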
import pytest
import subprocess
from importlib.machinery import SourceFileLoader
from datetime import datetime, timedelta
from tempfile import TemporaryDirectory
from unittest import mock
from unittest.mock import call

relpath_updater_script = "../sdw_updater_gui/Updater.py"
path_to_script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              relpath_updater_script)
updater = SourceFileLoader("Updater", path_to_script).load_module()
from Updater import UpdateStatus  # noqa: E402
from Updater import current_templates  # noqa: E402

# keep a reference to the TemporaryDirectory object; binding only `.name`
# would let the object be garbage-collected and the directory deleted
_temp_dir_obj = TemporaryDirectory()
temp_dir = _temp_dir_obj.name

debian_based_vms = [
    "sd-app",
    "sd-log",
    "sd-viewer",
    "sd-gpg",
    "sd-proxy",
    "sd-whonix",
    "sd-devices",
]

TEST_RESULTS_OK = {
    "dom0": UpdateStatus.UPDATES_OK,
    "fedora": UpdateStatus.UPDATES_OK,
    "sd-app": UpdateStatus.UPDATES_OK,
def convert(inputPath, outputPath=None, **kwargs):
    """
    Take a source input file and output a pyramidal tiff file.

    :param inputPath: the path to the input file or base file of a set.
    :param outputPath: the path of the output file.

    Optional parameters that can be specified in kwargs:

    :param tileSize: the horizontal and vertical tile size.
    :param format: one of 'tiff' or 'aperio'. Default is 'tiff'.
    :param onlyFrame: None for all frames or the 0-based frame number to just
        convert a single frame of the source.
    :param compression: one of 'jpeg', 'deflate' (zip), 'lzw', 'packbits',
        'zstd', or 'none'.
    :param quality: a jpeg or webp quality passed to vips. 0 is small, 100 is
        high quality. 90 or above is recommended. For webp, 0 is lossless.
    :param level: compression level for zstd, 1-22 (default is 10) and
        deflate, 1-9.
    :param predictor: one of 'none', 'horizontal', 'float', or 'yes' used for
        lzw and deflate. Default is horizontal for non-geospatial data and
        yes for geospatial.
    :param psnr: psnr value for jp2k, higher results in large files. 0 is
        lossless.
    :param cr: jp2k compression ratio. 1 is lossless, 100 will try to make
        a file 1% the size of the original, etc.
    :param subifds: if True (the default), when creating a multi-frame file,
        store lower resolution tiles in sub-ifds. If False, store all data in
        primary ifds.
    :param overwrite: if not True, throw an exception if the output path
        already exists.

    Additional optional parameters:

    :param geospatial: if not None, a boolean indicating if this file is
        geospatial. If not specified or None, this will be checked.
    :param _concurrency: the number of cpus to use during conversion. None to
        use the logical cpu count.

    :returns: outputPath if successful
    """
    if kwargs.get('_concurrency'):
        os.environ['VIPS_CONCURRENCY'] = str(_concurrency_to_value(**kwargs))
    geospatial = kwargs.get('geospatial')
    if geospatial is None:
        geospatial = is_geospatial(inputPath)
    suffix = format_hook('adjust_params', geospatial, kwargs, **kwargs)
    if suffix is False:
        return
    suffix = suffix or ('.tiff' if not geospatial else '.geo.tiff')
    if not outputPath:
        outputPath = os.path.splitext(inputPath)[0] + suffix
        if outputPath.endswith('.geo' + suffix):
            outputPath = outputPath[:len(outputPath) -
                                    len(suffix) - 4] + suffix
        if outputPath == inputPath:
            outputPath = (os.path.splitext(inputPath)[0] + '.' +
                          time.strftime('%Y%m%d-%H%M%S') + suffix)
    if os.path.exists(outputPath) and not kwargs.get('overwrite'):
        raise Exception('Output file already exists.')
    try:
        tiffinfo = tifftools.read_tiff(inputPath)
    except Exception:
        tiffinfo = None
    if not kwargs.get('compression', None):
        kwargs = kwargs.copy()
        lossy = _is_lossy(inputPath, tiffinfo)
        logger.debug('Is file lossy: %r', lossy)
        eightbit = _is_eightbit(inputPath, tiffinfo)
        logger.debug('Is file 8 bits per samples: %r', eightbit)
        kwargs['_compression'] = None
        kwargs['compression'] = 'jpeg' if lossy and eightbit else 'lzw'
    if geospatial:
        _generate_geotiff(inputPath, outputPath, **kwargs)
    else:
        with TemporaryDirectory() as tempDir:
            tempPath = os.path.join(tempDir, os.path.basename(outputPath))
            lidata = _data_from_large_image(inputPath, tempPath, **kwargs)
            logger.log(logging.DEBUG - 1,
                       'large_image information for %s: %r',
                       inputPath, lidata)
            if not is_vips(inputPath) and lidata:
                _convert_large_image(inputPath, outputPath, tempPath, lidata,
                                     **kwargs)
            elif _is_multiframe(inputPath):
                _generate_multiframe_tiff(inputPath, outputPath, tempPath,
                                          lidata, **kwargs)
            else:
                _generate_tiff(inputPath, outputPath, tempPath, lidata,
                               **kwargs)
    return outputPath
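# A hedged usage sketch (the file names are illustrative, not from the
# source): convert a slide into a pyramidal TIFF with JPEG-compressed tiles,
# overwriting any previous output.
output = convert('slide.svs', 'slide.tiff',
                 compression='jpeg', quality=90, overwrite=True)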
def setUp(self):
    self.tmp_dir = TemporaryDirectory()
    self.tmp_dir_path = Path(self.tmp_dir.name)
    os.chdir(self.tmp_dir.name)
    self.repo = self.get_repo()
def _create_temp_dir_next_to(path):
    return TemporaryDirectory(dir=str(Path(path).parent),
                              prefix='', suffix='.tmp')
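# A minimal sketch of the atomic-replace pattern this helper enables (the
# `_atomic_write` name is hypothetical): stage output in a sibling temporary
# directory, then os.replace() it into place. Keeping the staging directory
# on the same filesystem as the destination is what makes the swap atomic.
import os
from pathlib import Path

def _atomic_write(dest, data: bytes):
    with _create_temp_dir_next_to(dest) as tmp:
        staged = Path(tmp) / Path(dest).name
        staged.write_bytes(data)
        os.replace(staged, dest)  # atomic within the same filesystem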
def test_trityper2h5(self):
    with TemporaryDirectory() as temporary_directory:
        self.assertFalse(trityper2h5.main(
            "--input exampleTriTyper --output {} --study_name dosage"
            .format(os.path.join(temporary_directory, "haseh5"))
            .split(" ")))
def colorizable_files():
    """Populate temp dir with sample files.
    (too hard to emit individual test cases when fixture invoked in
    mark.parametrize)"""

    with TemporaryDirectory() as tempdir:
        for k, v in _cf.items():
            if v is None:
                continue
            if v.startswith("/"):
                file_path = v
            else:
                file_path = tempdir + "/" + v
            try:
                os.lstat(file_path)
            except FileNotFoundError:
                if file_path.endswith("_dir"):
                    os.mkdir(file_path)
                else:
                    open(file_path, "a").close()
                if k in ("di", "fi"):
                    pass
                elif k == "ex":
                    os.chmod(file_path, stat.S_IRWXU)  # tmpdir on windows need u+w
                elif k == "ln":  # cook ln test case.
                    os.chmod(file_path, stat.S_IRWXU)  # link to *executable* file
                    os.rename(file_path, file_path + "_target")
                    os.symlink(file_path + "_target", file_path)
                elif k == "or":
                    os.rename(file_path, file_path + "_target")
                    os.symlink(file_path + "_target", file_path)
                    os.remove(file_path + "_target")
                elif k == "pi":  # not on Windows
                    os.remove(file_path)
                    os.mkfifo(file_path)
                elif k == "su":
                    os.chmod(file_path, stat.S_ISUID)
                elif k == "sg":
                    os.chmod(file_path, stat.S_ISGID)
                elif k == "st":
                    os.chmod(
                        file_path, stat.S_ISVTX | stat.S_IRUSR | stat.S_IWUSR
                    )  # TempDir requires o:r
                elif k == "tw":
                    os.chmod(
                        file_path,
                        stat.S_ISVTX | stat.S_IWOTH | stat.S_IRUSR | stat.S_IWUSR,
                    )
                elif k == "ow":
                    os.chmod(file_path, stat.S_IWOTH | stat.S_IRUSR | stat.S_IWUSR)
                elif k == "mh":
                    os.rename(file_path, file_path + "_target")
                    os.link(file_path + "_target", file_path)
                else:
                    pass  # cauterize those elseless ifs!

                os.symlink(file_path, file_path + "_symlink")

        yield tempdir
    pass  # tempdir gets cleaned up here.