def test_binder_heuristics(self):
    from girder.plugins.wholetale.tasks.import_binder import sanitize_binder

    tale = Tale().createTale(self.image, [], creator=self.user, title="Binder")
    token = Token().createToken(user=self.user, days=0.25)
    tmpdir = tempfile.mkdtemp()

    with open(tmpdir + "/i_am_a_binder", "w") as fobj:
        fobj.write("but well hidden!")

    with tarfile.open(tmpdir + "/tale.tar.gz", "w:gz") as tar:
        tar.add(tmpdir + "/i_am_a_binder", arcname="dir_in_tar/i_am_a_binder")
    os.remove(tmpdir + "/i_am_a_binder")

    with zipfile.ZipFile(tmpdir + "/tale.zip", "w") as myzip:
        myzip.write(tmpdir + "/tale.tar.gz", arcname="dir_in_zip/tale.tar.gz")
    os.remove(tmpdir + "/tale.tar.gz")
    os.makedirs(tmpdir + "/hidden_binder")
    os.rename(tmpdir + "/tale.zip", tmpdir + "/hidden_binder" + "/tale.zip")

    girder_root = "http://localhost:{}".format(
        config.getConfig()["server.socket_port"]
    )
    with WebDAVFS(
        girder_root,
        login=self.user["login"],
        password="******".format(**token),
        root="/tales/{_id}".format(**tale),
    ) as destination_fs, OSFS(tmpdir) as source_fs:
        copy_fs(source_fs, destination_fs)
        sanitize_binder(destination_fs)
        self.assertEqual(destination_fs.listdir("/"), ["i_am_a_binder"])

    shutil.rmtree(tmpdir)
    Tale().remove(tale)
def sanitize_binder(root):
    root_listdir = root.listdir("/")
    if len(root_listdir) != 1:
        return

    single_file_or_dir = root_listdir[0]

    if root.isdir(single_file_or_dir):
        # Promote the contents of a lone top-level directory to the root.
        with root.opendir(single_file_or_dir) as subdir:
            copy_fs(subdir, root)
        root.removetree("/" + single_file_or_dir)
        sanitize_binder(root)

    if root.isfile(single_file_or_dir):
        if single_file_or_dir.endswith(".zip"):
            archive_fs = ReadZipFS
        elif ".tar" in single_file_or_dir:
            archive_fs = ReadTarFS
        else:
            archive_fs = None
        if archive_fs is not None:
            # Unpack a lone top-level archive in place, then recurse.
            with archive_fs(root.openbin(single_file_or_dir)) as archive:
                copy_fs(archive, root)
            root.remove("/" + single_file_or_dir)
            sanitize_binder(root)
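A minimal usage sketch (not from the original source) showing the effect of sanitize_binder: a lone top-level directory or archive is flattened until real content sits at the root. MemoryFS stands in for the WebDAV workspace, and the file name environment.yml is hypothetical.

from fs.memoryfs import MemoryFS

with MemoryFS() as staging:
    staging.makedirs("wrapper/inner")
    staging.writetext("wrapper/inner/environment.yml", "name: binder\n")
    sanitize_binder(staging)
    print(staging.listdir("/"))  # ['environment.yml']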
def tarfile(tmpdir_factory, filedata, tmp_fs):
    tardir = tmpdir_factory.mktemp('tar_fixture')
    path = Path(tardir, 'stuff.tar')
    tar_fs = tarfs.TarFS(path, write=True)
    copy_fs(tmp_fs, tar_fs)
    tar_fs.close()
    return path
def root(walker=None):
    base = load.root()
    mock = load.mock()
    copy_fs(base, mock, walker=walker)
    base.close()
    return mock
def copy(source_hook: FileSystemHookInterface, source_path: str,
         destination_hook: FileSystemHookInterface, destination_path: str):
    """Copy a directory tree from a source filesystem to a destination
    filesystem."""
    with source_hook as source_conn, destination_hook as dest_conn:
        print(f'Copy {source_path} to {destination_path}')
        copy_fs(source_conn.opendir(source_path),
                dest_conn.opendir(destination_path))
def join_files(tmpfs, outfs):
    """
    Join the backup json files into a single package (tarball, zip).

    :param tmpfs: the temporary fs where the backup is stored.
    :param outfs: the filesystem to copy to (should be `TarFS` or `ZipFS`).
        Things like compression and encoding should be specified at
        instantiation.
    """
    copy_fs(tmpfs, outfs)
    outfs.close()
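A hedged usage sketch for join_files (the file names are made up): stage files in a TempFS, then pack them into a gzipped tarball. Note that join_files closes the TarFS, which is what finalizes the archive on disk.

from fs.tempfs import TempFS
from fs.tarfs import TarFS

staging = TempFS()
staging.writetext("manifest.json", '{"version": 1}')
join_files(staging, TarFS("backup.tar.gz", write=True, compression="gz"))
staging.close()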
def install(dn, component):
    # populate dn['sandbox'] with the artifact files from component
    if os.path.exists(os.path.join(dn['sandbox'], 'baserock',
                                   component['name'] + '.meta')):
        return
    app.log(dn, 'Sandbox: installing %s' % component['cache'], verbose=True)
    if cache.get_cache(component) is False:
        app.log(dn, 'Unable to get cache for', component['name'], exit=True)
    unpackdir = cache.get_cache(component) + '.unpacked'
    if dn.get('kind') == 'system':  # 'is' on a str literal is unreliable
        copy_fs(unpackdir, dn['sandbox'])
    else:
        utils.hardlink_all_files(unpackdir, dn['sandbox'])
def write_out_backup(backing_store_fs, *, filepath=None, filesystem=None,
                     prefix=''):
    """
    Write the backup data to its final location. A backing store is required,
    and either a filepath to the packaged backup or the tmp filesystem is
    required.

    :param required backing_store_fs: a pyfilesystem2 object to be the final
        storage location of the backup (should be `OSFS`, `S3FS`, `FTPFS`,
        etc.). Can be a single object or a list of filesystem objects for
        copying to multiple backing stores.
    :param filepath: path to the zip or tar file containing the backup data
        (if desired). Can be a path object or str.
    :param filesystem: the `TempFS` containing the backup data.
    :param prefix: a parent directory for the files to be saved under. This
        can be a good place to encode some information about the backup. A
        slash will be appended to the prefix to create a directory or
        pseudo-directory structure.
    """
    if prefix and not prefix[-1] == '/':
        prefix = prefix + '/'
    if not isinstance(backing_store_fs, list):
        backing_store_fs = [backing_store_fs]
    if filepath:
        name = os.path.basename(filepath)
        for backing_fs in backing_store_fs:
            # stream the packaged backup file directly into the backing store
            try:
                backing_fs.makedir(prefix)
            except DirectoryExists:
                pass
            with backing_fs.open(prefix + name, 'wb') as outfile:
                with open(filepath, 'rb') as infile:
                    outfile.write(infile.read())
    elif filesystem:
        for backing_fs in backing_store_fs:
            if prefix:
                try:
                    backing_fs.opendir(prefix)
                except ResourceNotFound:
                    backing_fs.makedir(prefix)
                copy_fs(filesystem, backing_fs.opendir(prefix))
            else:
                copy_fs(filesystem, backing_fs)
    else:
        raise AttributeError("filepath or filesystem is required.")
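An illustrative call, assuming a local OSFS as the backing store and a TempFS holding the staged backup data (the paths and prefix here are hypothetical):

from fs.osfs import OSFS
from fs.tempfs import TempFS

staged = TempFS()
staged.writetext("tables.json", "{}")
write_out_backup(OSFS("/var/backups"), filesystem=staged, prefix="2024-01-01")
staged.close()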
def persist_to_cas_via_memory(cfs, source_fs):
    """Example call:
    persist_to_cas_via_memory(
        CASFS("/Users/samritchie/casfs"),
        "/Users/samritchie/tester"  # metrics directory
    )
    """
    with io.BytesIO() as f:
        # make sure the ZipFS closes before we attempt to transfer its
        # contents over to the content addressable store.
        with ZipFS(f, write=True) as zfs:
            copy_fs(source_fs, zfs)
        return cfs.put(f)
def prepare_gdrive():
    print("Preparing GDrive")
    credentials = get_credentials()
    fs = GoogleDriveFS(credentials=credentials)
    try:
        test = fs.getinfo('test')
        fs.removetree('test')
        time.sleep(5)
    except ResourceNotFound:
        # old path not found on googledrive
        pass
    # copy database from local to gdrive
    test = fs.makedir('test')
    copy_fs(OSFS(os.path.join(base_path, 'Calibre_db')), test)
    fs.close()
def create_workdir(path: Optional[Union[str, Path]] = None,
                   exist_ok: bool = False) -> Iterator[Path]:
    if path is None:
        with tempfile.TemporaryDirectory() as tempdir:
            yield Path(tempdir)
        return
    parsed = urlparse(str(path))
    if parsed.scheme in ("", "file", "osfs"):
        os.makedirs(parsed.path, exist_ok=exist_ok)
        yield Path(parsed.path)
        return
    path = str(path)
    with open_fs(path) as fs:
        # the original tested the bare method `fs.exists` (always truthy);
        # it must be called to mean anything
        if not exist_ok and fs.exists("/"):
            raise FileExistsError(f"File exists: {path}")
    with tempfile.TemporaryDirectory() as tempdir:
        yield Path(tempdir)
        copy_fs(tempdir, path)
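Since create_workdir is a generator, it is presumably wrapped with contextlib.contextmanager at its definition site (the decorator is not shown in the snippet above). A usage sketch under that assumption:

import contextlib

workdir_cm = contextlib.contextmanager(create_workdir)

with workdir_cm() as workdir:  # no path given: a throwaway temp dir
    (workdir / "result.txt").write_text("done")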
def copySingle(srcFS, src, destFS, dest):
    """Copy single source to single destination.

    Args:
        src (basestring)
        dest (basestring)

    Raises:
        MissingDestinationException
    """
    if dest is None:
        raise MissingDestinationException()
    if srcFS.isdir(unicode(src)):
        assertFS(destFS.getsyspath(unicode(dest)))
        copy_fs(srcFS.opendir(unicode(src)), destFS.opendir(unicode(dest)))
    if srcFS.isfile(unicode(src)):
        assertFS(destFS.getsyspath(unicode(os.path.dirname(dest))))
        copy_file(srcFS, unicode(src), destFS, unicode(dest))
def test_repair_corruption():
    fs1 = CASFS(MemoryFS(), width=2, depth=2)
    fs2 = CASFS(MemoryFS(), width=7, depth=1)

    # populate EACH with the same kv pairs, but structured in different ways
    # inside the filesystem.
    ak1, bk1 = fs1.put(StringIO('A')), fs1.put(StringIO('B'))
    ak2, bk2 = fs2.put(StringIO('A')), fs2.put(StringIO('B'))

    # fs2 of course only has two items in it.
    assert fs2.count() == 2

    # Now copy all of fs1 into fs2...
    copy_fs(fs1.fs, fs2.fs)

    # and note that it now has two copies of each item in the CAS.
    assert fs2.count() == 4

    # Repair should kill the duplicates.
    fs2.repair()
    assert fs2.count() == 2

    # fs2 itself is an iterable and has a length.
    assert len(fs2) == 2
                          passwd=ftp_args['password'],
                          user=ftp_args['user'])
            if not istree:
                ftp_local, ftp_file = self._parse_file_name(
                    ftp_args['relative_path'])
                try:
                    ftpfs.makedirs(ftp_local)
                except Exception as error_msg:
                    self.logger.error(str(error_msg))
                copy_file(localfs, local_relative, ftpfs,
                          ftp_args['relative_path'])
            else:
                try:
                    ftpfs.makedirs(ftp_args['relative_path'])
                except Exception as error_msg:
                    self.logger.error(str(error_msg))
                ftp_remote = ftp_args['ftp_path'] + ftp_args['relative_path']
                copy_fs(u"osfs://" + unicode(local_path), ftp_remote,
                        walker=walker)
        except Exception as error_msg:
            self.logger.error(str(error_msg))

    def download(self, remote_path, local_path, filter_regex=None,
                 istree=False):
        try:
            # path prepare
            local_path = self._local_path_transfor(local_path)
            # osfs prepare
            localfs, local_relative = self.build_osfs(local_path)
            # walk prepare
            walker = None
            if filter_regex is not None:
                if not isinstance(filter_regex, list):
def encode_file_into_luby_blocks_func(
        folder_containing_art_image_and_metadata_files):
    global block_redundancy_factor
    global desired_block_size_in_bytes
    file_paths_in_folder = glob.glob(
        folder_containing_art_image_and_metadata_files + '*')
    for current_file_path in file_paths_in_folder:
        if current_file_path.split('.')[-1] in ['zst', 'tar']:
            try:
                os.remove(current_file_path)
            except Exception as e:
                print('Error: ' + str(e))
    c_constant = 0.1  # Don't touch
    delta_constant = 0.5  # Don't touch
    start_time = time()
    ramdisk_object = MemoryFS()
    seed = random.randint(0, (1 << 31) - 1)  # parenthesized; << binds looser than -
    compressed_output_file_path, compressed_file_hash = \
        add_art_image_files_and_metadata_to_zstd_compressed_tar_file_func(
            folder_containing_art_image_and_metadata_files)
    final_art_file__original_size_in_bytes = os.path.getsize(
        compressed_output_file_path)
    # Process compressed file into a stream of encoded blocks, and save those
    # blocks as separate files in the output folder:
    print('Now encoding file ' + compressed_output_file_path + ' (' +
          str(round(final_art_file__original_size_in_bytes / 1000000)) +
          'mb)\n\n')
    total_number_of_blocks_to_generate = ceil(
        (1.00 * block_redundancy_factor *
         final_art_file__original_size_in_bytes) / desired_block_size_in_bytes)
    print('Total number of blocks to generate for target level of '
          'redundancy: ' + str(total_number_of_blocks_to_generate))
    with open(compressed_output_file_path, 'rb') as f:
        compressed_data = f.read()
    compressed_data_size_in_bytes = len(compressed_data)
    blocks = [
        int.from_bytes(
            compressed_data[ii:ii + desired_block_size_in_bytes].ljust(
                desired_block_size_in_bytes, b'0'), 'little')
        for ii in range(0, compressed_data_size_in_bytes,
                        desired_block_size_in_bytes)
    ]
    prng = PRNG(params=(len(blocks), delta_constant, c_constant))
    prng.set_seed(seed)
    output_blocks_list = list()
    number_of_blocks_generated = 0
    while number_of_blocks_generated < total_number_of_blocks_to_generate:
        random_seed, d, ix_samples = prng.get_src_blocks()
        block_data = 0
        for ix in ix_samples:
            block_data ^= blocks[ix]
        block_data_bytes = int.to_bytes(block_data,
                                        desired_block_size_in_bytes, 'little')
        block_data_hash = hashlib.sha3_256(block_data_bytes).digest()
        block = (compressed_data_size_in_bytes, desired_block_size_in_bytes,
                 random_seed, block_data_hash, block_data_bytes)
        header_bit_packing_pattern_string = '<3I32s'
        bit_packing_pattern_string = (header_bit_packing_pattern_string +
                                      str(desired_block_size_in_bytes) + 's')
        length_of_header_in_bytes = struct.calcsize(
            header_bit_packing_pattern_string)
        packed_block_data = pack(bit_packing_pattern_string, *block)
        if number_of_blocks_generated == 0:
            # Test that the bit-packing is working correctly:
            with io.BufferedReader(io.BytesIO(packed_block_data)) as f:
                header_data = f.read(length_of_header_in_bytes)
                # first_generated_block_raw_data = f.read(desired_block_size_in_bytes)
                (compressed_input_data_size_in_bytes_test,
                 desired_block_size_in_bytes_test, random_seed_test,
                 block_data_hash_test) = unpack(
                     header_bit_packing_pattern_string, header_data)
            if block_data_hash_test != block_data_hash:
                print('Error! Block data hash does not match the hash '
                      'reported in the block header!')
        output_blocks_list.append(packed_block_data)
        number_of_blocks_generated = number_of_blocks_generated + 1
        hash_of_block = get_sha256_hash_of_input_data_func(packed_block_data)
        output_block_file_path = (
            'FileHash__' + compressed_file_hash + '__Block__' +
            '{0:09}'.format(number_of_blocks_generated) +
            '__BlockHash_' + hash_of_block + '.block')
        try:
            with ramdisk_object.open(output_block_file_path, 'wb') as f:
                f.write(packed_block_data)
        except Exception as e:
            print('Error: ' + str(e))
    duration_in_seconds = round(time() - start_time, 1)
    print('\n\nFinished processing in ' + str(duration_in_seconds) +
          ' seconds! \nOriginal zip file was encoded into ' +
          str(number_of_blocks_generated) + ' blocks of ' +
          str(ceil(desired_block_size_in_bytes / 1000)) +
          ' kilobytes each. Total size of all blocks is ~' +
          str(ceil((number_of_blocks_generated *
                    desired_block_size_in_bytes) / 1000000)) +
          ' megabytes\n')
    print('Now copying encoded files from ram disk to local storage...')
    block_storage_folder_path = (
        folder_containing_art_image_and_metadata_files + os.sep +
        'block_files')
    if not os.path.isdir(block_storage_folder_path):
        os.makedirs(block_storage_folder_path)
    filesystem_object = OSFS(block_storage_folder_path)
    copy_fs(ramdisk_object, filesystem_object)
    print('Done!\n')
    ramdisk_object.close()
    return duration_in_seconds
import fs
from magicalimport import import_symbol  # pip install fs magicalimport

create_fs = import_symbol("./02inmemory.py:create")

with create_fs("03copyfs") as my_fs:
    # copy filesystem
    from fs.copy import copy_fs  # noqa

    def on_copy(src_fs, src_name: str, dst_fs, dst_name: str) -> None:
        import sys
        print(f"copied {src_fs}:{src_name} -> {dst_name}", file=sys.stderr)

    copy_fs(my_fs, fs.open_fs("./"), on_copy=on_copy)
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    (lookup_kwargs,) = job["args"]
    user = User().load(job["userId"], force=True)
    tale = Tale().load(job["kwargs"]["taleId"], user=user)
    spawn = job["kwargs"]["spawn"]
    asTale = job["kwargs"]["asTale"]
    token = Token().createToken(user=user, days=0.5)

    progressTotal = 3 + int(spawn)
    progressCurrent = 0

    try:
        # 0. Spawn instance in the background
        if spawn:
            instance = Instance().createInstance(tale, user, token, spawn=spawn)

        # 1. Register data using url
        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.RUNNING,
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Registering external data",
        )
        dataIds = lookup_kwargs.pop("dataId")
        base_url = lookup_kwargs.get("base_url", DataONELocations.prod_cn)
        dataMap = pids_to_entities(
            dataIds, user=user, base_url=base_url, lookup=True
        )  # DataONE shouldn't be here
        imported_data = register_dataMap(
            dataMap,
            getOrCreateRootFolder(CATALOG_NAME),
            "folder",
            user=user,
            base_url=base_url,
        )

        if dataMap[0]["repository"].lower().startswith("http"):
            resource = Item().load(imported_data[0], user=user, level=AccessType.READ)
            resourceType = "item"
        else:
            resource = Folder().load(imported_data[0], user=user, level=AccessType.READ)
            resourceType = "folder"

        data_set = [
            {
                "itemId": imported_data[0],
                "mountPath": resource["name"],
                "_modelType": resourceType,
            }
        ]

        if asTale:
            if resourceType == "folder":
                # Create a dataset with the content of the root ds folder,
                # so that it looks nice and is easy to copy to the workspace
                # later on.
                workspace_data_set = [
                    {
                        "itemId": folder["_id"],
                        "mountPath": folder["name"],
                        "_modelType": "folder",
                    }
                    for folder in Folder().childFolders(
                        parentType="folder", parent=resource, user=user
                    )
                ]
                workspace_data_set += [
                    {
                        "itemId": item["_id"],
                        "mountPath": item["name"],
                        "_modelType": "item",
                    }
                    for item in Folder().childItems(resource)
                ]
            else:
                workspace_data_set = data_set

            # 2. Create a session
            # TODO: yay circular dependencies! IMHO we really should merge
            # wholetale and wt_data_manager plugins...
            from girder.plugins.wt_data_manager.models.session import Session

            # Session is created so that we can easily copy files to the
            # workspace, without worrying about how to handle transfers.
            # DMS will do that for us <3
            session = Session().createSession(user, dataSet=workspace_data_set)

            # 3. Copy data to the workspace using WebDAVFS
            progressCurrent += 1
            jobModel.updateJob(
                job,
                status=JobStatus.RUNNING,
                log="Copying files to workspace",
                progressTotal=progressTotal,
                progressCurrent=progressCurrent,
                progressMessage="Copying files to workspace",
            )
            girder_root = "http://localhost:{}".format(
                config.getConfig()["server.socket_port"]
            )
            with WebDAVFS(
                girder_root,
                login=user["login"],
                password="******".format(**token),
                root="/tales/{_id}".format(**tale),
            ) as destination_fs, DMSFS(
                str(session["_id"]), girder_root + "/api/v1", str(token["_id"])
            ) as source_fs:
                copy_fs(source_fs, destination_fs)
                sanitize_binder(destination_fs)

            Session().deleteSession(user, session)
        else:
            # 3. Update Tale's dataSet
            update_citations = {_["itemId"] for _ in tale["dataSet"]} ^ {
                _["itemId"] for _ in data_set
            }
            tale["dataSet"] = data_set
            tale = Tale().updateTale(tale)
            if update_citations:
                eventParams = {"tale": tale, "user": user}
                event = events.trigger("tale.update_citation", eventParams)
                if len(event.responses):
                    tale = Tale().updateTale(event.responses[-1])

        # Tale is ready to be built
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.READY
        tale = Tale().updateTale(tale)

        # 4. Wait for container to show up
        if spawn:
            progressCurrent += 1
            jobModel.updateJob(
                job,
                status=JobStatus.RUNNING,
                log="Waiting for a Tale container",
                progressTotal=progressTotal,
                progressCurrent=progressCurrent,
                progressMessage="Waiting for a Tale container",
            )
            sleep_step = 10
            timeout = 15 * 60
            while instance["status"] == InstanceStatus.LAUNCHING and timeout > 0:
                time.sleep(sleep_step)
                instance = Instance().load(instance["_id"], user=user)
                timeout -= sleep_step
            if timeout <= 0:
                raise RuntimeError(
                    "Failed to launch instance {}".format(instance["_id"])
                )
        else:
            instance = None
    except Exception:
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.ERROR
        tale = Tale().updateTale(tale)
        t, val, tb = sys.exc_info()
        log = "%s: %s\n%s" % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(
            job,
            progressTotal=progressTotal,
            progressCurrent=progressTotal,
            progressMessage="Task failed",
            status=JobStatus.ERROR,
            log=log,
        )
        raise

    # To get rid of ObjectId's, dates etc.
    tale = json.loads(
        json.dumps(tale, sort_keys=True, allow_nan=False, cls=JsonEncoder)
    )
    instance = json.loads(
        json.dumps(instance, sort_keys=True, allow_nan=False, cls=JsonEncoder)
    )

    jobModel.updateJob(
        job,
        status=JobStatus.SUCCESS,
        log="Tale created",
        progressTotal=progressTotal,
        progressCurrent=progressTotal,
        progressMessage="Tale created",
        otherFields={"result": {"tale": tale, "instance": instance}},
    )
def encode_final_art_zipfile_into_luby_transform_blocks_func(
        sha256_hash_of_art_file):
    global block_storage_folder_path
    global block_redundancy_factor
    global desired_block_size_in_bytes
    global prepared_final_art_zipfiles_folder_path
    start_time = time()
    ramdisk_object = MemoryFS()
    filesystem_object = OSFS(block_storage_folder_path)
    c_constant = 0.1
    delta_constant = 0.5
    seed = randint(0, (1 << 31) - 1)  # parenthesized; << binds looser than -
    path_to_final_artwork_zipfile_including_metadata = glob.glob(
        prepared_final_art_zipfiles_folder_path + '*' +
        sha256_hash_of_art_file + '*')[0]
    final_art_file__original_size_in_bytes = os.path.getsize(
        path_to_final_artwork_zipfile_including_metadata)
    output_blocks_list = []
    # Process ZIP file into a stream of encoded blocks, and save those blocks
    # as separate files in the output folder:
    print('Now encoding file ' +
          os.path.split(path_to_final_artwork_zipfile_including_metadata)[-1] +
          ' (' + str(round(final_art_file__original_size_in_bytes / 1000000)) +
          'mb)\n\n')
    total_number_of_blocks_to_generate = ceil(
        (1.00 * block_redundancy_factor *
         final_art_file__original_size_in_bytes) / desired_block_size_in_bytes)
    print('Total number of blocks to generate for target level of '
          'redundancy: ' + str(total_number_of_blocks_to_generate))
    pbar = tqdm(total=total_number_of_blocks_to_generate)
    with open(path_to_final_artwork_zipfile_including_metadata, 'rb') as f:
        f_bytes = f.read()
    filesize = len(f_bytes)
    art_zipfile_hash = hashlib.sha256(f_bytes).hexdigest()
    number_of_blocks_generated = 0  # initialized here so the summary below
                                    # is defined even on a hash mismatch
    if art_zipfile_hash == sha256_hash_of_art_file:
        # Convert file byte contents into blocksize chunks, padding the last
        # one if necessary:
        blocks = [
            int.from_bytes(
                f_bytes[ii:ii + desired_block_size_in_bytes].ljust(
                    desired_block_size_in_bytes, b'0'), sys.byteorder)
            for ii in range(0, len(f_bytes), desired_block_size_in_bytes)
        ]
        number_of_blocks = len(blocks)
        print('The length of the blocks list: ' + str(number_of_blocks))
        prng = PRNG(params=(number_of_blocks, delta_constant, c_constant))
        prng.set_seed(seed)
        # block generation loop
        while number_of_blocks_generated <= total_number_of_blocks_to_generate:
            update_skip = 1
            if (number_of_blocks_generated % update_skip) == 0:
                pbar.update(update_skip)
            blockseed, d, ix_samples = prng.get_src_blocks()
            block_data = 0
            for ix in ix_samples:
                block_data ^= blocks[ix]
            # Generate blocks of XORed data in network byte order
            block = (filesize, desired_block_size_in_bytes, blockseed,
                     int.to_bytes(block_data, desired_block_size_in_bytes,
                                  sys.byteorder))
            number_of_blocks_generated = number_of_blocks_generated + 1
            packed_block_data = pack('!III%ss' % desired_block_size_in_bytes,
                                     *block)
            output_blocks_list.append(packed_block_data)
            hash_of_block = hashlib.sha256(packed_block_data).hexdigest()
            output_block_file_path = (
                'FileHash__' + art_zipfile_hash + '__Block__' +
                '{0:09}'.format(number_of_blocks_generated) +
                '__BlockHash_' + hash_of_block + '.block')
            try:
                with ramdisk_object.open(output_block_file_path, 'wb') as f:
                    f.write(packed_block_data)
            except Exception as e:
                print('Error: ' + str(e))
    duration_in_seconds = round(time() - start_time, 1)
    print('\n\nFinished processing in ' + str(duration_in_seconds) +
          ' seconds! \nOriginal zip file was encoded into ' +
          str(number_of_blocks_generated) + ' blocks of ' +
          str(ceil(desired_block_size_in_bytes / 1000)) +
          ' kilobytes each. Total size of all blocks is ~' +
          str(ceil((number_of_blocks_generated *
                    desired_block_size_in_bytes) / 1000000)) +
          ' megabytes\n')
    print('Now copying encoded files from ram disk to local storage...')
    copy_fs(ramdisk_object, filesystem_object)
    print('Done!\n')
    ramdisk_object.close()
    return duration_in_seconds
def run(job):
    jobModel = Job()
    jobModel.updateJob(job, status=JobStatus.RUNNING)

    tale_dir, manifest_file = job["args"]
    user = User().load(job["userId"], force=True)
    tale = Tale().load(job["kwargs"]["taleId"], user=user)
    token = Token().createToken(
        user=user,
        days=0.5,
        scope=(TokenScope.USER_AUTH, REST_CREATE_JOB_TOKEN_SCOPE),
    )

    progressTotal = 3
    progressCurrent = 0

    try:
        os.chdir(tale_dir)
        with open(manifest_file, "r") as manifest_fp:
            manifest = json.load(manifest_fp)

        # 1. Register data
        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.RUNNING,
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Registering external data",
        )
        dataIds = [obj["identifier"] for obj in manifest["Datasets"]]
        dataIds += [
            obj["uri"]
            for obj in manifest["aggregates"]
            if obj["uri"].startswith("http")
        ]
        if dataIds:
            dataMap = pids_to_entities(
                dataIds, user=user, base_url=DataONELocations.prod_cn,
                lookup=True
            )  # DataONE shouldn't be here
            register_dataMap(
                dataMap,
                getOrCreateRootFolder(CATALOG_NAME),
                "folder",
                user=user,
                base_url=DataONELocations.prod_cn,
            )

        # 2. Construct the dataSet
        dataSet = []
        for obj in manifest["aggregates"]:
            if "bundledAs" not in obj:
                continue
            uri = obj["uri"]
            fobj = File().findOne(
                {"linkUrl": uri}
            )  # TODO: That's expensive, use something else
            if fobj:
                dataSet.append(
                    {
                        "itemId": fobj["itemId"],
                        "_modelType": "item",
                        "mountPath": obj["bundledAs"]["filename"],
                    }
                )
            # TODO: handle folders

        # 3. Update Tale's dataSet
        update_citations = {_["itemId"] for _ in tale["dataSet"]} ^ {
            _["itemId"] for _ in dataSet
        }
        tale["dataSet"] = dataSet
        tale = Tale().updateTale(tale)
        if update_citations:
            eventParams = {"tale": tale, "user": user}
            event = events.trigger("tale.update_citation", eventParams)
            if len(event.responses):
                tale = Tale().updateTale(event.responses[-1])

        # 4. Copy data to the workspace using WebDAVFS (if it exists)
        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.RUNNING,
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Copying files to workspace",
        )
        orig_tale_id = pathlib.Path(manifest_file).parts[0]
        for workdir in ("workspace", "data/workspace", None):
            if workdir:
                workdir = os.path.join(orig_tale_id, workdir)
                if os.path.isdir(workdir):
                    break
        if workdir:
            password = "******".format(**token)
            root = "/tales/{_id}".format(**tale)
            url = "http://localhost:{}".format(
                config.getConfig()["server.socket_port"]
            )
            with WebDAVFS(
                url, login=user["login"], password=password, root=root
            ) as webdav_handle:
                copy_fs(OSFS(workdir), webdav_handle)

        # Tale is ready to be built
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.READY
        tale = Tale().updateTale(tale)

        progressCurrent += 1
        jobModel.updateJob(
            job,
            status=JobStatus.SUCCESS,
            log="Tale created",
            progressTotal=progressTotal,
            progressCurrent=progressCurrent,
            progressMessage="Tale created",
        )
    except Exception:
        tale = Tale().load(tale["_id"], user=user)  # Refresh state
        tale["status"] = TaleStatus.ERROR
        tale = Tale().updateTale(tale)
        t, val, tb = sys.exc_info()
        log = "%s: %s\n%s" % (t.__name__, repr(val), traceback.extract_tb(tb))
        jobModel.updateJob(job, status=JobStatus.ERROR, log=log)
        raise
with indent(2):
    if "modtemplate.zip" not in cwdfs.listdir("/"):
        puts("Downloading ModTemplate")
        r = requests.get(
            "https://github.com/Monika-After-Story/DDLCModTemplate/releases/download/v1.1.0/DDLCModTemplate_1.1.0.zip",
            stream=True)
        r.raise_for_status()
        with cwdfs.open("modtemplate.zip", 'wb') as fd:
            total_length = int(r.headers.get('content-length'))
            for chunk in progress.bar(r.iter_content(chunk_size=1024),
                                      expected_size=(total_length / 1024) + 1):
                fd.write(chunk)
    puts("Extracting ModTemplate")
    with ZipFS("./modtemplate.zip") as zipfs:
        fscopy.copy_fs(zipfs, tempfs.makedirs("renpy/My DDLC Mod"))
    cwdfs.remove("modtemplate.zip")

puts("Adding game rpas")
with indent(2):
    if "ddlc-win.zip" not in cwdfs.listdir("/"):
        puts("Downloading DDLC")
        with indent(2):
            puts("Getting URL")
            r = requests.post("https://teamsalvato.itch.io/ddlc/file/594897")
            r.raise_for_status()
            ddlcurl = r.json()["url"]
            puts("Downloading")
            r = requests.get(ddlcurl, stream=True)
            with cwdfs.open("ddlc-win.zip", 'wb') as fd:
# Run this if not initializing ZIP file queue:
# q = pq.SQLiteAckQueue(ZIP_QUEUE, auto_commit=True)
localdir = fs.open_fs(".")
logger.info(f"Getting zipfiles from {ZIP_SOURCE}")
with fs.open_fs(ZIP_SOURCE) as zipdir:
    ziplist = [
        ZipFS(fs.path.combine(zipdir.getsyspath("/"), z))
        for z in zipdir.walk.files(filter=["*.zip"], max_depth=1)
    ]
    for zipfs in ziplist:
        with fs.open_fs("mem://") as cache:
            logger.info(f"Extracting from {zipfs}")
            copy_fs(zipfs, cache)
            for unzipped in cache.walk.files():
                logger.info(f"Processing file {unzipped}")
                result = {"zipfile": zipfs._file,
                          "path": unzipped.lstrip("/")}
                info = cache.getinfo(unzipped, namespaces=["details"])
                result["name"] = info.name
                result["size"] = info.size
                with cache.open(unzipped, mode="rb") as fp:
                    md5 = hashlib.md5()
                    while True:
                        data = fp.read(CHUNK_SIZE)
                        if not data:
                            break
                        md5.update(data)
                result["md5"] = md5.hexdigest()
def to_ram(dir_path):
    mem_fs = MemoryFS()
    with OSFS(dir_path) as data_fs:
        copy_fs(data_fs, mem_fs, workers=8)
    return mem_fs
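A sketch of how to_ram might be used (the directory path is hypothetical): pay the disk cost once up front, then serve all subsequent reads from memory.

data_fs = to_ram("/srv/reference_data")
print(data_fs.listdir("/"))
data_fs.close()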