def run_pipeline(file_name, local, bucket, target_dir, overwrite):
    """Download a .7z archive from S3, extract it locally, and upload the contents back.

    :param file_name: S3 key of the archive to process.
    :param local: local working directory (created if missing).
    :param bucket: S3 bucket holding both the archive and the upload target.
    :param target_dir: destination prefix in the bucket for the extracted files.
    :param overwrite: whether existing objects under target_dir may be replaced.
    """
    # Fail fast if AWS credentials are missing/invalid.
    sts = boto3.client('sts')
    sts.get_caller_identity()

    s3 = boto3.client('s3')
    transfer_config = TransferConfig(use_threads=True)

    # Teach shutil.unpack_archive how to handle .7z archives.
    shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)

    Path(local).mkdir(parents=True, exist_ok=True)
    base_name = os.path.basename(file_name)
    archive_path = download_file(s3_client=s3,
                                 bucket=bucket,
                                 key=file_name,
                                 path=os.path.join(local, base_name))
    extracted_dir = unzip_file(zip_path=archive_path,
                               unzip_path=base_name.split('.7z')[0],
                               remove=True)
    upload_folder(s3_client=s3,
                  in_path=extracted_dir,
                  bucket=bucket,
                  out_dir=target_dir,
                  overwrite=overwrite,
                  Config=transfer_config)
    remove_directory(extracted_dir)
def extract_package(self, package):
    """Unpack a package's cached source archive (and optional patch archive) under self.subdir_root.

    No-op if the package's target directory already exists.
    """
    # Python < 3.5's shutil has no built-in xz support; register an 'xztar'
    # unpacker there, but only when the lzma module is importable.
    if sys.version_info < (3, 5):
        try:
            import lzma
            del lzma
        except ImportError:
            pass
        else:
            try:
                shutil.register_unpack_format('xztar', ['.tar.xz', '.txz'], shutil._unpack_tarfile, [], "xz'ed tar-file")
            except shutil.RegistryError:
                # Already registered (e.g. by a previous call) — fine.
                pass
    target_dir = os.path.join(self.subdir_root, package.get('directory'))
    # Already extracted: nothing to do.
    if os.path.isdir(target_dir):
        return
    extract_dir = self.subdir_root
    # Some upstreams ship packages that do not have a leading directory.
    # Create one for them.
    try:
        # NOTE(review): a plain dict.get() never raises KeyError, so `package`
        # is presumably a custom type whose get() raises on a missing key —
        # confirm against that type's implementation.
        package.get('lead_directory_missing')
        os.mkdir(target_dir)
        extract_dir = target_dir
    except KeyError:
        pass
    shutil.unpack_archive(
        os.path.join(self.cachedir, package.get('source_filename')),
        extract_dir)
    if package.has_patch():
        shutil.unpack_archive(
            os.path.join(self.cachedir, package.get('patch_filename')),
            self.subdir_root)
def extract_package(self, package):
    """Unpack a package's cached source archive (and optional patch archive) under self.subdir_root.

    No-op if the package's target directory already exists.
    """
    # Python < 3.5's shutil has no built-in xz support; register an 'xztar'
    # unpacker there when the lzma module is importable.  (Note the register
    # call here sits inside the lzma try-block, unlike sibling variants.)
    if sys.version_info < (3, 5):
        try:
            import lzma
            del lzma
            try:
                shutil.register_unpack_format('xztar', ['.tar.xz', '.txz'], shutil._unpack_tarfile, [], "xz'ed tar-file")
            except shutil.RegistryError:
                # Already registered (e.g. by a previous call) — fine.
                pass
        except ImportError:
            pass
    target_dir = os.path.join(self.subdir_root, package.get('directory'))
    # Already extracted: nothing to do.
    if os.path.isdir(target_dir):
        return
    extract_dir = self.subdir_root
    # Some upstreams ship packages that do not have a leading directory.
    # Create one for them.
    try:
        # NOTE(review): a plain dict.get() never raises KeyError, so `package`
        # is presumably a custom type whose get() raises on a missing key —
        # confirm against that type's implementation.
        package.get('lead_directory_missing')
        os.mkdir(target_dir)
        extract_dir = target_dir
    except KeyError:
        pass
    shutil.unpack_archive(os.path.join(self.cachedir, package.get('source_filename')), extract_dir)
    if package.has_patch():
        shutil.unpack_archive(os.path.join(self.cachedir, package.get('patch_filename')), self.subdir_root)
def unpack_zip(raw_dir, temp_dir, fp_zip):
    """
    Unpacks a zip file

    Supports .7z and .zip

    :param raw_dir: Directory for raw data containing zipped file
    :param temp_dir: Directory for temporary data for unzipped file
    :param fp_zip: File path of zipped file, relative to raw_dir
    :return: file path of unzipped file, relative to temp_dir
    """
    # Unzips the current file
    if fp_zip.split('.')[-1] == '7z':
        # Registers format to .7zip.  register_unpack_format raises
        # RegistryError when '7zip' is already registered; only that error is
        # benign (the old bare `except` also hid real failures such as a
        # missing py7zr installation).
        try:
            shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
            print('.7z registered for "7zip"')
        except shutil.RegistryError:
            print('.7z is already registered for "7zip"')
        shutil.unpack_archive(raw_dir + fp_zip, temp_dir)
    else:
        try:
            ZipFile(raw_dir + fp_zip).extractall(path=temp_dir)
        except BadZipfile:
            print('File already unpacked')
            shutil.copy(raw_dir + fp_zip, temp_dir + fp_zip)
            fp_unzip = fp_zip
            print('Unzipped file path:', temp_dir + fp_unzip)
            return fp_unzip
    print('Unzipped', raw_dir + fp_zip, 'to', temp_dir)
    # BUG FIX: the old code stored temp_dir + name in fp_unzip and then
    # printed/returned temp_dir + fp_unzip, doubling the prefix.  Return the
    # bare file name, consistent with the BadZipfile branch above.
    fp_unzip = max(os.listdir(temp_dir), key=lambda name: os.path.getctime(temp_dir + name))
    print('Unzipped file path:', temp_dir + fp_unzip)
    return fp_unzip
def test_unpack_registery(self): formats = get_unpack_formats() def _boo(filename, extract_dir, extra): self.assertEqual(extra, 1) self.assertEqual(filename, 'stuff.boo') self.assertEqual(extract_dir, 'xx') register_unpack_format('Boo', ['.boo', '.b2'], _boo, [('extra', 1)]) unpack_archive('stuff.boo', 'xx') # trying to register a .boo unpacker again self.assertRaises(RegistryError, register_unpack_format, 'Boo2', ['.boo'], _boo) # should work now unregister_unpack_format('Boo') register_unpack_format('Boo2', ['.boo'], _boo) self.assertIn(('Boo2', ['.boo'], ''), get_unpack_formats()) self.assertNotIn(('Boo', ['.boo'], ''), get_unpack_formats()) # let's leave a clean state unregister_unpack_format('Boo2') self.assertEqual(get_unpack_formats(), formats)
def unzip_7z():
    """Fetch the train/validation archives from Google Drive and extract them in place."""
    val_data_7z_link = '17NusZQw2RKpBIvCKp6hW6RuJKY43SWqz'
    data_7z_link = '1-I3BtCzYE7swpKERGygRhlMnim2xX0_2'
    # (file id, destination directory, archive name) for each dataset.
    archives = [
        (data_7z_link, data_path, 'data.7z'),
        (val_data_7z_link, val_data_path, 'val_data.7z'),
    ]
    print("Downloading data...")
    for file_id, dest_dir, archive_name in archives:
        gdd.download_file_from_google_drive(file_id=file_id,
                                            dest_path=os.path.join(dest_dir, archive_name),
                                            unzip=False)
    print('Unzipping data...')
    # Make shutil.unpack_archive understand .7z files.
    shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
    for _, dest_dir, archive_name in archives:
        archive_path = os.path.join(dest_dir, archive_name)
        shutil.unpack_archive(archive_path, dest_dir)
        os.remove(archive_path)
    print("Downloading complete...")
def test_unpack_registery(self): formats = get_unpack_formats() def _boo(filename, extract_dir, extra): self.assertEquals(extra, 1) self.assertEquals(filename, 'stuff.boo') self.assertEquals(extract_dir, 'xx') register_unpack_format('Boo', ['.boo', '.b2'], _boo, [('extra', 1)]) unpack_archive('stuff.boo', 'xx') # trying to register a .boo unpacker again self.assertRaises(RegistryError, register_unpack_format, 'Boo2', ['.boo'], _boo) # should work now unregister_unpack_format('Boo') register_unpack_format('Boo2', ['.boo'], _boo) self.assertIn(('Boo2', ['.boo'], ''), get_unpack_formats()) self.assertNotIn(('Boo', ['.boo'], ''), get_unpack_formats()) # let's leave a clean state unregister_unpack_format('Boo2') self.assertEquals(get_unpack_formats(), formats)
def register():
    """
    Configures shutil.unpack_archive to use our custom unzipper
    """
    # Windows keeps the stock zip handler; everywhere else we swap in the
    # permission-preserving unpacker.
    if sys.platform == "win32":
        return
    unregister_unpack_format('zip')
    register_unpack_format('zip', ['.zip'], _unpack_zipfile_with_permissions)
def register_new_archive_formats():
    """Register new archive formats to uncompress"""
    existing = {fmt[0] for fmt in shutil.get_unpack_formats()}
    for name, extensions, function in ADDITIONAL_ARCHIVE_FORMATS:
        # Skip formats shutil already knows about; re-registering would raise.
        if name not in existing:
            shutil.register_unpack_format(name, extensions, function)
def __init__(self, printer, storage):
    """Set up the unzip workflow command and register .7z support with shutil."""
    self.printer = printer
    self._storage = storage
    # py7zr supplies the actual .7z extraction routine.
    from py7zr import unpack_7zarchive as _unpack_7z
    shutil.register_unpack_format('7zip', ['.7z'], _unpack_7z)
    self._name = "workflow.unzip"
    self._aliases = ("w.uz",)
    self._min_arg_count = 1
    self._max_arg_count = 1
def extract_7zip(self, filename, extract_dir=""):
    """Extract a .7z archive.

    :param filename: path of the archive; must end with ``.7z``.
    :param extract_dir: target directory; defaults to ``templates``
        (created if missing).
    :raises ValueError: if filename does not end with ``.7z``.
    """
    # FIX: register_unpack_format raises RegistryError when '7zip' is already
    # registered, which made a second call to this method crash.  Swallow
    # only that specific error.
    try:
        shutil.register_unpack_format('7zip', ['.7z'], self._extract_7zip)
    except shutil.RegistryError:
        pass
    if extract_dir == "":
        if not exists("templates"):
            makedirs("templates")
        extract_dir = "templates"
    # FIX: was `".7z" in filename`, which accepted names like "a.7z.bak"
    # that shutil cannot actually unpack; require the proper suffix.
    if filename.endswith(".7z"):
        shutil.unpack_archive(filename=filename, extract_dir=extract_dir)
    else:
        raise ValueError(
            "Invalid input, please add .7z to the end of your file and try again"
        )
def get_files_from_url(url, raw_dir):
    """Ensure the archive referenced by `url` is present in `raw_dir`.

    Downloads the file unless it already exists locally; registers a .7z
    unpacker when the URL points at a 7z archive.

    :param url: download URL; the last path component is used as the filename.
    :param raw_dir: local directory (with trailing separator) to store the file.
    :return: the archive filename, relative to raw_dir.
    """
    if url.split('.')[-1] == '7z':
        # Registers format to .7zip.  FIX: narrowed from a bare `except` —
        # only "already registered" (RegistryError) is benign; other errors
        # (e.g. py7zr missing) should propagate.
        try:
            shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
            print('.7z registered for "7zip"')
        except shutil.RegistryError:
            print('.7z is already registered for "7zip"')
    zip_fp = url.split('/')[-1]
    if not os.path.exists(raw_dir + zip_fp):
        urlretrieve(url, raw_dir + zip_fp)
    else:
        print('Already have zipped file in disk. Skipping download')
    return zip_fp
def load(src_dir: str):
    """Migrate every numeric model-fileset directory under src_dir into the library root."""
    logger.debug("MIGRATING %s", src_dir)
    # FIXME: use a mandatory argument to force deleting of release data. Don't want to delete data by accident
    fs.clean_directory(settings.LIBRARY_ROOT)
    # Teach shutil how to unpack .rar archives via our helper.
    shutil.register_unpack_format('rar', ['.rar'], fs.unrar)
    for dir_entry in os.scandir(src_dir):
        if not dir_entry.is_dir():
            continue
        try:
            model_id = int(dir_entry.name)
            logger.debug("processing %s", dir_entry.path)
            ModelFileset(model_id, dir_entry).migrate()
        except:
            logger.exception("Un-model-library-like directory: %s", dir_entry.name)
def unpack_archive(filename, extract_dir=None):
    """shutil.unpack_archive wrapper to unpack ['.dat.bz2'] archive.

    :param filename: name of the archive.
    :param extract_dir: name of the target directory, where the archive
        is unpacked. If not provided, the current working directory is used.
    """
    import shutil

    # Anything other than the hardcoded .dat.bz2 case goes straight through.
    if not filename.endswith('.dat.bz2'):
        shutil.unpack_archive(filename, extract_dir)
        return
    # Temporarily register a bzip2 unpacker, use it, then remove it again.
    shutil.register_unpack_format(
        'bzip2', ['dat.bz2'], _unpack_bz2, [], "bzip2'ed dat file")
    shutil.unpack_archive(filename, extract_dir, 'bzip2')
    shutil.unregister_unpack_format('bzip2')
def init_additional_unpackers():
    """Add external libraries for unpacking files.

    Checks if `7z` or `unrar` are installed on the host system.
    """
    if try_cmd('7z'):
        register_unpack_format('7zip', [
            '.zipx', '.gz', '.z', '.cab', '.rar', '.lzh', '.7z', '.xz'
        ], un7z)
    elif try_cmd('unrar'):
        register_unpack_format('unrar', ['.rar'], unrar)
    # Flatten every registered format's extension list into one list for the
    # downloader.
    download.unpack_formats = [
        extension
        for _, extensions, _ in get_unpack_formats()
        for extension in extensions
    ]
def test_register_unpack_archive(tmp_path):
    """Unpack the fixture archive and verify modes, mtimes and file digests."""
    shutil.register_unpack_format("7zip", [".7z"], unpack_7zarchive)
    shutil.unpack_archive(str(testdata_path.joinpath("test_1.7z")), str(tmp_path))
    target = tmp_path.joinpath("setup.cfg")
    # Permission/mtime restoration is only meaningful on POSIX systems.
    if os.name == "posix":
        assert target.stat().st_mode == 33188
        assert target.stat().st_mtime == 1552522033
    checks = [
        ("setup.cfg", "ff77878e070c4ba52732b0c847b5a055a7c454731939c3217db4a7fb4a1e7240"),
        ("setup.py", "b916eed2a4ee4e48c51a2b51d07d450de0be4dbb83d20e67f6fd166ff7921e49"),
        ("scripts/py7zr", "b0385e71d6a07eb692f5fb9798e9d33aaf87be7dfff936fd2473eab2a593d4fd"),
    ]
    for rel_path, expected_hex in checks:
        digest = hashlib.sha256(tmp_path.joinpath(rel_path).open("rb").read()).digest()
        assert digest == binascii.unhexlify(expected_hex)
def test_register_unpack_archive(tmp_path):
    """Unpack the fixture archive and verify modes, mtimes and file digests."""
    shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
    shutil.unpack_archive(os.path.join(testdata_path, 'test_1.7z'), tmp_path)
    target = tmp_path.joinpath("setup.cfg")
    # Permission/mtime restoration is only meaningful on POSIX systems.
    if os.name == 'posix':
        assert target.stat().st_mode == 33188
        assert target.stat().st_mtime == 1552522033
    checks = [
        ('setup.cfg', 'ff77878e070c4ba52732b0c847b5a055a7c454731939c3217db4a7fb4a1e7240'),
        ('setup.py', 'b916eed2a4ee4e48c51a2b51d07d450de0be4dbb83d20e67f6fd166ff7921e49'),
        ('scripts/py7zr', 'b0385e71d6a07eb692f5fb9798e9d33aaf87be7dfff936fd2473eab2a593d4fd'),
    ]
    for rel_path, expected_hex in checks:
        digest = hashlib.sha256(tmp_path.joinpath(rel_path).open('rb').read()).digest()
        assert digest == binascii.unhexlify(expected_hex)
def _unzip_downloaded_files(self, destination_dirpath, explicit_list_of_files=None, in_thread=True):
    """Decompress downloaded archives into destination_dirpath and delete the originals.

    :param destination_dirpath: directory to extract into (created if missing).
    :param explicit_list_of_files: optional explicit list of archives to
        process; defaults to the compressed entries of
        self.downloaded_filepath_list.
    :param in_thread: when True, .tar.gz archives are skipped (unpacked later
        in the main thread — see the segfault note below).
    """
    # Shutil is the best choice here, bc calling tarfile or zipfile separately for each format is
    # error-prone, while shutil.unpack_archive handles every case on its own.
    # However, shutil does not support simple '.gz' compressed files. I have to register an unpack format,
    # named "bio_gz". Then, I provide a list of extensions corresponding to the format.
    # Cannot put '.gz' directly, because it may overlap with the .tar.gz format and I don't want to mess
    # with builtin modules.
    # Then, the method requires a callable that will be used to unpack archives.
    # The callable must receive the path of the archive, followed by the directory
    # the archive must be extracted to. This callable is created in this module, is the reimplementation of
    # the gunzip command, called gunzip_shutil.
    try:
        shutil.register_unpack_format("bio_gz", [".fasta.gz", ".hmm.gz", ".gz"], gunzip_shutil)
    except shutil.RegistryError:
        # if it is already registered, ignore it.
        pass
    # selecting compressed files
    if not explicit_list_of_files:
        zip_files = [
            fp for fp in self.downloaded_filepath_list if is_compressed(fp)
        ]
    else:
        zip_files = explicit_list_of_files
    # decompressing
    for element in zip_files:
        if not os.path.exists(destination_dirpath):
            os.makedirs(destination_dirpath)
        # In Python 3.8, unpacking a tar.gz file in this QThread causes a
        # segmentation fault. They will be unpacked later in the main thread.
        if in_thread and element.endswith(".tar.gz"):
            continue
        print("@ UNZIPPING M4a", element)
        shutil.unpack_archive(element, destination_dirpath)
        os.remove(element)
        # cleaning compressed files
        # NOTE(review): if explicit_list_of_files contains paths that are not
        # in self.downloaded_filepath_list, this raises ValueError — confirm
        # callers only pass previously downloaded paths.
        self.downloaded_filepath_list.remove(element)
def _extract_modules(self, dep, archive_path: Path, output_path: Path) -> bool:
    """Unpack a downloaded archive and copy its first top-level module into output_path.

    :param dep: dependency descriptor (provides name and release metadata).
    :param archive_path: downloaded archive (wheel or other shutil-supported format).
    :param output_path: directory to copy the module tree into.
    :return: True on success, False when no modules could be located.
    """
    # say to shutils that wheel can be parsed as zip
    # NOTE(review): relies on the private shutil._UNPACK_FORMATS registry and
    # shutil._unpack_zipfile helper — may break on a future Python release.
    if 'wheel' not in shutil._UNPACK_FORMATS:  # type: ignore
        shutil.register_unpack_format(
            name='wheel',
            extensions=['.whl'],
            function=shutil._unpack_zipfile,  # type: ignore
        )
    with TemporaryDirectory(suffix=dep.name) as package_path:  # type: Path # type: ignore
        package_path = Path(package_path)
        shutil.unpack_archive(str(archive_path), str(package_path))
        # Some archives wrap everything in one top-level directory; descend into it.
        if len(list(package_path.iterdir())) == 1:
            package_path = next(package_path.iterdir())
        # find modules
        root = PackageRoot(name=dep.name, path=package_path)
        if not root.packages:
            self.logger.error('cannot find modules', extra=dict(
                dependency=dep.name,
                version=dep.group.best_release.version,
            ))
            return False
        # copy modules
        # NOTE(review): only the first discovered package is copied — confirm
        # multi-package distributions are not expected here.
        module_path = root.packages[0].path
        module_name = root.packages[0].module
        self.logger.info('copying module...', extra=dict(
            path=str(module_path.relative_to(package_path)),
            dependency=dep.name,
        ))
        shutil.copytree(
            src=str(module_path),
            dst=str(output_path.joinpath(*module_name.split('.'))),
        )
        return True
def test_unpack_registery(self) -> None: formats = get_unpack_formats() def _boo(filename: str, extract_dir: str, extra: int) -> None: self.assertEqual(extra, 1) self.assertEqual(filename, "stuff.boo") self.assertEqual(extract_dir, "xx") register_unpack_format("Boo", [".boo", ".b2"], _boo, [("extra", 1)]) unpack_archive("stuff.boo", "xx") # trying to register a .boo unpacker again self.assertRaises(RegistryError, register_unpack_format, "Boo2", [".boo"], _boo) # should work now unregister_unpack_format("Boo") register_unpack_format("Boo2", [".boo"], _boo) self.assertIn(("Boo2", [".boo"], ""), get_unpack_formats()) self.assertNotIn(("Boo", [".boo"], ""), get_unpack_formats()) # let's leave a clean state unregister_unpack_format("Boo2") self.assertEqual(get_unpack_formats(), formats)
def actions(data_path=r"../data/shp_csv/", unziped_location=r"../data/unziped"):
    """Extract the single .7z archive from each folder under data_path, then
    delete shapefile side-car files we do not need from the extracted output.

    :param data_path: directory containing one sub-folder per archive.
    :param unziped_location: directory the archives are extracted into.
    """
    all_dir = os.listdir(data_path)
    print(all_dir)
    all_dir_path = [data_path + name for name in all_dir]
    print(all_dir_path)
    archive_paths = []
    for folder_path in all_dir_path:
        # because there is only one file per folder
        file_name = os.listdir(folder_path)[0]
        archive_paths.append(folder_path + "/" + file_name)
    print(archive_paths)
    # FIX: narrowed from a bare `except` — only "already registered"
    # (RegistryError) is benign; anything else should propagate.
    try:
        shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
    except shutil.RegistryError:
        pass
    # Extraction
    for archive in archive_paths:
        shutil.unpack_archive(archive, unziped_location)
    # Lighting our folders (around 15go of data): drop shapefile side-cars.
    # FIX: the original extension tuple listed ".cpg" and ".prj" twice.
    for folder_name in os.listdir(unziped_location):
        folder = os.path.join(unziped_location, folder_name)
        for item in os.listdir(folder):
            if item.endswith((".cpg", ".dbf", ".prj")):
                os.remove(os.path.join(folder, item))
import argparse
import copy
import logging
import pathlib
import shutil
import sys
import tarfile
import unittest

from .base import Profile, Target, Scope
from .build import Build
from .config import ConfigDict
from .tests import Skip, TestCase
from . import compilers, targets

# Register a fallback unpacker for bare .xz files when no already-installed
# unpack format claims the '.xz' extension.
if not any('.xz' in i[1] for i in shutil.get_unpack_formats()):
    def _extract_xz(filename, extract_dir):
        """Unpack an xz-compressed tar file into extract_dir."""
        try:
            tarobj = tarfile.open(filename)
        except tarfile.TarError as e:
            # BUG FIX: `ReadError` was an undefined bare name here (NameError
            # at runtime); raise shutil.ReadError, the exception
            # shutil.unpack_archive callers expect for unreadable archives.
            raise shutil.ReadError('{} is not a tar file'.format(filename)) from e
        try:
            tarobj.extractall(extract_dir)
        finally:
            tarobj.close()

    shutil.register_unpack_format('XZ file', ['.xz'], _extract_xz, [],
                                  'Tar file compressed with XZ (LZMA) algorithm')
from talos.core import config
from talos.core import exceptions as base_ex
from talos.core.i18n import _
from wecube_plugins_itsdangerous.common import exceptions

# fcntl is POSIX-only; record its availability instead of failing on other
# platforms.
try:
    HAS_FCNTL = True
    import fcntl
except:
    HAS_FCNTL = False

LOG = logging.getLogger(__name__)
CONF = config.CONF

# register jar,war,apk as zip file
# NOTE(review): reaches into the private shutil._UNPACK_FORMATS table to
# reuse the stock zip unpacker — may break on a future Python release.
shutil.register_unpack_format('jar', ['.jar'], shutil._UNPACK_FORMATS['zip'][1])
shutil.register_unpack_format('war', ['.war'], shutil._UNPACK_FORMATS['zip'][1])
shutil.register_unpack_format('apk', ['.apk'], shutil._UNPACK_FORMATS['zip'][1])


def unpack_file(filename, unpack_dest):
    """Unpack any shutil-supported archive (incl. jar/war/apk) into unpack_dest."""
    shutil.unpack_archive(filename, unpack_dest)


@contextlib.contextmanager
def lock(name, block=True, timeout=5):
    # NOTE(review): this definition continues beyond the visible excerpt; the
    # visible part only normalizes the timeout and opens the fcntl branch.
    timeout = 1.0 * timeout
    if HAS_FCNTL:
        acquired = False
def unpack_gzip(archive_path: str, destination_dir: str):
    """Decompress a single-file .gz archive into destination_dir, keeping the stem name."""
    destination_filepath = os.path.join(destination_dir, Path(archive_path).stem)
    with gzip.open(archive_path, 'rb') as f_in:
        with open(destination_filepath, 'wb') as f_out:
            copyfileobj(f_in, f_out)


def unpack_zip(archive_path: str, destination_dir: str):
    """Extract every member of a .zip archive into destination_dir."""
    zfile = zipfile.ZipFile(archive_path)
    zfile.extractall(destination_dir)


# Route .gz through our gzip helper and replace the stock zip unpacker with ours.
register_unpack_format('gzip', ['.gz'], unpack_gzip)
unregister_unpack_format('zip')
register_unpack_format('zip', ['.zip'], unpack_zip)


@retry(delay=1, backoff=2, tries=4)
def download_and_extract(url: str, destination_folder: str, total_mb_size: Optional[float] = None) -> bool:
    """Download and extract from url, if file has already been downloaded return False else True."""
    # NOTE(review): the function body continues beyond this excerpt; the
    # visible part derives a marker-file path keyed by (url, destination).
    try:
        # another process is currently (or was) working on the same url and destination folder
        key = url + destination_folder
        marker_filepath = os.path.join(
            destination_folder, sha256(key.encode()).hexdigest() + '.marker')
def __init__(self, printer, storage):
    """Set up the unzip workflow command and register .7z support with shutil."""
    super().__init__(printer, "workflow.unzip", ("w.uz",), 1, 3)
    self._storage = storage
    # py7zr supplies the actual .7z extraction routine.
    from py7zr import unpack_7zarchive as _unpack_7z
    shutil.register_unpack_format('7zip', ['.7z'], _unpack_7z)
# coding=utf-8 import requests import datetime from bs4 import BeautifulSoup import csv import wget import pandas as pd import os import glob from math import ceil from py7zr import unpack_7zarchive import shutil import subprocess shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive) dir_path = '/home/ec2-user' files_zipped ='/home/ec2-user/files/' files_unpacked = '/home/ec2-user/unpack' def unpack(): files = [f for f in os.listdir(files_zipped) if os.path.isfile(files_zipped+f)] for f in files: if ".7z" in f and ".meta." in f and ".tmp" not in f and "stackoverflow" not in f: directory = dir_path+'/unpack/'+f.split('.')[0]+'.meta/' elif ".7z" in f and ".tmp" not in f and "stackoverflow" not in f: directory = dir_path+'/unpack/'+f.split('.')[0]+'/' elif "stackoverflow" in f: directory = dir_path+'/unpack/stackoverflow/' else:
# Per-platform bundle layout (DIST_LIN is defined above this excerpt).
DIST_WIN = Path('windows') / 'trylean'
DIST_MAC = Path('macos') / 'trylean'
DISTS = [DIST_LIN, DIST_WIN, DIST_MAC]
DIST_ALL = Path('all')

DATA_LIN = DIST_LIN / 'vscodium' / 'data'
DATA_WIN = DIST_WIN / 'vscodium' / 'data'
DATA_MAC = DIST_MAC / 'vscodium' / 'codium-portable-data'

log = logging.getLogger("Make Lean bundle")
log.setLevel(logging.INFO)
# Reset any handlers left over from a previous configuration.
if (log.hasHandlers()):
    log.handlers.clear()
log.addHandler(logging.StreamHandler())

# We need to tell python that .vsix files are secretely zip files
# NOTE(review): shutil._unpack_zipfile is a private helper and may change
# between Python releases.
shutil.register_unpack_format('VScode extension', ['.vsix'], shutil._unpack_zipfile)

g = Github()
http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())


def unpack_archive(fname: Path, tgt_dir: Path) -> None:
    """Unpack zip or tar.gz archive."""
    # Unfortunately, zip file extraction from shutil does not preverse exec permission
    if fname.suffix == '.zip':
        subprocess.run(['unzip', str(fname), '-d', str(tgt_dir)])
    else:
        shutil.unpack_archive(fname, tgt_dir)


def latest_release(project: str) -> str:
def __init__(self):
    """Register 7z pack and unpack handlers with shutil on construction."""
    shutil.register_archive_format(
        '7zip', pack_7zarchive, description='7zip archive')
    shutil.register_unpack_format(
        '7zip', ['.7z'], unpack_7zarchive)
import pprint

from py7zr import pack_7zarchive, unpack_7zarchive
import shutil

# register file format at first: teach shutil both to create and to unpack
# .7z archives via py7zr.
shutil.register_archive_format('7zip', pack_7zarchive, description='7zip archive')
shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive, description='7zip archive')

pprint.pprint(shutil.get_unpack_formats())

# extraction example:
#   shutil.unpack_archive('test.7z', '/tmp')

# compression: pack the current directory into a.7z
shutil.make_archive('a', '7zip', '.')
def un_7z(filename, extract_dir=None):
    """Extract a .7z archive.

    :param filename: path of the .7z archive.
    :param extract_dir: optional target directory; defaults to the current
        working directory (the original behaviour).
    """
    # register file format at first.
    # FIX: register_unpack_format raises RegistryError when '7zip' is already
    # registered, which made a second call to this function crash; swallow
    # only that specific error.
    try:
        shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
    except shutil.RegistryError:
        pass
    # extraction
    shutil.unpack_archive(filename, extract_dir)
def registerUnzipFormat(name, extensions, function):
    """Thin wrapper around shutil.register_unpack_format."""
    shutil.register_unpack_format(name, extensions, function)
Unpack .gz `filename` to `extract_dir`
    """
    import gzip  # late import for breaking circular dependency
    if not os.path.exists(extract_dir):
        os.mkdir(extract_dir)
    # Strip the trailing ".gz" (3 chars) to name the decompressed output file.
    unpacked_file_name = os.path.join(extract_dir, os.path.split(full_file_name)[1][:-3])
    with gzip.open(full_file_name, 'rb') as f_in, open(unpacked_file_name, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)


shutil.register_unpack_format("rar", [".rar"], _unpack_rarfile)  # Registering RAR
shutil.register_unpack_format("gz", [".gz"], _unpack_gzfile)  # Registering GZ


def is_compressed(file):
    """
    Check if file is compressed in zip, tar or rar format
    """
    filename, file_extension = os.path.splitext(file)
    # Membership test against every extension of every registered unpack format.
    return file_extension in [
        format
        for unpack_format in shutil.get_unpack_formats()
        for format in unpack_format[1]
    ]


def get_compressed_files(dirname, filename_only=True):
    # NOTE(review): truncated here; only the filename_only branch opening is
    # visible in this excerpt.
    if (filename_only):
        return [
import os
import shutil

from minecraft_server_tools import sync_mods
from minecraft_server_tools.constants import (
    SERVER_DIR,
    SEARCHABLE_MODS_NAME,
    SEARCHABLE_CLIENT_MODS_NAME,
)

SEARCHABLE_MODS_DIR = os.path.join(SERVER_DIR, SEARCHABLE_MODS_NAME)
SEARCHABLE_CLIENT_MODS_DIR = os.path.join(SERVER_DIR, SEARCHABLE_CLIENT_MODS_NAME)

# Jar files are plain zip archives; reuse shutil's zip unpacker for them.
# NOTE(review): shutil._unpack_zipfile is private API and may change between
# Python releases.
shutil.register_unpack_format("jar", [".jar"], shutil._unpack_zipfile)


def unzip_mods_in_to(from_mods, to_dir):
    """Unpack each mod jar in from_mods (name -> path) into its own folder
    under to_dir, skipping mods whose folder already exists."""
    print(f"\nUnzipping...")
    for mod_name, mod_path in from_mods.items():
        unpack_dir = os.path.join(to_dir, mod_name)
        if not os.path.exists(unpack_dir):
            mod_dir = os.path.basename(os.path.dirname(mod_path))
            print(f"\t{os.path.join(mod_dir, mod_name)}...")
            shutil.unpack_archive(mod_path, unpack_dir)


def main():
    # NOTE(review): this function may continue beyond the visible excerpt.
    sync_mods.main()
    mods = sync_mods.get_location_table_for(sync_mods.MODS_DIR)
    client_mods = sync_mods.get_location_table_for(sync_mods.CLIENT_MODS_DIR)
)
from beta_rec.utils.common_util import (
    get_dataframe_from_npz,
    save_dataframe_as_npz,
    timeit,
    un_zip,
)
from beta_rec.utils.constants import DEFAULT_ORDER_COL, DEFAULT_TIMESTAMP_COL
from beta_rec.utils.download import download_file, get_format
from beta_rec.utils.onedrive import OneDrive

# Repository root: two directory levels up from this file.
default_root_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))

# register 7z unpack
shutil.register_unpack_format("7zip", [".7z"], unpack_7zarchive)


class DatasetBase(object):
    """Base class for processing raw dataset into interactions, making and loading data splits.

    This is an beta dataset which can derive to other dataset.
    Several directory that store the dataset file would be created in the initial process.

    Attributes:
        dataset_name: the dataset name.
        url: the url of raw files.
        manual_download_url: the url that users use to download raw files manually
    """

    # NOTE(review): __init__'s signature and body continue beyond this excerpt.
    def __init__(
        self,
    finally:
        # Restore the working directory changed earlier in this (truncated)
        # enclosing function — its beginning is outside this excerpt.
        os.chdir(orig_dir)

from . import common
from .io import parse_package_config


def _make_whlfile(*args, owner=None, group=None, **kwargs):
    # Wheels are zip archives; accept (and drop) the owner/group kwargs that
    # the tar-based archive makers take but zip does not.
    return shutil._make_zipfile(*args, **kwargs)  # type: ignore[attr-defined]


# Let shutil treat wheels as zip archives in both directions.
# NOTE(review): uses private shutil helpers (_make_zipfile/_unpack_zipfile).
shutil.register_archive_format("whl", _make_whlfile, description="Wheel file")
shutil.register_unpack_format(
    "whl", [".whl", ".wheel"], shutil._unpack_zipfile, description="Wheel file"  # type: ignore[attr-defined]
)


def exit_with_stdio(result: subprocess.CompletedProcess[str]) -> NoReturn:
    """Print a completed process's captured stdout/stderr, then exit with its return code."""
    if result.stdout:
        print("  stdout:")
        print(textwrap.indent(result.stdout, "    "))
    if result.stderr:
        print("  stderr:")
        print(textwrap.indent(result.stderr, "    "))
    raise SystemExit(result.returncode)


class BashRunnerWithSharedEnvironment: