def install(self):
    modfiles_txt = self.warrens_1_dir / "modfiles.txt"
    if modfiles_txt.exists():
        modfiles_txt.unlink()
    if self.installed:
        print("already installed")
    else:
        Archive(str(self.filename)).extractall(self.warrens_1_dir)
def etc_unpack():
    # temp_sample_dir_path = temp_sample_path.split('.')[0]
    # Abort extraction if it hangs: SIGALRM fires after 4 seconds.
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(4)
    try:
        Archive(temp_sample_path).extractall(c_c.PATH_TEMP)
    except Exception:
        return
    finally:
        signal.alarm(0)  # cancel the pending alarm so it cannot fire later
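# etc_unpack above relies on a module-level SIGALRM callback named `handler`
# that is not shown in this snippet. A minimal sketch of what it is assumed
# to look like (the exception type raised is an assumption, not the original
# author's code):
def handler(signum, frame):
    # Raising here turns the alarm into an exception inside the extractall
    # call, which etc_unpack's except clause then swallows.
    raise TimeoutError("archive extraction timed out")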
def download_from_dump(self, home, articles, key):
    if not os.path.isdir(home + '/knolml_dataset/phase_details'):
        download('knolml_dataset', verbose=True,
                 glob_pattern='phase_details.7z', destdir=home)
        # pyunpack does not expand '~'; build the path from `home` instead
        Archive(home + '/knolml_dataset/phase_details.7z').extractall(home + '/knolml_dataset')
    if not os.path.isdir(home + '/knolml_dataset/bz2t'):
        download('knolml_dataset', verbose=True, glob_pattern='bz2t.7z', destdir=home)
        Archive(home + '/knolml_dataset/bz2t.7z').extractall(home + '/knolml_dataset')
    fileList = glob.glob(home + '/knolml_dataset/phase_details/*.txt')
    for files in fileList:
        if 'phase' in files:
            with open(files, 'r') as myFile:
                for line in myFile:
                    l = line.split('#$*$#')
                    if l[0] in articles:
                        print("Found hit for article " + l[0])
                        # file, art, index, home, key
                        self.extract_from_bzip(file=l[1], art=l[0],
                                               index=int(l[2]), home=home, key=key)
def unpack_zip(file, unpack_folder):
    try:
        index, f = file
        Archive(f).extractall(unpack_folder)
        if index % 50 == 0:
            print(index)  # progress marker every 50 archives
    except Exception:
        pass  # skip archives that fail to unpack
def unzip(self, folder_output):
    print(self.file)
    print(folder_output)
    print(self.file.filename)
    output_file = "filee." + self.file.filename.rsplit('.', 1)[1]
    self.save(folder_output, output_file)
    if self.file.filename.rsplit('.', 1)[1].lower() in {"zip", "rar"}:
        # Archive expects a filesystem path, not the upload object itself;
        # extract the copy written by save() above
        Archive(os.path.join(folder_output, output_file)).extractall(folder_output)
def _import(src_file, task_data):
    with TemporaryDirectory() as tmp_dir:
        Archive(src_file.name).extractall(tmp_dir)
        dataset = dm_env.make_importer('label_me')(tmp_dir).make_dataset()
        masks_to_polygons = dm_env.transforms.get('masks_to_polygons')
        dataset = dataset.transform(masks_to_polygons)
        import_dm_annotations(dataset, task_data)
def func_extract_zip(in_path, out_path):
    # ref - https://www.kaggle.com/general/129520
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    Archive(in_path).extractall(out_path)
    for dirname, _, filenames in os.walk(out_path):
        for filename in filenames:
            print(os.path.join(dirname, filename))
def extract_archive(p_in: Union[Path, PosixPath], f_out: PosixPath,
                    remove: bool = True) -> None:
    Archive(p_in.as_posix()).extractall(f_out.as_posix())
    if remove:
        remove_file(p_in)
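# extract_archive above calls a `remove_file` helper that is not defined in
# this snippet. A minimal sketch of what it is assumed to do (the name and
# the missing_ok behaviour are assumptions; missing_ok requires Python 3.8+):
def remove_file(p: Path) -> None:
    # Delete the archive once its contents have been extracted.
    p.unlink(missing_ok=True)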
def _import(src_file, instance_data, load_data_callback=None):
    with TemporaryDirectory() as tmp_dir:
        Archive(src_file.name).extractall(tmp_dir)
        dataset = Dataset.import_from(tmp_dir, 'tf_detection_api', env=dm_env)
        if load_data_callback is not None:
            load_data_callback(dataset, instance_data)
        import_dm_annotations(dataset, instance_data)
def load(file_object, annotations):
    from pyunpack import Archive
    import os
    import re
    from tempfile import TemporaryDirectory

    def match_frame(frame_info, filename):
        def get_filename(path):
            return os.path.splitext(os.path.basename(path))[0]

        # try to match by filename
        pascal_filename = get_filename(filename)
        for frame_number, info in frame_info.items():
            cvat_filename = get_filename(info['path'])
            if cvat_filename == pascal_filename:
                return frame_number

        # try to extract the frame number from the filename
        numbers = re.findall(r'\d+', filename)
        if numbers and len(numbers) == 1:
            return int(numbers[0])

        raise Exception(
            'Cannot match filename or determine frame number for {} filename'.format(filename))

    def parse_xml_file(annotation_file):
        import xml.etree.ElementTree as ET
        root = ET.parse(annotation_file).getroot()
        frame_number = match_frame(annotations.frame_info,
                                   root.find('filename').text)

        for obj_tag in root.iter('object'):
            bbox_tag = obj_tag.find("bndbox")
            label = obj_tag.find('name').text
            xmin = float(bbox_tag.find('xmin').text)
            ymin = float(bbox_tag.find('ymin').text)
            xmax = float(bbox_tag.find('xmax').text)
            ymax = float(bbox_tag.find('ymax').text)
            annotations.add_shape(annotations.LabeledShape(
                type='rectangle',
                frame=frame_number,
                label=label,
                points=[xmin, ymin, xmax, ymax],
                occluded=False,
                attributes=[],
            ))

    archive_file = getattr(file_object, 'name')
    with TemporaryDirectory() as tmp_dir:
        Archive(archive_file).extractall(tmp_dir)

        for dirpath, _, filenames in os.walk(tmp_dir):
            for _file in filenames:
                if '.xml' == os.path.splitext(_file)[1]:
                    parse_xml_file(os.path.join(dirpath, _file))
def unpack_sub_files(self, file, obj_dir, ext):
    file_unpack_dir = obj_dir[:-len(ext)]
    # print(file_unpack_dir)

    # already unpacked
    if os.path.isdir(file_unpack_dir):
        return file_unpack_dir

    self.make_dir(file_unpack_dir)
    Archive(obj_dir).extractall(file_unpack_dir)
    return file_unpack_dir
def main(args):
    parser = argparse.ArgumentParser("")
    parser.add_argument('-t', '--test', help='testing command', action='store_true')
    parser.add_argument('-i', '--inputfile')
    try:
        opt = parser.parse_args(args[1:])
    except SystemExit:
        parser.print_help()
        raise

    with open(opt.inputfile) as input:
        content = input.readlines()
    content = [x.strip() for x in content if x.strip()]

    cmd_get = "wget --quiet --output-document {}/{}.zip {}"
    cmd_rm = "rm {}/{}.zip"
    save_dir = "/wk_cms2/sam7k9621/MRT_analysis/MRT_crawl/results"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    url = "http://163.29.157.32:8080/dataset/98d67c29-464a-4003-9f78-b1cbb89bff59"
    resp = requests.get(url)  # renamed from `re` to avoid shadowing the re module
    soup = BeautifulSoup(resp.text, 'html.parser')

    widgets = [
        pg.Timer(), " ", pg.AdaptiveETA(), " | ",
        pg.SimpleProgress(), " ", pg.Bar(), "[", pg.Percentage(), "]"
    ]
    pbar = pg.ProgressBar(widgets=widgets, maxval=len(content)).start()

    for idx, chose_date in enumerate(content):
        # the title is the dataset's exact (Chinese) name: "Taipei Metro daily
        # hourly station-to-station OD flow statistics_<date>"
        tag = soup.find(title="臺北捷運每日分時各站OD流量統計資料_{}".format(chose_date))
        date = tag.get("title").split("_")[-1]

        url2 = "http://163.29.157.32:8080" + tag.get("href")
        resp2 = requests.get(url2)
        soup2 = BeautifulSoup(resp2.text, "html.parser")
        filepath = soup2.find(
            "a",
            class_="btn btn-primary resource-url-analytics resource-type-None"
        ).get("href")

        os.system(cmd_get.format(save_dir, date, filepath))
        Archive("{}/{}.zip".format(save_dir, date)).extractall(save_dir)
        os.system(cmd_rm.format(save_dir, date))
        pbar.update(idx + 1)
    pbar.finish()

    csvlst = os.listdir(save_dir)
    for csv in csvlst:
        csvpath = Path(save_dir + "/" + csv)
        csvpath.rename(save_dir + "/" + csv.split("_")[-1])
def unpack(self, filename, path_temp='temp'):
    '''Unpack the zip/rar file to the temp dir.'''
    path_output = os.path.join(self.data_path, path_temp)
    # Archive handles both .rar and .zip, so the old per-format
    # unrar/unzip branches are no longer needed.
    self.mkdir(path=path_temp, isTrunk=True)
    Archive(filename).extractall(path_output)
def main():
    files = GetZipPath()
    for file in files:
        try:
            Archive(file[1]).extractall(file[0])
        except Exception:
            print(file)
            print("----------error---------------")
    print("Done!")
def extract(tarfilename):
    with open(tarfilename, 'r') as tarfile:
        base = os.path.basename(tarfile.name)
        filename = base[:-4]  # strip the 4-character extension, e.g. ".tar"
        pathdir = os.path.dirname(tarfile.name)
        currentDir = os.getcwd()
        # checkDirectoryAndFile is assumed to prepare (and change into) the
        # directory that makes the relative paths below valid
        checkDirectoryAndFile(pathdir, currentDir, filename)
        Archive(base).extractall(filename)
def handle(self, download):
    archive_file_path = download.biggest_file
    temp_folder_path = Path(mkdtemp(prefix=archive_file_path.name,
                                    dir=str(archive_file_path.parent)))
    Archive(str(archive_file_path)).extractall(str(temp_folder_path))
    fake_download = Download(temp_folder_path)
    handler = HandlerFinder().find_handler(fake_download)
    handler.handle(fake_download)
def _import(src_file, instance_data, load_data_callback=None):
    with TemporaryDirectory() as tmp_dir:
        Archive(src_file.name).extractall(tmp_dir)
        dataset = Dataset.import_from(tmp_dir, 'voc', env=dm_env)
        dataset.transform('masks_to_polygons')
        if load_data_callback is not None:
            load_data_callback(dataset, instance_data)
        import_dm_annotations(dataset, instance_data)
def decompress_file(filename):
    """Decompress file to directory with same basename

    :param filename: File path to decompress
    :type filename: Path
    """
    target_dir = filename.parent / filename.stem
    target_dir.mkdir(exist_ok=True)
    Archive(filename).extractall(target_dir)
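# A minimal usage sketch for decompress_file above. The archive path is
# hypothetical, and this assumes pyunpack's Archive accepts a pathlib.Path
# (recent versions convert it to a string internally):
from pathlib import Path
decompress_file(Path("/tmp/dataset.7z"))  # extracts into /tmp/dataset/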
def extractall(filename, directory, backend="auto", auto_create_dir=False):
    """
    :param filename: path to archive file
    :param directory: directory to extract to
    :param backend: auto, patool or zipfile
    :param auto_create_dir: auto create directory
    """
    Archive(filename, backend).extractall(directory,
                                          auto_create_dir=auto_create_dir)
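# A usage sketch for the extractall wrapper above (paths are hypothetical).
# The "zipfile" backend uses the standard library and needs no external
# tools but only handles .zip; "patool" shells out to installed archivers
# and covers rar, 7z and other formats.
extractall("/tmp/data.zip", "/tmp/out", backend="zipfile", auto_create_dir=True)
extractall("/tmp/data.rar", "/tmp/out", backend="patool")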
def download():
    global o
    urlretrieve(linkMAJ, chemin + "\\" + lastMAJ + ".zip")
    Archive(chemin + "\\" + lastMAJ + ".zip").extractall(chemin)
    os.remove(chemin + "\\" + lastMAJ + ".zip")
    bar.stop()
    bar['mode'] = 'determinate'
    bar['value'] = 100
    os.startfile(fullPath + "\\" + lastMAJ + ".exe")
def handlezip(local_name, yol, mkv):
    from pyunpack import Archive
    import shutil
    import codecs
    import glob
    import os
    import re

    def isUTF8(data):
        try:
            decoded = data.decode('UTF-8')
        except UnicodeDecodeError:
            return False
        else:
            for ch in decoded:
                if 0xD800 <= ord(ch) <= 0xDFFF:
                    return False
            return True

    def get_bytes_from_file(filename):
        return open(filename, "rb").read()

    currentpath = local_name.rsplit('\\', 1)[0] + "\\"
    print("\t Extracting the archive: " + str(local_name))
    Archive(local_name).extractall(currentpath)

    if local_name.endswith(".gz"):
        zipfiles = [f for f in glob.glob(currentpath + "\\" + "*")]
        extractedfile = zipfiles[0]
    else:
        zipfiles = [f for f in glob.glob(currentpath + "\\" + "*.srt")]
        if len(zipfiles) == 1:
            extractedfile = zipfiles[0]
        else:
            # pick the subtitle whose sXXeXX tag matches the video filename
            tobefound = re.search(r"s[0-9]{1,2}.e[0-9]{1,2}", mkv.lower())[0]
            for correct in zipfiles:
                if tobefound in correct.lower():
                    extractedfile = correct

    print("\t Renaming the file: " + str(extractedfile))
    fullpath = yol + mkv[:-4] + ".tr.srt"
    os.rename(extractedfile, fullpath)

    # re-encode Windows-1254 subtitles to UTF-8 if needed
    result = isUTF8(get_bytes_from_file(fullpath))
    if result is False:
        with codecs.open(fullpath, 'r', encoding='windows-1254', errors='ignore') as f:
            text = f.read()
        with codecs.open(fullpath, 'w', encoding='utf-8') as f:
            f.write(text)

    print("\t Removing: " + local_name.rsplit('\\', 1)[-1])
    shutil.rmtree(currentpath)
def extract_archives(base_path):
    num_archives = 0
    for dirpath, dirnames, filenames in os.walk(base_path):
        for file in filenames:
            if file.lower().endswith(archive_extensions):
                num_archives += 1
                filepath = os.path.join(dirpath, file)
                Archive(filepath).extractall(base_path)
                os.remove(filepath)
    return num_archives
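# extract_archives above tests filenames against a module-level
# `archive_extensions` tuple that is not shown in this snippet. A plausible
# definition (the exact set of extensions is an assumption; str.endswith
# accepts a tuple, so this works as written):
archive_extensions = ('.zip', '.rar', '.7z', '.tar', '.tar.gz', '.tgz')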
def extract(files):
    print('Extracting...')
    paths = []
    for file in files:
        extracted_path = TEMPDIR + '\\fontin\\' + file + '\\'
        if not os.path.exists(extracted_path):
            os.makedirs(extracted_path)
        Archive(os.getcwd() + '\\' + file).extractall(extracted_path)
        paths.append(extracted_path)
    return paths
def extract_all(path_to_archive, name, filename):
    print(name)
    copyfile(f'media/{path_to_archive}', f'media/{filename}')
    try:
        os.mkdir(f'media/{name}/')
        Archive(f'media/{filename}').extractall(f'media/{name}/')
    except Exception:
        return False
    return True
def __init__(self, source_path, step=1, start=0, stop=None):
    self._tmp_dir = create_tmp_dir()
    self._archive_source = source_path[0]
    Archive(self._archive_source).extractall(self._tmp_dir)
    super().__init__(
        source_path=[self._tmp_dir],
        step=step,
        start=start,
        stop=stop,
    )
def __init__(self, source_path, step=1, start=0, stop=None):
    self._archive_source = source_path[0]
    Archive(self._archive_source).extractall(
        os.path.dirname(source_path[0]))
    super().__init__(
        source_path=[os.path.dirname(source_path[0])],
        step=step,
        start=start,
        stop=stop,
    )
def downloadData(url, outPath, pathDir="", compress=False):
    fileName = os.path.basename(url)
    print("  Downloading file " + fileName + "...")
    urllib.request.urlretrieve(url, outPath)
    if compress:
        print("  Extracting file " + fileName + "...")
        Archive(outPath).extractall(pathDir)
def __init__(self, source_path, dest_path, image_quality, step=1, start=0, stop=0):
    Archive(source_path[0]).extractall(dest_path)
    # forward the caller's step/start/stop instead of hardcoding them
    super().__init__(
        source_path=[dest_path],
        dest_path=dest_path,
        image_quality=image_quality,
        step=step,
        start=start,
        stop=stop,
    )
def unpack_single_file(archive: str, temp_directory: str):
    """
    Unpacks an archive into a dedicated directory for that archive.
    """
    print(f'Unpacking {archive}')
    archive_short_filename = basename(archive)
    name, extension = splitext(archive_short_filename)
    destination_dir = join(temp_directory, name)
    os.mkdir(destination_dir)
    Archive(archive).extractall(destination_dir)
    print(f'Unpacked {archive} to {destination_dir}')
def unpack_file(source, dest):
    dirname = tempfile.mkdtemp()
    Archive(source).extractall(dirname)
    # assumes the archive holds a single file: each rename overwrites `dest`,
    # so only one extracted file survives
    for root, _, files in os.walk(dirname, topdown=False):
        for name in files:
            path = os.path.join(root, name)
            os.rename(path, dest)
    shutil.rmtree(dirname, True)