def rar_archive(self, file_read):
    """Save the requested reports found in a RAR archive and log the outcome.

    Works on the same principle as the zip archive method: every member is
    parsed as XML; when its ``STATFORM`` is listed in
    ``self.files_to_save_list`` and its ``REPORTDATE`` equals
    ``self.analysis_date`` the raw member bytes are written to
    ``self.output_path``.  A member that cannot be handled as a report is
    retried as a nested RAR archive (one level deep).

    Processing is best-effort: failures are swallowed and show up only as an
    ``ERROR!`` line for the report that was not saved.

    :param file_read: RAR archive to scan (anything ``RarFile`` accepts).
    """
    done_set = set()

    def save_if_requested(archive, member_name, xml_stream, name_separator):
        # Parse first, read afterwards: a parse failure must surface before
        # anything else so the caller can fall back to the nested-archive path.
        xml_dict = xmltodict.parse(xml_stream)
        context = archive.read(member_name)
        head = xml_dict['NBUSTATREPORT']['HEAD']
        statform = head['STATFORM']
        kod = head['EDRPOU']
        report_date = head['REPORTDATE']
        if statform in self.files_to_save_list and report_date == self.analysis_date:
            target = self.output_path + '/' + statform + name_separator + kod + '.xml'
            with open(target, 'wb') as output_file:
                output_file.write(context)
            done_set.add(statform)

    try:
        with RarFile(file_read) as my_archive:
            for outer_name in my_archive.namelist():
                with my_archive.open(outer_name) as outer_file:
                    try:
                        save_if_requested(my_archive, outer_name, outer_file, '_')
                    except Exception:
                        # Not a usable report: try to treat it as a nested archive.
                        try:
                            with RarFile(outer_file) as inner_archive:
                                for inner_name in inner_archive.namelist():
                                    with inner_archive.open(inner_name) as inner_file:
                                        try:
                                            # Read from the *inner* archive; the outer one
                                            # has no member with this name.
                                            # NOTE(review): nested reports are named without
                                            # the '_' separator used for top-level ones —
                                            # kept as in the original, confirm intent.
                                            save_if_requested(inner_archive, inner_name,
                                                              inner_file, '')
                                        except Exception:
                                            pass
                        except Exception:
                            pass
    except Exception:
        pass

    for x in self.files_to_save_list:
        if x in done_set:
            self.ui.textBrowser.append(x + '- OK')
        else:
            self.ui.textBrowser.append(x + '-' +
                                       "<span style=\" font-size:8pt; font-weight:600;"
                                       "color:#ff0000;\" >ERROR!</span>")
        # Let the UI repaint after every appended line.
        QtWidgets.QApplication.processEvents()
def xtract_rar(inpt: str, password: str = ''):  ## {{{
    """Extract a RAR archive into a sibling directory named after the archive.

    The destination is the archive path without its extension.  It is created
    with ``mkdir`` (so it must not exist yet) and the process working
    directory is changed into it before extraction, as before.

    :param inpt: path to the archive; a trailing slash is removed first.
    :param password: archive password; the empty string means "no password".
    """
    from os import path, mkdir, chdir
    from rarfile import RarFile

    # Resolve to an absolute path *before* chdir(): a relative path would stop
    # pointing at the archive once the working directory changes.
    inpt = path.abspath(remove_trailing_slash(inpt))
    dest_dir = path.splitext(inpt)[0]
    mkdir(dest_dir)
    chdir(dest_dir)

    with RarFile(inpt, 'r') as CUR_RAR:
        # pwd=None is extractall()'s default, i.e. the no-password case.
        CUR_RAR.extractall(pwd=password or None)
def each(self, target):
    """Extract every member of a RAR archive and register the resulting files.

    Each member is first extracted without a password; on ``RuntimeError`` the
    passwords ``'virus'`` and ``'infected'`` are tried in turn.  Members that
    cannot be extracted are logged and skipped.

    :param target: path of the RAR archive.
    :return: always ``True``.
    """
    tmpdir = tempdir()

    # Context manager so the archive handle is released even on failure.
    with RarFile(target) as rf:
        for name in rf.namelist():
            # Build the path ourselves: extract() does not return it in every
            # rarfile release.
            filepath = os.path.join(tmpdir, name)
            try:
                rf.extract(name, tmpdir)
            except RuntimeError:
                for password in ['virus', 'infected']:
                    try:
                        rf.extract(name, tmpdir, pwd=password)
                    except RuntimeError:
                        continue
                    if os.path.isfile(filepath):
                        self.add_extracted_file(filepath)
                    break
                else:
                    self.log('error', 'Could not extract {}'.format(name))
            else:
                if os.path.isfile(filepath):
                    self.add_extracted_file(filepath)

    return True
def _init_def(path, capture=_def_formats()):
    """Open a comic/zip/rar archive and tag it with its source path.

    :param path: ``pathlib``-style path (needs ``.suffix``) of the archive.
    :param capture: not used in the body; presumably read by the surrounding
        registration machinery — TODO confirm.
    :return: an open ``ZipFile`` or ``RarFile`` whose ``hpx_path`` attribute
        is set to *path*.
    :raises ValueError: if the suffix is none of .zip/.cbz/.rar/.cbr
        (previously this surfaced as an UnboundLocalError).
    """
    suffix = path.suffix.lower()
    if suffix in ('.zip', '.cbz'):
        o = ZipFile(str(path))
    elif suffix in ('.rar', '.cbr'):
        o = RarFile(str(path))
    else:
        raise ValueError("Unsupported archive format: {}".format(path))
    o.hpx_path = path
    return o
def extrair_arquivos(tipo: str, caminho: str, conteudo):
    """
    Extract the CSV files from a compressed archive.

    Parameters
    ----------
    tipo: str
        archive type: ``"x-rar-compressed"`` selects RAR handling, anything
        else is treated as zip.
    caminho: str
        path of the folder where the files must be saved.
    conteudo
        bytes of the compressed archive.
    """
    if tipo == "x-rar-compressed":
        # The RAR payload is written to disk temporarily before being opened.
        with open('file.rar', 'wb') as f:
            f.write(conteudo)
        try:
            with RarFile("file.rar") as rf:
                extrair_rar(rf, caminho)
        finally:
            # Remove the temporary archive even when extraction fails.
            os.remove('file.rar')
    else:
        with ZipFile(BytesIO(conteudo)) as zp:
            extrair_zip(zp, caminho)
def __init__(self, shuffled=False):
    """
    Prepare the dataset, downloading and unpacking it when necessary.

    A cached ``.npy`` copy of the data is loaded when present and discarded
    if its sample shape does not match the expected one.  The compressed
    archive is downloaded when missing and extracted when the data directory
    does not exist yet.

    :param shuffled: boolean
        Pass true to shuffle the dataset. This parameter is not very meaningful
        in this case because _load_all_data shuffles the data anyway.
    """
    self.data_length = 200
    self.training_fraction = 0.2
    expected_shape = (int(self.data_length / 2), 4)
    self._compressed_data_path = "ActivityData.rar"
    self._uncompressed_data_path = "DataSet/"
    self._all_data = None
    self._all_data_path = "all_data" + type(self).__name__ + ".npy"
    self._all_labels_path = "all_labels" + type(self).__name__ + ".npy"

    if os.path.isfile(self._all_data_path):
        self._all_data = (np.load(self._all_data_path),
                          np.load(self._all_labels_path))
        # A cache built with a different sample shape is unusable.
        if self._all_data[0][0].shape != expected_shape:
            self._all_data = None

    if not os.path.isfile(self._compressed_data_path):
        urllib.request.urlretrieve(
            "http://ps.ewi.utwente.nl/Blog/Sensors_Activity_Recognition_DataSet_Shoaib.rar",
            self._compressed_data_path)

    # The extracted data is a directory, so test with isdir(); isfile() is
    # never true for it and forced a re-extraction on every construction.
    if not os.path.isdir(self._uncompressed_data_path):
        # REMEMBER THAT THIS REQUIRES UNRAR INSTALLED
        with RarFile(self._compressed_data_path) as rf:
            rf.extractall()

    super().__init__(expected_shape, shuffled)
def extract_by_fileslist(apath, files_list):
    """
    Extract files from archive. Supports only rar, zip archives.

    For every target path in *files_list*, each archive member with the same
    base name is extracted into the target's folder and then moved onto the
    target path.

    :param apath: path of the archive.
    :param files_list: destination paths; only their base names are matched
        against the archive members.
    :return: list of target paths that were written (one entry per matching
        member).
    :raises ExternalSourceError: if the archive format is not zip or rar.
    """
    frmt = identify_file_format(apath)
    if frmt == 'zip':
        opener = ZipFile
    elif frmt == 'rar':
        opener = RarFile
    else:
        raise ExternalSourceError("Not supported format")

    paths = []
    # Context manager: the archive handle is closed even if a move fails.
    with opener(apath) as arch_obj:
        members = arch_obj.namelist()
        for f in files_list:
            folder = os.path.abspath(os.path.dirname(f))
            fname = os.path.basename(f)
            for _f in members:
                if os.path.basename(_f) == fname:
                    arch_obj.extract(_f, folder)
                    # build path for just new extracted file
                    src = os.path.join(folder, _f).replace('/', os.sep)
                    move(src, f)
                    paths.append(f)
    return paths
def download(self, url, filename):
    """Download a subtitle.

    The archive at *url* is fetched into memory, the member *filename* is
    re-encoded from windows-1251 to UTF-8 and written into ``self.workdir``.
    The on_subtitle_downloaded() method of the registered listener is then
    called with the path of the written file.

    :param url: URL to subtitle archive
    :type url: unicode
    :param filename: Path to subtitle file within the archive
    :type filename: unicode
    """
    target_path = os.path.join(self.workdir, os.path.basename(filename))

    self.logger.debug(u'Downloading URL: {0}'.format(urllib2.unquote(url)))
    with closing(urlopen(urllib2.unquote(url))) as response:
        archive_buffer = StringIO(response.read())

    self.logger.debug(u'Extracting subtitle to {0}'.format(target_path))
    with RarFile(archive_buffer) as archive:
        with closing(open(target_path.encode('utf-8'), mode='wb')) as out:
            raw = archive.read(filename)
            out.write(raw.decode('windows-1251').encode('utf-8'))

    self.listener.on_subtitle_downloaded(target_path)
def extract_rar(j):
    """
    Extract a rar archive and update the job so that the extracted
    file is organized instead of the archive.

    The first member whose extension is in ``MEDIA_EXTS`` is extracted next
    to the archive; ``j['s']`` and ``j['t']`` are then rewritten to point at
    the extracted file.

    :param dict j: organization job
    :return: extraction success (False when the archive holds no media file)
    :rtype: bool
    """
    # Context manager so the archive handle is always released.
    with RarFile(j['s']) as rf:
        media = None
        for f in rf.infolist():
            _, ext = os.path.splitext(f.filename)
            if ext in MEDIA_EXTS:
                media = f
                break
        if media is None:
            return False

        d = os.path.dirname(j['s'])
        extract_path = os.path.join(d, media.filename)
        LOG.info(f"extract {j['s']} -> {extract_path}")
        # Extract the media member itself (not just the first entry) into the
        # archive's directory, so the file really ends up at extract_path.
        rf.extract(media, path=d or os.curdir)

    j['s'] = extract_path
    target_dir, _ = os.path.split(j['t'])
    j['t'] = get_target_path(extract_path, target_dir)
    return True
def get_images():
    """
    Extract icon images from a .rar archive.

    The archive is the one chosen by ``max(..., key=ST_CTIME)`` among the
    ``*.rar`` files in ``QQ_RECV`` (presumably the newest one — confirm
    against ``ST_CTIME``).  ``./images`` is recreated, every member whose
    name matches the icon pattern is handed to ``extract_file``, and
    ``./resources`` is removed once everything succeeded.
    """
    category_dirs = {
        "chara": "adventurer",
        "weapon": "weapon",
        "dragon": "dragon",
        "amulet": "wyrmprint",
        "skill": "skills",
        "ability": "abilities",
    }
    chosen_archive = max(QQ_RECV.glob("*.rar"), key=ST_CTIME)
    icon_pattern = re.compile(
        r"icon/({})/l/.*/(\w+)_rgba8.png".format("|".join(category_dirs)))

    try:
        shutil.rmtree("./images")
        Path("./images").mkdir()
        with RarFile(chosen_archive) as rf:
            for member in rf.infolist():
                hit = icon_pattern.search(member.filename)
                if not hit:
                    continue
                # Group 1 is the icon category, group 2 the icon name.
                destination = (IMAGE_PATH / category_dirs[hit.group(1)]
                               / "{}.png".format(hit.group(2)))
                extract_file(rf, member, destination)
    except FileNotFoundError:
        print("File Not Found.")
    else:
        shutil.rmtree("./resources")
def extract(archive, extract=None, delet_archive=False):
    """
    Extracts the given archive file. Currently supports .zip, .rar, .tar
    and .tar.gz archives.

    :param archive: Path to archive file.
    :param extract: Path to the extracted files. If not given, the archive
        path (without the extension) will be used.
    :param delet_archive: If true, the archive file will be deleted after
        extraction
    :return: None
    :raises Exception: if the archive type is not supported.
    """
    file_path_without_extention, extension = split_path(archive)
    destination_given = bool(extract)
    if destination_given:
        # Kept as an assert so callers still see AssertionError here.
        assert isdir(extract)
    else:
        os.makedirs(file_path_without_extention)
        extract = file_path_without_extention

    if extension == '.zip':
        with zipfile.ZipFile(archive, "r") as zip_ref:
            zip_ref.extractall(extract)
    elif extension == '.rar':
        with RarFile(archive, 'r') as rf:
            # extractall(), not extract(): the argument is the destination
            # directory, not the name of a member.
            rf.extractall(extract)
    elif extension == '.tar':
        with tarfile.open(archive) as tf:
            tf.extractall(extract)
    elif archive.endswith('tar.gz'):
        # Honour an explicit destination; otherwise keep the historical
        # default of the archive path with '.tar.gz' stripped.
        target = extract if destination_given else archive[0:-1 * len('.tar.gz')]
        with tarfile.open(archive, 'r:gz') as tf:
            tf.extractall(target)
    else:
        raise Exception(f'Can not extract {extension} file type')

    if delet_archive:
        os.remove(archive)
def _uncompress(self, content, function, *args, **kwargs):
    """Open *content* as a rar or zip archive and apply *function* to it.

    :param content: raw bytes of the downloaded archive.
    :param function: callable invoked as ``function(archive, *args, **kwargs)``.
    :return: whatever *function* returns, or ``None`` when *content* is
        neither a rar nor a zip archive.
    """
    stream = io.BytesIO(content)

    # RAR detection is attempted first, then zip.
    if is_rarfile(stream):
        opened = RarFile(stream)
    elif is_zipfile(stream):
        opened = ZipFile(stream)
    else:
        opened = None

    if not opened:
        return None
    return function(opened, *args, **kwargs)
def download_archive_and_add_subtitle_files(self, link, language, video, fps, subs_id):
    """Download a subtitle archive (with caching) and process its files.

    The response is cached in ``region`` under the SHA-1 of *link*.  A rar or
    zip payload is handed to ``process_archive_subtitle_files``; anything
    else — or any failure while processing — evicts the cache entry and
    yields an empty list.

    :return: the result of ``process_archive_subtitle_files`` or ``[]``.
    """
    logger.info('Downloading subtitle %r', link)
    cache_key = sha1(link.encode("utf-8")).digest()
    request = region.get(cache_key)
    if request is NO_VALUE:
        time.sleep(1)
        request = self.retry(self.session.post(link, data={
            'id': subs_id,
            'lng': language.basename.upper()
        }, headers={
            'referer': link
        }, allow_redirects=False))
        if not request:
            return []
        request.raise_for_status()
        region.set(cache_key, request)
    else:
        logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

    try:
        archive_stream = io.BytesIO(request.content)
        if is_rarfile(archive_stream):
            return self.process_archive_subtitle_files(RarFile(archive_stream), language, video,
                                                       link, fps, subs_id)
        elif is_zipfile(archive_stream):
            return self.process_archive_subtitle_files(ZipFile(archive_stream), language, video,
                                                       link, fps, subs_id)
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt /
        # SystemExit, and the cause is kept for debugging.
        logger.debug('Failed to process archive from %r', link, exc_info=True)

    logger.error('Ignore unsupported archive %r', request.headers)
    region.delete(cache_key)
    return []
def download_archive_and_add_subtitle_files(self, link, language, video, fps, num_cds):
    """Download a subtitle archive (with caching) and process its files.

    The response is cached in ``region`` under the SHA-1 of *link*.  A rar,
    zip or 7z payload is handed to ``process_archive_subtitle_files``;
    anything else — or any failure while processing — evicts the cache entry
    and yields an empty list.

    :return: the result of ``process_archive_subtitle_files`` or ``[]``.
    """
    logger.info('Downloading subtitle %r', link)
    cache_key = sha1(link.encode("utf-8")).digest()
    request = region.get(cache_key)
    if request is NO_VALUE:
        request = self.session.get(
            link, headers={'Referer': 'https://subsunacs.net/search.php'})
        request.raise_for_status()
        region.set(cache_key, request)
    else:
        logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

    try:
        archive_stream = io.BytesIO(request.content)
        if is_rarfile(archive_stream):
            return self.process_archive_subtitle_files(
                RarFile(archive_stream), language, video, link, fps, num_cds)
        elif is_zipfile(archive_stream):
            return self.process_archive_subtitle_files(
                ZipFile(archive_stream), language, video, link, fps, num_cds)
        # The stream is rewound explicitly before the 7z probe.
        elif archive_stream.seek(0) == 0 and is_7zfile(archive_stream):
            return self.process_archive_subtitle_files(
                SevenZipFile(archive_stream), language, video, link, fps, num_cds)
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt /
        # SystemExit, and the cause is kept for debugging.
        logger.debug('Failed to process archive from %r', link, exc_info=True)

    logger.error('Ignore unsupported archive %r', request.headers)
    region.delete(cache_key)
    return []
def convert_to_cbz(self, comic_file, comic_file_name):
    """Repack a RAR comic archive as an uncompressed ``.cbz`` file.

    The archive is extracted to a temporary directory and its files are
    stored in ``<self.comic_save_location><comic_file_name>.cbz``.  Nothing
    is deleted from the source.

    :param comic_file: path of the RAR comic archive.
    :param comic_file_name: name (without extension) of the resulting file.

    NOTE: only the simple layout is handled.  When a directory contains
    sub-directories, the first one becomes the folder prefix for the files
    found afterwards and the files sitting next to those sub-directories are
    not added; the behaviour with two folders inside the archive is untested.
    """
    cbz_path = self.comic_save_location + comic_file_name + ".cbz"
    # compresslevel / strict_timestamps are deliberately not passed: they are
    # not supported on python3.6.
    # Context managers make sure the zip central directory is written and the
    # archives are closed even if something fails half-way.
    with ZipFile(cbz_path, mode="w", compression=ZIP_STORED,
                 allowZip64=True) as cbz_comic_archive, \
            TemporaryDirectory() as tmp_dir:
        with RarFile(comic_file, mode="r") as comic:
            comic.extractall(path=tmp_dir)

        base = ""  # folder prefix inside the cbz, if the archive has one
        for root, dirs, files in walk(tmp_dir):
            if dirs != []:
                base = dirs[0]
            else:
                for page in files:
                    cbz_comic_archive.write(join(root, page),
                                            arcname=base + sep + page)
def extract_rar(self, rar_path, extract_path, password):
    """Extracts a nested RAR file.

    The archive is first extracted with *password*; on ``RuntimeError`` the
    password ``"infected"`` is tried.  Whatever the outcome, every member
    ending in ``.rar`` is then extracted recursively into the same
    directory.

    @param rar_path: RAR path
    @param extract_path: where to extract
    @param password: RAR password
    @raise CuckooPackageError: invalid archive, or extraction failed with
        both passwords
    """
    # Move the archive aside when it contains a file named as itself.
    if self.is_overwritten(rar_path):
        log.debug(
            "RAR file contains a file with the same name, original is going to be overwrite"
        )
        # TODO: add random string.
        renamed_path = rar_path + ".old"
        shutil.move(rar_path, renamed_path)
        rar_path = renamed_path

    with RarFile(rar_path, "r") as archive:
        try:
            archive.extractall(path=extract_path, pwd=password)
        except BadRarFile:
            raise CuckooPackageError("Invalid Rar file")
        except RuntimeError:
            try:
                archive.extractall(path=extract_path, pwd="infected")
            except RuntimeError as e:
                raise CuckooPackageError("Unable to extract Rar file: {0}".format(e))
        finally:
            # Nested archives are processed even when extraction raised.
            nested_archives = [member for member in archive.namelist()
                               if member.endswith(".rar")]
            for member in nested_archives:
                self.extract_rar(os.path.join(extract_path, member),
                                 extract_path, password)
def get_cb_file_for_comic(comic: models.FileItem) -> Union[ZipFile, RarFile]:
    """Open the comic-book archive behind *comic*.

    :param comic: item providing the ``name`` used for type detection and the
        ``path`` that is opened.
    :return: a ``ZipFile`` for cbz names, a ``RarFile`` for cbr names.
    :raises TypeError: when the name is neither cbz nor cbr.
    """
    if _is_file_name_cbz(comic.name):
        return ZipFile(comic.path)
    if _is_file_name_cbr(comic.name):
        return RarFile(comic.path)
    raise TypeError(f"Unable to get a cb file for {comic}")
def download_subtitle(self, subtitle):
    """Download *subtitle* and store its content on the object.

    A rar or zip payload is opened and the subtitle is picked from it (a
    dedicated picker is used for episodes).  A payload that is not an archive
    is kept as-is when it validates as a subtitle.

    :raises ProviderError: when the payload is neither an archive nor a valid
        subtitle.
    """
    response = self.session.get(subtitle.download_link,
                                headers={'Referer': self.api_url},
                                timeout=10)
    response.raise_for_status()

    # open the archive
    archive_stream = io.BytesIO(response.content)
    if is_rarfile(archive_stream):
        logger.debug('Archive identified as rar')
        archive = RarFile(archive_stream)
    elif is_zipfile(archive_stream):
        logger.debug('Archive identified as zip')
        archive = ZipFile(archive_stream)
    else:
        # Not an archive: accept the raw payload only if it is a subtitle.
        subtitle.content = response.content
        if subtitle.is_valid():
            return
        subtitle.content = None
        raise ProviderError('Unidentified archive type')

    if subtitle.is_episode:
        picked = self._get_subtitle_from_archive(subtitle, archive)
    else:
        picked = self.get_subtitle_from_archive(subtitle, archive)
    subtitle.content = picked
def save_file(self, datafile):
    """
    Open the uploaded archive and create a Picture for each image it contains.

    Only members with a .png, .jpg or .jpeg extension are imported.

    :param datafile: archive file name or file handle.  The handle must carry
        a ``content_type`` attribute; a plain path opened here has none and
        is therefore rejected — NOTE(review): confirm whether path input is
        meant to be supported.
    :return: ``self.cleaned_data``
    :raises forms.ValidationError: if the content type is not zip or rar.
    """
    opened_here = isinstance(datafile, str)
    if opened_here:
        # Archives are binary data: open in binary mode.
        datafile = open(datafile, 'rb')

    content_type = getattr(datafile, 'content_type', None)
    if content_type not in {
            'application/zip', 'application/x-zip-compressed',
            'application/x-rar-compressed'
    }:
        if opened_here:
            datafile.close()
        raise forms.ValidationError(_("File must be a zip or rar file"))

    try:
        if content_type in {'application/x-rar-compressed'}:
            archive = RarFile(datafile, 'r')
        else:
            archive = ZipFile(datafile, 'r')
        try:
            for name in archive.namelist():
                filename, fileext = os.path.splitext(name.lower())
                if fileext not in ('.png', '.jpg', '.jpeg'):
                    continue
                # delete=False: the temporary file is kept after the block.
                with archive.open(name) as item, \
                        NamedTemporaryFile(prefix=slugify(filename),
                                           suffix=fileext,
                                           delete=False) as tfile:
                    tfile.write(item.read())
                    picture = Picture(author=self.request.user)
                    picture.image.save(tfile.name, File(tfile))
                    picture.save()
        finally:
            archive.close()
    finally:
        # Release the handle on failure as well as on success.
        datafile.close()
    return self.cleaned_data
def download_subtitle(self, subtitle):
    """Download *subtitle* and store its content on the object.

    A rar or zip payload is opened and the subtitle is picked from it.  A
    payload that is not an archive is kept as-is when it validates as a
    subtitle.

    :raises ProviderError: when the payload is neither an archive nor a valid
        subtitle.
    """
    response = self.session.get(subtitle.download_link, timeout=10)
    response.raise_for_status()

    # open the archive
    archive_stream = io.BytesIO(response.content)
    if is_rarfile(archive_stream):
        logger.debug('Archive identified as rar')
        archive = RarFile(archive_stream)
    elif is_zipfile(archive_stream):
        logger.debug('Archive identified as zip')
        archive = ZipFile(archive_stream)
    else:
        # Not an archive: accept the raw payload only if it is a subtitle.
        subtitle.content = response.content
        if subtitle.is_valid():
            return
        subtitle.content = None
        raise ProviderError('Unidentified archive type')

    subs_in_archive = archive.namelist()
    is_serbian = subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'

    # if Serbian lat and cyr versions are packed together, try to find right version
    if len(subs_in_archive) > 1 and is_serbian:
        self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        return

    # use default method for everything else
    subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
def __init__(self, archive_path: str):
    """Open the RAR archive and index the names of its non-directory members.

    :param archive_path: path of the RAR archive; kept in ``archive_path``.

    The open archive is stored in ``rarfile`` and the member names (as a set)
    in ``namelist``.
    """
    self.archive_path = archive_path
    self.rarfile = RarFile(self.archive_path)

    member_names = set()
    for entry in self.rarfile.infolist():
        if entry.isdir():
            continue
        member_names.add(entry.filename)
    self.namelist = member_names
def crackrar(filepath, passwd):
    """Try each candidate password on a RAR archive and report over HTTP.

    Extraction goes to ``./extractedfile``.  Status and progress are sent to
    the service on ``localhost:5000``; progress is reported every 20
    candidates and once more at the end.  ``processid`` and ``numpasswords``
    are read from the enclosing module.

    :param filepath: path of the RAR archive.
    :param passwd: iterable of candidate passwords (a trailing newline is
        stripped before use).
    :return: ``''`` on success, ``None`` when every candidate failed.
    """
    i = 0
    with RarFile(filepath, 'r') as archive:
        for i, password in enumerate(passwd, start=1):
            try:
                archive.extractall(path='./extractedfile',
                                   members=None,
                                   pwd=password.rstrip('\n'))
                print('\n===================')
                print('rar file extracted!')
                print('===================\n')
                requests.get(
                    f'http://localhost:5000/recieve?status=extracted&password={password}&processid={processid}'
                )
                return ''
            except:
                # Any failure counts as "wrong candidate"; move on.
                pass

            try:
                if i % 20 == 0:
                    requests.get(
                        f'http://localhost:5000/progress?processid={processid}&numpasswords={numpasswords}&testedpasswords={i}'
                    )
            except Exception as e:
                requests.get(f'http://localhost:5000/errors?error={e}')

    requests.get('http://localhost:5000/recieve?status=failed')
    requests.get(
        f'http://localhost:5000/progress?processid={processid}&numpasswords={numpasswords}&testedpasswords={i}'
    )
def download_demo(demo, event_folder):
    """Download a demo archive, extract it and prefix the files with the date.

    The archive is stored in ``<event_folder>/rars`` and extracted into
    *event_folder*; every extracted member is renamed to
    ``<demo['date']>#<member name>``.  With ``DEBUG`` set, redirects are not
    followed and only the ``Location`` header is printed.

    :param demo: mapping with at least ``'url'`` and ``'date'``.
    :param event_folder: destination directory.
    """
    print('Downloading %s' % demo['url'])
    with requests.get(demo['url'],
                      allow_redirects=not DEBUG,
                      headers={'User-Agent': 'joder'},
                      stream=True) as r:
        r.raise_for_status()
        if DEBUG:
            print(r.headers['Location'])
            return

        local_filename = os.path.join(event_folder, 'rars',
                                      r.url.split('/')[-1])
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print('Downloaded %s' % local_filename)

        # Context manager so the archive handle is released.
        with RarFile(local_filename) as rar:
            rar.extractall(event_folder)
            for filename in rar.namelist():
                old = os.path.join(event_folder, filename)
                new = os.path.join(event_folder,
                                   str(demo['date']) + '#' + filename)
                os.rename(old, new)
def __append_tables_info(self, archive_file, inner_file_name, inner_file_ext):
    """Collect table information from one archive member, recursing into archives.

    The member is read from *archive_file*.  When its extension is ``zip`` or
    ``rar`` it is opened as a nested archive and every member of it is
    processed recursively.  Afterwards the parser chosen for
    *inner_file_ext* is run on the member bytes and its tables are appended
    to ``self.tables_info``.

    :raises TypeError: when the member or a nested archive cannot be read.
    """
    try:
        inner_file = archive_file.read(inner_file_name)
        binary = io.BytesIO(inner_file).read()
    except (BadZipfile, BadRarFile, NotRarFile, io.UnsupportedOperation):
        raise TypeError(f'{inner_file_ext}-архив не может быть прочитан')

    def process_members(nested_archive):
        # Recurse into every member of a nested archive.
        for member_name in nested_archive.namelist():
            member_ext = self.__get_file_ext(member_name)
            self.__append_tables_info(nested_archive, member_name, member_ext)

    if inner_file_ext == 'zip':
        payload = archive_file.read(inner_file_name)
        try:
            process_members(ZipFile(io.BytesIO(payload), 'r'))
        except BadZipfile:
            raise TypeError('zip-архив не может быть прочитан')
    elif inner_file_ext == 'rar':
        payload = archive_file.read(inner_file_name)
        try:
            process_members(RarFile(io.BytesIO(payload)))
        except (BadRarFile, NotRarFile, io.UnsupportedOperation):
            raise TypeError('rar-архив не может быть прочитан')

    parser_cls = self.__choose_parser(inner_file_ext)
    parser = parser_cls(binary=binary, html=None)
    for table_info in parser.get_tables_info():
        self.tables_info.append(table_info)
def demux_rar(filename, options):
    """Extract the interesting members of a RAR archive to a temp directory.

    Members are skipped when they are empty or larger than 100 MiB, are
    directories, contain a ``..\\`` component, or are extension-less
    dot-files.  Of the rest, only those whose lower-cased extension is in
    ``demux_extensions_list`` are extracted.  Every failure is swallowed.

    :param filename: path of the RAR archive.
    :param options: analysis options; a password is derived from them with
        ``options2passwd`` and a built-in default is used otherwise.
    :return: list of paths of the extracted files (empty when rarfile support
        is missing, nothing matched or something failed).
    """
    retlist = []
    if not HAS_RARFILE:
        return retlist

    try:
        extracted = []
        password = options2passwd(options) or "******"

        with RarFile(filename, "r") as archive:
            for info in archive.infolist():
                member = info.filename
                # avoid obvious bombs
                if info.file_size > 100 * 1024 * 1024 or not info.file_size:
                    continue
                # ignore directories
                if member.endswith("\\"):
                    continue
                # add some more sanity checking since RarFile invokes an external handler
                if "..\\" in member:
                    continue

                ext = os.path.splitext(member)[1].lower()
                basename = os.path.basename(member)
                if not ext and basename.startswith("."):
                    continue

                if ext in demux_extensions_list:
                    extracted.append(member)

            if extracted:
                options = Config()
                tmp_path = options.cuckoo.get("tmppath", "/tmp")
                target_path = os.path.join(tmp_path, "cuckoo-rar-tmp")
                if not os.path.exists(target_path):
                    os.mkdir(target_path)
                tmp_dir = tempfile.mkdtemp(prefix='cuckoorar_', dir=target_path)

                for extfile in extracted:
                    # RarFile differs from ZipFile in that extract() doesn't return the path
                    # of the extracted file, so we have to make it up ourselves
                    try:
                        archive.extract(extfile, path=tmp_dir, pwd=password)
                    except:
                        # Retry without a password.
                        archive.extract(extfile, path=tmp_dir)
                    retlist.append(
                        os.path.join(tmp_dir, extfile.replace("\\", "/")))
    except:
        pass

    return retlist
def download_subtitle(self, subtitle):
    """Download the archive for *subtitle* and store the chosen subtitle file.

    A single-member archive is used as-is.  Otherwise members are filtered:
    names ending in ``FORZADO.srt``, hidden files, files without a subtitle
    extension and files whose name contains one of the non-Latino markers
    are discarded; the last remaining member wins.

    :raises ValueError: when the payload is not a rar/zip archive, or when no
        member of the archive qualifies (previously an UnboundLocalError).
    """
    logger.info('Downloading archive %s', subtitle)
    r = self.session.get(subtitle.subtitle_url,
                         headers={'Referer': MAIN_SUBDIVX_URL + subtitle.subtitle_id},
                         timeout=10,
                         verify=True)
    r.raise_for_status()

    # open the archive
    content = None
    archive_stream = io.BytesIO(r.content)
    if is_rarfile(archive_stream):
        logger.debug('Identified rar archive')
        content = RarFile(archive_stream)
    elif is_zipfile(archive_stream):
        logger.debug('Identified zip archive')
        content = ZipFile(archive_stream)
    else:
        raise ValueError('Not a valid archive')

    # TODO
    content_list = content.namelist()
    # Markers are kept byte-for-byte; they look like a mis-encoded "España".
    NON_LATINO_REFERENCES_IN_FILENAME = ['Espa§a', 'espa§a']

    sub = None
    if len(content_list) == 1:
        sub = fix_line_ending(content.read(content_list[0]))
    else:
        for name in content_list:
            logger.debug('name archive %s', name)
            # discard the FORZADOS file
            if name.endswith('FORZADO.srt'):
                logger.debug('name.endswith(FORZADO.srt): %s', name)
                continue
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                logger.debug(
                    'os.path.split(name)[-1].startswith(.): %s', name)
                continue
            # discard non-subtitle files
            if not name.lower().endswith(MY_SUBTITLE_EXTENSIONS):
                logger.debug(
                    'not name.lower().endswith(SUBTITLE_EXTENSIONS): %s',
                    name)
                continue
            # discard Spain-specific subtitle files
            if any(word in name for word in NON_LATINO_REFERENCES_IN_FILENAME):
                logger.debug('discard España subtitle files')
                continue
            logger.debug('sub selected: %s', name)
            sub = fix_line_ending(content.read(name))

    if sub is None:
        # Every member was filtered out (or the archive is empty).
        raise ValueError('No suitable subtitle found in archive')
    subtitle.content = sub
def unrar(self, file_name):
    """Extract the first member of a RAR archive into ``self.open_path``.

    :param file_name: archive name, relative to ``self.open_path``.
    :return: name of the extracted member.
    :raises IndexError: if the archive has no members.
    """
    # Context manager so the archive handle is released after extraction.
    with RarFile(join(self.open_path, file_name), 'r') as zf:
        to_extract = zf.namelist()[0]
        zf.extract(to_extract, path=self.open_path)
    return to_extract
def extract(dir):
    """Unpack every .rar and .zip file returned by ``get_files(dir)``.

    RAR archives are extracted with ``extractall()`` without a destination
    argument; zip files are passed to ``unzip``.

    :param dir: directory handed to ``get_files``.
    """
    for archive_name in get_files(dir):
        # Text after the last dot decides how the file is handled.
        suffix = archive_name.split('.')[-1]
        if suffix == 'rar':
            with RarFile(archive_name, 'r') as rar:
                rar.extractall()
        elif suffix == 'zip':
            unzip(archive_name)
def get_archive_object(file):
    """Open *file* as an archive.

    :param file: path of the archive.
    :return: a ``RarFile`` when ``Utils.ext(file)`` is ``'rar'``; every other
        extension is opened as a ``zipfile.ZipFile``.
    """
    opener = RarFile if Utils.ext(file) == 'rar' else zipfile.ZipFile
    return opener(file)
def archiver(archive_extension, archive_path):
    """Open the archive at *archive_path* according to its extension.

    :param archive_extension: ``'zip'`` or ``'rar'``.
    :param archive_path: path of the archive.
    :return: an open ``ZipFile`` / ``RarFile``; for any other extension an
        error is logged and ``None`` is returned.
    """
    if archive_extension == 'zip':
        return ZipFile(archive_path)
    if archive_extension == 'rar':
        return RarFile(archive_path)

    message = ('Расширение добавлено в список доступных, '
               'но не добавлено в обработчик архивов такого расширения, '
               'или какая-либо непредвиденная ошибка')
    logger.error(message)
    return None