예제 #1
0
파일: core.py 프로젝트: isi-vista/aida-viz
def zipfile_to_documents(
        corpus_zipfile: ZipFile,
        prefix: Optional[str]) -> List[Tuple[str, str, str, str]]:
    """Extract the raw text of every LTF document stored in a corpus archive.

    The archive is expected to contain ``{prefix}docs/parent_children.tab``
    and a ``{prefix}data/ltf/`` directory whose entries are themselves zip
    archives of LTF files.

    Args:
        corpus_zipfile: The opened corpus archive.
        prefix: Path prefix inside the archive. When ``None`` it is taken
            from ``get_root_dir_name(corpus_zipfile)`` (empty string if that
            returns a falsy value).

    Returns:
        A list of ``(doc_id, doceid, lang_id, raw_text)`` tuples, one per
        entry of every nested archive. ``doceid`` is the entry's file name up
        to its first ``.``.

    Raises:
        RuntimeError: If ``parent_children.tab`` cannot be located.
        FileNotFoundError: If processing an entry raises ``AttributeError``.
    """
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")

    if prefix is None:
        prefix = get_root_dir_name(corpus_zipfile) or ""

    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))

    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")

    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))

    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)
    child_to_lang_map = _create_child_to_lang_map(parent_children_tab)

    documents = []
    text_dir = ZipPath(corpus_zipfile, at=f"{prefix}data/ltf/")

    for source_doc_path in text_dir.iterdir():
        # Each entry is a nested archive loaded fully into memory; the
        # context manager releases it as soon as its members are processed.
        nested_bytes = io.BytesIO(source_doc_path.read_bytes())
        with ZipFile(nested_bytes) as source_doc_zip:
            for source_info in tqdm(
                    source_doc_zip.infolist(),
                    desc=f"Extracting {source_doc_path.name}",
                    bar_format="{l_bar}{bar:20}{r_bar}",
            ):

                doceid_path = ZipPath(source_doc_zip, at=source_info.filename)
                try:
                    doceid = doceid_path.name.split(".")[0]
                    doc_id = child_to_parent_map[doceid]
                    lang_id = child_to_lang_map[doceid]
                    raw_text = convert_ltf_to_raw_text(
                        doceid_path.read_text(encoding="utf-8"))

                    documents.append((doc_id, doceid, lang_id, raw_text))

                except AttributeError as err:
                    # Keep the original failure attached for debugging.
                    raise FileNotFoundError(
                        f"Could not read from {doceid_path}.") from err

    return documents
예제 #2
0
 def from_path(path: Path) -> Dict[str, Any]:
     """Collect metadata from every entry under ``docProps/`` of an archive.

     Opens the zip archive at *path* and returns a dict keyed by the name of
     each ``docProps/`` entry; each value is whatever
     ``_OfficeOpenXMLParser().extract_metadata`` returns for that entry's text.
     """
     metadata: Dict[str, Any] = {}
     with ZipFile(path) as archive:
         props_dir = ZipPath(archive, 'docProps/')
         for props_entry in props_dir.iterdir():
             # A fresh parser is built for every entry.
             parser = _OfficeOpenXMLParser()
             metadata[props_entry.name] = parser.extract_metadata(
                 props_entry.read_text())
     return metadata
예제 #3
0
def load_game_assets():
    """Register package metadata and all armor/weapon assets from the datastore.

    Locates the datastore archive, loads its package configuration, registers
    the package metadata, then registers everything found under the archive's
    ``armor`` and ``weapons`` directories. Progress is printed to stdout.
    """
    zip_file = _locate_available_datastore()
    config = _load_package_config(zip_file=zip_file)
    package_info = config["package"]

    register_package_metadata(**package_info)

    banner = "#" * 64
    print(banner)
    print("Assets loading...")
    print("Package:", package_info["name"], "| V", package_info["version"])

    armor_root = ZipPath(zip_file) / "armor"
    weapons_root = ZipPath(zip_file) / "weapons"

    # Armor is registered before weapons.
    for asset_root in (armor_root, weapons_root):
        _recursive_object_registration(asset_root)

    print("Assets Loaded.")
    print(banner)
예제 #4
0
def find_archive_metafile(location):
    """
    Return a Path-like object for the first Python metafile found in the
    package egg or wheel archive at ``location``, or None when there is none.

    Only top-level entries whose name ends with ``meta_dir_suffixes`` are
    searched, and only their direct children are compared against
    ``meta_file_names``. The archive is not closed here; the returned path
    object is built on top of it.
    """
    archive = zipfile.ZipFile(location)
    meta_dirs = (
        entry for entry in ZipPath(archive).iterdir()
        if entry.name.endswith(meta_dir_suffixes)
    )
    for meta_dir in meta_dirs:
        for candidate in meta_dir.iterdir():
            if candidate.name.endswith(meta_file_names):
                return candidate
    return None
예제 #5
0
 def recognize(cls, location):
     """
     Yield one Package manifest object per metadata file found in the package
     archive at ``location``.

     Top-level entries whose name ends with ``meta_dir_suffixes`` are scanned;
     each direct child whose name ends with ``meta_file_names`` is passed to
     ``parse_metadata`` together with ``cls``. The archive stays open while
     the generator is being consumed.
     """
     with zipfile.ZipFile(location) as archive:
         for entry in ZipPath(archive).iterdir():
             if entry.name.endswith(meta_dir_suffixes):
                 for candidate in entry.iterdir():
                     if candidate.name.endswith(meta_file_names):
                         yield parse_metadata(cls, candidate)
예제 #6
0
파일: corpus.py 프로젝트: TonyBY/aida-viz
def get_text_docs(corpus_zipfile: ZipFile) -> ImmutableDict[str, str]:
    """Map each parent document id to the raw text of its LTF document.

    The archive root is taken from ``get_root_dir_name(corpus_zipfile)``
    (empty string if that returns a falsy value). Both
    ``docs/parent_children.tab`` and ``data/ltf/`` are resolved relative to
    that root; every entry of ``data/ltf/`` is itself a zip archive of LTF
    files.

    Args:
        corpus_zipfile: The opened corpus archive.

    Returns:
        An immutable dict from parent document id to raw text. When several
        entries share a parent id, the one processed last wins.

    Raises:
        RuntimeError: If ``parent_children.tab`` cannot be located.
        FileNotFoundError: If processing an entry raises ``AttributeError``.
    """
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")

    prefix = get_root_dir_name(corpus_zipfile) or ""

    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))

    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")

    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))

    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)

    text_docs = {}
    # Resolve the LTF directory under the same root used for
    # parent_children.tab; without the prefix an archive that has a root
    # directory would yield no documents at all.
    text_dir = ZipPath(corpus_zipfile, at=f"{prefix}data/ltf/")

    for source_doc_path in text_dir.iterdir():
        # Each entry is a nested archive loaded fully into memory; the
        # context manager releases it as soon as its members are processed.
        nested_bytes = io.BytesIO(source_doc_path.read_bytes())
        with ZipFile(nested_bytes) as source_doc_zip:
            for source_info in tqdm(
                    source_doc_zip.infolist(),
                    desc=f"Extracting {source_doc_path.name}",
                    bar_format="{l_bar}{bar:20}{r_bar}",
            ):

                doc = ZipPath(source_doc_zip, at=source_info.filename)
                try:
                    # The document element id is the file name up to its
                    # first dot.
                    doceid = doc.name.split(".")[0]
                    doc_id = child_to_parent_map[doceid]
                    text_docs[doc_id] = convert_ltf_to_raw_text(
                        doc.read_text(encoding="utf-8"))
                except AttributeError as err:
                    # Keep the original failure attached for debugging.
                    raise FileNotFoundError(
                        f"Could not read from {doc}.") from err

    return immutabledict(text_docs)
예제 #7
0
def parse_archive(location):
    """
    Return the result of ``parse_metadata`` for the first metadata file found
    in the package archive (wheel or egg) at ``location``.

    Return None when ``location`` is empty, does not end with one of
    ``bdist_file_suffixes``, or contains no matching metadata file.
    """
    if not (location and location.endswith(bdist_file_suffixes)):
        return None

    with zipfile.ZipFile(location) as archive:
        for entry in ZipPath(archive).iterdir():
            if entry.name.endswith(meta_dir_suffixes):
                for candidate in entry.iterdir():
                    if candidate.name.endswith(meta_file_names):
                        return parse_metadata(candidate)
    return None
예제 #8
0
def _is_older_version(candidate: str, reference: str) -> bool:
	"""Return True when dotted version ``candidate`` is strictly older than ``reference``.

	Components are compared as integers so that e.g. ``1.10`` is newer than
	``1.9``. If any component is not an integer, the components are compared
	as strings instead.
	"""
	candidate_parts = candidate.strip().split('.')
	reference_parts = reference.strip().split('.')
	try:
		return tuple(map(int, candidate_parts)) < tuple(map(int, reference_parts))
	except ValueError:
		return tuple(candidate_parts) < tuple(reference_parts)


def unpack(path: Path, client: Client):
	"""Extract version files, hash files and asset bundles from an archive.

	Files are written below ``ClientAssets/<client.name>``. For every
	``(versiontype, suffix)`` pair in ``version_file_suffix`` the matching
	``assets/version<suffix>.txt`` is read from the archive. The pair is
	skipped when that file is missing, or when a previously stored version
	is newer than the one in the archive. Otherwise the stored version and
	the ``hashes<suffix>.csv`` file are replaced, and for
	``VersionType.AZL`` the ``AssetBundles`` directory is wiped and
	re-extracted through ``extract_folder``.

	Args:
		path: Location of the zip archive to unpack.
		client: Client whose ``name`` selects the output directory.
	"""
	print('Unpacking archive...')
	with ZipFile(path, 'r') as archive:
		extract_to_folder = Path('ClientAssets', client.name)
		extract_to_folder.mkdir(parents=True, exist_ok=True)

		# Looked up once per version type; a set keeps each lookup cheap.
		archive_names = set(archive.namelist())

		for versiontype, suffix in version_file_suffix.items():
			versionfname = 'version'+suffix+'.txt'
			if 'assets/'+versionfname not in archive_names:
				print(f'The file {versionfname} could not be found in the archive. Has the archive been modified?')
				continue

			versionpath = Path(extract_to_folder, versionfname)
			with archive.open('assets/'+versionfname, 'r') as zf:
				obbversion = zf.read().decode('utf8')

			# if a version file already exists, compare the versions
			# if the obbversion is smaller, don't extract data from obb
			if versionpath.exists():
				with open(versionpath, 'r', encoding='utf8') as f:
					currentversion = f.read()

				# NOTE(review): the message says "higher or same" but only a
				# strictly higher stored version is skipped - confirm intent.
				if _is_older_version(obbversion, currentversion):
					print(f'Current version {currentversion} is higher or same as obb version {obbversion} for {versionfname}.')
					continue

			# write new version to file
			with open(versionpath, 'w', encoding='utf8') as f:
				f.write(obbversion)

			# extract corresponding hashfile
			with open(Path(extract_to_folder, 'hashes'+suffix+'.csv'), 'wb') as fh:
				with archive.open('assets/hashes'+suffix+'.csv', 'r') as zfh:
					shutil.copyfileobj(zfh, fh)

			# also extract all assets
			if versiontype == VersionType.AZL:
				# remove all existing asset bundles
				abpath = Path(extract_to_folder, 'AssetBundles')
				shutil.rmtree(abpath, ignore_errors=True)

				# extract assets now
				zipabpath = ZipPath(archive, 'assets/AssetBundles/')
				extract_folder(archive, zipabpath, zipabpath, abpath)
예제 #9
0
 def distinfo(self, wheel):
     """Return a ZipPath to this distribution's ``.dist-info/`` directory in *wheel*.

     The directory name is ``<distname>-<version>.dist-info/``, built from
     ``self.distname`` and ``self.version``.
     """
     dist_info_dir = '{}-{}.dist-info/'.format(self.distname, self.version)
     return ZipPath(wheel, dist_info_dir)
예제 #10
0
    def test_write_adds_file_to_archive(self, wf, tmp_file):
        """A file written through ``wf`` is readable from the archive with the same text."""
        tmp_file.write_text("contents")
        wf.write(tmp_file)

        # The archive member is looked up by the file's name without any
        # leading slashes.
        arcname = str(tmp_file.name).lstrip('/')
        archived = ZipPath(wf.zipfile, arcname)

        archived_text = archived.read_text()
        source_text = tmp_file.read_text()
        assert archived_text == source_text
예제 #11
0
 def arcpath(self, wf):
     """Return a ZipPath inside ``wf.zipfile`` pointing at a fixed archive path."""
     return ZipPath(wf.zipfile, '/some/archive/path')