Example No. 1
def _masked(value):
    parts = str((
        value.shape,
        xxh64_hexdigest(value.data),
        xxh64_hexdigest(value.mask),
    ))
    return xxh64_hexdigest(parts)
Example No. 2
def _hexdigest(value):
    """
    Return a hexadecimal string hash representation of the provided value.

    Calculates a 64-bit non-cryptographic hash of the provided value,
    and returns the hexdigest string representation of the calculated hash.

    """
    # Special case: deal with numpy arrays.
    if ma.isMaskedArray(value):
        parts = (
            value.shape,
            xxh64_hexdigest(value.data),
            xxh64_hexdigest(value.mask),
        )
        value = str(parts)
    elif isinstance(value, np.ndarray):
        parts = (value.shape, xxh64_hexdigest(value))
        value = str(parts)

    try:
        # Calculate single-shot hash to avoid allocating state on the heap
        result = xxh64_hexdigest(value)
    except TypeError:
        # xxhash expects a bytes-like object, so try hashing the
        # string representation of the provided value instead, but
        # also fold in the object type...
        parts = (type(value), value)
        result = xxh64_hexdigest(str(parts))

    return result
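
A minimal usage sketch of the helper above (assuming numpy, numpy.ma, and xxhash are importable as in the surrounding module; the sample arrays are illustrative only):

import numpy as np
import numpy.ma as ma
from xxhash import xxh64_hexdigest

arr = np.arange(6, dtype=np.int32).reshape(2, 3)
masked = ma.masked_array(arr, mask=[[0, 1, 0], [0, 0, 1]])

print(_hexdigest(arr))        # plain ndarray: hashes str((shape, digest of buffer))
print(_hexdigest(masked))     # masked array: hashes str((shape, digest of data, digest of mask))
print(_hexdigest((3, 'a')))   # not bytes-like: TypeError path hashes str((type, value))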
Example No. 3
def calc_dir_signature(dir_path):
    dir_signatures = []
    for root_dir, dirs, files in os.walk(
            os.path.abspath(os.path.expanduser(dir_path))):
        for filename in files:
            filepath = os.path.join(root_dir, filename)
            with open(filepath, 'rb') as f:
                dir_signatures.append(xxhash.xxh64_hexdigest(f.read()))
    dir_signatures.sort()
    sigs_as_string = '\n'.join(dir_signatures) + '\n'
    return xxhash.xxh64_hexdigest(sigs_as_string)
Example No. 4
    def write_data(self,
                   array: np.ndarray,
                   *,
                   remote_operation: bool = False) -> bytes:
        """writes array data to disk in the numpy_00 fmtBackend

        Parameters
        ----------
        array : np.ndarray
            tensor to write to disk
        remote_operation : bool, optional, kwarg only
            True if writing in a remote operation, otherwise False. Default is
            False

        Returns
        -------
        bytes
            db hash record value specifying location information
        """
        checksum = xxh64_hexdigest(array)
        if self.w_uid in self.wFp:
            self.hIdx += 1
            if self.hIdx >= COLLECTION_SIZE:
                self.wFp[self.w_uid].flush()
                self._create_schema(remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)

        destSlc = (self.hIdx, *[slice(0, x) for x in array.shape])
        self.wFp[self.w_uid][destSlc] = array
        self.wFp[self.w_uid].flush()
        return numpy_10_encode(self.w_uid, checksum, self.hIdx, array.shape)
Example No. 5
def define_anonymous_type(node: Cursor, bv: bn.BinaryView) -> Tuple[str, bn.Type]:
    # An anonymous type must be either a struct, union, or enum.
    # To simplify working with Binary Ninja, an anonymous type is de-anonymized:
    # its name is 'anon_' followed by a hash of its location in the source file.
    bn.log.log_debug(f'define_anonymous_type: Processing {node.type.spelling}')

    struct = bn.Structure()
    struct.width = node.type.get_size()
    struct.alignment = node.type.get_align()
    struct_name = 'anon_' + xxhash.xxh64_hexdigest(node.type.spelling)

    for field in node.type.get_fields():
        bn_field_type = bv.get_type_by_name(field.spelling)
        field_name = field.spelling
        if not bn_field_type:
            # Need to define the field type
            # if field.is_anonymous():
            #    field_name, bn_field_type = define_anonymous_type(field, bv)
            # else:
            field_name, bn_field_type = define_type(field.get_definition(), bv)
        bn.log.log_debug(f'define_anonymous_type: Appending field - {bn_field_type} {field_name}')
        struct.append(bn_field_type, field_name)

    # Check if the underlying struct is a union
    if node.type.kind == TypeKind.ELABORATED:
        if node.type.get_named_type().get_declaration().kind == CursorKind.UNION_DECL:
            # set type to union
            struct.type = bn.StructureType.UnionStructureType

    return struct_name, bn.Type.structure_type(struct)
Example No. 6
    def handle_file(self, path, base_dir, ignore_patterns):
        for ignore_pattern in ignore_patterns:
            if isinstance(ignore_pattern, tuple):
                relative_folder, ignore_pattern = ignore_pattern
                relative_path = os.path.relpath(path, relative_folder)
            else:
                relative_path = path
            if fnmatch.fnmatch(relative_path, ignore_pattern):
                return

        storage_registry = self.storage_registry
        with open(path, "rb") as f:
            buffer = f.read()
        digest = xxhash.xxh64_hexdigest(buffer)  # type: str
        friendly_path = os.path.relpath(path, base_dir)
        if friendly_path not in storage_registry["files"]:
            storage_registry["files"][friendly_path] = {}
        original_dir = os.path.dirname(path)
        file_name = os.path.basename(path)
        rel_original_dir = os.path.relpath(original_dir, base_dir)
        mkdir_p(os.path.join(self.storage_dir, "files", rel_original_dir))
        target_rel_path = os.path.join("files", rel_original_dir, file_name + "." + digest)
        is_new = False
        if digest not in storage_registry["files"][friendly_path]:
            # save file to storage
            is_new = True
            target_full_path = os.path.join(self.storage_dir, target_rel_path)
            shutil.copy(path, target_full_path)
            storage_registry["files"][friendly_path][digest] = target_full_path
        if is_new:
            print("New file found: {}...{}".format(friendly_path, digest))
        else:
            print("Existing file found: {}...{}".format(friendly_path, digest))
        return is_new, friendly_path, digest, target_rel_path
Example No. 7
    def read_data(self, hashVal: NUMPY_10_DataHashSpec) -> np.ndarray:
        """Read data from disk written in the numpy_00 fmtBackend

        Parameters
        ----------
        hashVal : NUMPY_10_DataHashSpec
            record specification stored in the db

        Returns
        -------
        np.ndarray
            tensor data stored at the provided hashVal specification.

        Raises
        ------
        RuntimeError
            If the recorded checksum does not match the received checksum.

        Notes
        -----

        TO AVOID DATA LOSS / CORRUPTION:

        * On a read operation, we copy memmap subarray tensor data to a new
          `np.ndarray` instance so as to prevent writes on a raw memmap result
          slice (a `np.memmap` instance) from propagating to data on disk.

        * This is an issue for reads from a write-enabled checkout where data
          was just written, since the np flags "WRITEABLE" and "OWNDATA" will
          be True, and writes to the returned array would overwrite that data
          slice on disk.

        * For read-only checkouts, modifications to the resultant array would
          perform a "copy on write"-like operation which would be propagated to
          all future reads of the subarray from that process, but which would
          not be persisted to disk.
        """
        srcSlc = (hashVal.collection_idx,
                  *[slice(0, x) for x in hashVal.shape])
        try:
            res = self.Fp[hashVal.uid][srcSlc]
        except TypeError:
            self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
            res = self.Fp[hashVal.uid][srcSlc]
        except KeyError:
            process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
            if Path(process_dir, f'{hashVal.uid}.npy').is_file():
                file_pth = self.DATADIR.joinpath(f'{hashVal.uid}.npy')
                self.rFp[hashVal.uid] = open_memmap(file_pth, 'r')
                res = self.Fp[hashVal.uid][srcSlc]
            else:
                raise

        out = np.array(res, dtype=res.dtype, order='C')
        if xxh64_hexdigest(out) != hashVal.checksum:
            raise RuntimeError(
                f'DATA CORRUPTION Checksum {xxh64_hexdigest(out)} != recorded {hashVal}'
            )
        return out
Example No. 8
    def hash(df):
        """Compute the hash of a pandas DataFrame.

        This only considers the index, data, and column names.

        """
        dataset_hash = xxhash.xxh64_hexdigest(np.ascontiguousarray(df.values))
        dataset_hash += joblib.hashing.hash(df.index)
        dataset_hash += joblib.hashing.hash(df.columns)
        return dataset_hash
Example No. 9
def hexdigest(item):
    """
    Calculate a hexadecimal string hash representation of the provided item.

    Calculates a 64-bit non-cryptographic hash of the provided item, using
    the extremely fast ``xxhash`` hashing algorithm, and returns the hexdigest
    string representation of the hash.

    This provides a means to compare large and/or complex objects through
    simple string hexdigest comparison.

    Args:

    * item (object):
        The item whose hexdigest is to be calculated.

    Returns:
        The string hexadecimal representation of the item's 64-bit hash.

    """
    # Special case: deal with numpy arrays.
    if ma.isMaskedArray(item):
        parts = (
            item.shape,
            xxh64_hexdigest(item.data),
            xxh64_hexdigest(item.mask),
        )
        item = str(parts)
    elif isinstance(item, np.ndarray):
        parts = (item.shape, xxh64_hexdigest(item))
        item = str(parts)

    try:
        # Calculate single-shot hash to avoid allocating state on the heap
        result = xxh64_hexdigest(item)
    except TypeError:
        # xxhash expects a bytes-like object, so try hashing the
        # string representation of the provided item instead, but
        # also fold in the object type...
        parts = (type(item), item)
        result = xxh64_hexdigest(str(parts))

    return result
Example No. 10
    def hash(cube):
        cube_hash = ""
        if isinstance(cube.data, np.ma.core.MaskedArray):
            cube_hash += _ma_hasher.hash(cube.data)
        else:
            cube_hash += xxhash.xxh64_hexdigest(cube.data)
        for coord in cube.coords():
            cube_hash += joblib.hashing.hash(coord)

        cube_hash += joblib.hashing.hash(cube.metadata)
        return cube_hash
Example No. 11
def compute_xxhash64_digest_filepath(filepath):
    try:
        import xxhash
        import os

        assert os.path.exists(filepath)

        with open(filepath, 'rb') as file_:
            digest = xxhash.xxh64_hexdigest(file_.read())
    except Exception:
        digest = None
    return digest
Example No. 12
def hash_key(key: Tuple[Optional[str], Any]) -> str:
    """
    Creates a key from a picklable document and an optional human-readable
    prefix.
    """
    prefix, document = key
    raw = [
        prefix,
        xxh64_hexdigest(pickle.dumps(document))
        if document is not None else None,
    ]
    return ":".join(filter(None, raw))
Example No. 13
    def o_encrypt(self, data):
        a = base64.b64decode(data)
        i = 16
        s = max((len(a) - 2 * i) // 3, 0)
        u = a[s:s + i]
        a = a[0:s] + a[s + i:]
        sec_key = xxhash.xxh64_hexdigest(u, 41405)
        print(sec_key)

        text = rc4(a, sec_key)

        data = plistlib.loads(text, fmt=FMT_BINARY)
        return data
Example No. 14
    def write_data(self,
                   array: np.ndarray,
                   *,
                   remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        array : np.ndarray
            tensor to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            hdf5 dataset files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        checksum = xxh64_hexdigest(array)
        if self.w_uid in self.wFp:
            self.hIdx += 1
            if self.hIdx >= self.hMaxSize:
                self.hIdx = 0
                self.hNextPath += 1
                self.hColsRemain -= 1
                if self.hColsRemain <= 1:
                    self.wFp[self.w_uid]['/'].attrs.modify(
                        'next_location', (self.hNextPath, self.hIdx))
                    self.wFp[self.w_uid]['/'].attrs.modify(
                        'collections_remaining', self.hColsRemain)
                    self.wFp[self.w_uid].flush()
                    self._create_schema(remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)

        srcSlc = None
        destSlc = (self.slcExpr[self.hIdx], self.slcExpr[0:array.size])
        flat_arr = np.ravel(array)
        self.wFp[self.w_uid][f'/{self.hNextPath}'].write_direct(
            flat_arr, srcSlc, destSlc)

        hashVal = hdf5_00_encode(uid=self.w_uid,
                                 checksum=checksum,
                                 dataset=self.hNextPath,
                                 dataset_idx=self.hIdx,
                                 shape=array.shape)
        return hashVal
Example No. 15
def generate_project_config(base_path):
    project_config = {}
    project_paths = {}
    project_config['paths'] = project_paths
    base_path = os.path.abspath(base_path)
    project_config['project_name'] = os.path.basename(base_path)
    # Salt the path with a random int (range chosen arbitrarily) so repeated inits get distinct ids.
    project_config['project_id'] = xxh64_hexdigest(base_path + str(randint(0, 2**32)))
    project_paths['project_path'] = base_path
    project_paths['log_path'] = os.path.join(base_path, 'logs/')
    project_paths['components_path'] = os.path.join(base_path, 'components/')
    project_paths['workflows_path'] = os.path.join(base_path, 'workflows/')
    project_paths['data_path'] = os.path.join(base_path, 'data/')
    project_paths['models_path'] = os.path.join(base_path, 'models/')
    return project_config
Example No. 16
    def add(self, value):
        """
        Adds the item to the HyperLogLog
        """
        # h: D -> {0,1} ** 64
        # x = h(v)
        # j = <x_0x_1..x_{p-1}>
        # w = <x_{p}x_{p+1}..>
        # M[j] = max(M[j], rho(w))

        x = int(xxhash.xxh64_hexdigest(value)[:16], 16)
        # x = int(sha1(bytes(value.encode('utf8'))).hexdigest()[:16], 16)
        j = x & (self._m - 1)
        w = x >> self._p

        self._M[j] = max(self._M[j], get_rho(w, 64 - self._p))
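
A standalone sketch of the register-index / rank split performed above, assuming p = 14 register-index bits (so m = 2**14 registers); the item string is illustrative:

import xxhash

p = 14
m = 1 << p

x = int(xxhash.xxh64_hexdigest("example-item")[:16], 16)  # 64-bit hash as an int
j = x & (m - 1)    # low p bits pick the register M[j]
w = x >> p         # remaining (64 - p) bits feed get_rho(w, 64 - p)
print(j, w)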
Example No. 17
def _json_hash_encode(row: Series) -> Series:
    """
    Take a DataFrame row, add serialized JSON and hash

    Parameters
    ----------
    row: Series
        a DataFrame row

    Returns
    -------
    Series
        the row with the json and hash columns added
    """
    json = row.to_json()
    row["Json"] = json
    row["Hash"] = xxhash.xxh64_hexdigest(json.encode("utf-8"))
    return row
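
A hypothetical usage sketch, applying the encoder row-wise to a small illustrative DataFrame (assuming the module's own imports, pandas and xxhash, are available):

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
df = df.apply(_json_hash_encode, axis=1)   # adds "Json" and "Hash" columns to every row
print(df[["Json", "Hash"]])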
Example No. 18
    def write_data(self,
                   data: str,
                   *,
                   remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: str
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            hdf5 dataset files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        encoded_data = data.encode()
        checksum = xxh64_hexdigest(encoded_data)

        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, encoded_data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_30_encode(self.w_uid, row_idx, checksum)
Example No. 19
    def post(self, workflow_id):
        parser = reqparse.RequestParser()
        parser.add_argument('workflow-name',
                            type=str,
                            required=True,
                            help='workflow name')
        parser.add_argument('project-id',
                            type=str,
                            required=True,
                            help='project id')
        args = parser.parse_args()
        args['workflow-id'] = xxh64_hexdigest(args['workflow-name'])
        args['status'] = 0
        args['environment'] = 'local'
        args['schedule'] = None
        args['execution'] = None
        db = get_plasma_db()
        workflow_collection = db.get_collection('workflows')
        workflow_collection.insert(dict(args))
        update_project_statistics(args['project-id'])
        response = generate_response(201)
        return response
Example No. 20
    def test_xxh64_overflow(self):
        s = 'I want an unsigned 64-bit seed!'
        a = xxhash.xxh64(s, seed=0)
        b = xxhash.xxh64(s, seed=2**64)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(), xxhash.xxh64_intdigest(s, seed=0))
        self.assertEqual(a.intdigest(), xxhash.xxh64_intdigest(s, seed=2**64))
        self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=0))
        self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=2**64))
        self.assertEqual(a.hexdigest(), xxhash.xxh64_hexdigest(s, seed=0))
        self.assertEqual(a.hexdigest(), xxhash.xxh64_hexdigest(s, seed=2**64))

        a = xxhash.xxh64(s, seed=1)
        b = xxhash.xxh64(s, seed=2**64 + 1)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(), xxhash.xxh64_intdigest(s, seed=1))
        self.assertEqual(a.intdigest(),
                         xxhash.xxh64_intdigest(s, seed=2**64 + 1))
        self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=1))
        self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=2**64 + 1))
        self.assertEqual(a.hexdigest(), xxhash.xxh64_hexdigest(s, seed=1))
        self.assertEqual(a.hexdigest(),
                         xxhash.xxh64_hexdigest(s, seed=2**64 + 1))

        a = xxhash.xxh64(s, seed=2**65 - 1)
        b = xxhash.xxh64(s, seed=2**66 - 1)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(),
                         xxhash.xxh64_intdigest(s, seed=2**65 - 1))
        self.assertEqual(a.intdigest(),
                         xxhash.xxh64_intdigest(s, seed=2**66 - 1))
        self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=2**65 - 1))
        self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=2**66 - 1))
        self.assertEqual(a.hexdigest(),
                         xxhash.xxh64_hexdigest(s, seed=2**65 - 1))
        self.assertEqual(a.hexdigest(),
                         xxhash.xxh64_hexdigest(s, seed=2**66 - 1))
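
The behaviour exercised by the test above, in one line: seed values wrap modulo 2**64, so a seed of 2**64 is equivalent to 0:

import xxhash

s = 'I want an unsigned 64-bit seed!'
assert xxhash.xxh64_hexdigest(s, seed=0) == xxhash.xxh64_hexdigest(s, seed=2**64)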
Example No. 21
    def read_data(self, hashVal: LMDB_30_DataHashSpec) -> str:
        """Read data from an hdf5 file handle at the specified locations

        Parameters
        ----------
        hashVal : LMDB_30_DataHashSpec
            record specification parsed from its serialized store val in lmdb.

        Returns
        -------
        str
            requested data.
        """
        try:
            with self.Fp[hashVal.uid].begin(write=False) as txn:
                res = txn.get(hashVal.row_idx.encode(), default=False)
                if res is False:
                    raise RuntimeError(hashVal)
        except AttributeError:
            self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
            return self.read_data(hashVal)
        except KeyError:
            process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
            if Path(process_dir, f'{hashVal.uid}.lmdbdir').is_file():
                file_pth = self.DATADIR.joinpath(hashVal.uid)
                self.rFp[hashVal.uid] = lmdb.open(str(file_pth),
                                                  readonly=True,
                                                  **LMDB_SETTINGS)
                return self.read_data(hashVal)
            else:
                raise

        out = res.decode()
        if xxh64_hexdigest(res) != hashVal.checksum:
            raise RuntimeError(
                f'DATA CORRUPTION Checksum {xxh64_hexdigest(res)} != recorded {hashVal}'
            )
        return out
Example No. 22
    def post(self, workflow_id):
        db = get_plasma_db()
        execution_pass = {}
        execution_pass['workflow-id'] = workflow_id
        execution_pass['status'] = 1
        execution_pass['started-at'] = time()
        execution_pass['finished-at'] = None
        execution_pass['execution-id'] = xxh64_hexdigest(
            workflow_id + str(time()))
        execution_collection = db.get_collection('executions')
        workflow = db.workflows.find_one({"workflow-id": workflow_id})
        project = db.projects.find_one({"project-id": workflow['project-id']})
        payload = {}
        payload['project-id'] = project['project-id']
        payload['project-name'] = project['project-name']
        payload['project-path'] = project['project-path']
        payload['environment'] = workflow['environment']
        payload['workflow-id'] = workflow_id
        payload['workflow-name'] = workflow['workflow-name']
        payload['execution-id'] = execution_pass['execution-id']
        run_workflow(payload)
        execution_collection.insert(execution_pass)
        response = generate_response(200)
        return response
Example No. 23
def hash_numpy(x: numpy.ndarray) -> str:
    """Return a value that uniquely identifies a numpy array."""
    return xxhash.xxh64_hexdigest(x.tobytes())
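
A caveat, shown in a small sketch with illustrative arrays: `tobytes()` serializes only the raw buffer, so arrays that differ only in shape produce identical digests.

import numpy as np
import xxhash

a = np.zeros((2, 3), dtype=np.int64)
b = np.zeros((3, 2), dtype=np.int64)
# Same 48 zero bytes, different shapes -> identical digests.
assert xxhash.xxh64_hexdigest(a.tobytes()) == xxhash.xxh64_hexdigest(b.tobytes())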
Example No. 24
    def perform_merge(self):
        force = self._options.get("force", False)
        slog_path = util.get_master_modpack_dir() / "logs" / "savedata.log"

        new_entries = self.consolidate_diffs(self.get_all_diffs())
        if not new_entries:
            print("No savedata merging necessary.")
            if slog_path.exists():
                slog_path.unlink()
            if (util.get_master_modpack_dir() / "logs" /
                    "savedata.sarc").exists():
                (util.get_master_modpack_dir() / "logs" /
                 "savedata.sarc").unlink()
            return
        if slog_path.exists() and not force:
            with slog_path.open("r") as l_file:
                if xxhash.xxh64_hexdigest(str(new_entries)) == l_file.read():
                    print("No savedata merging necessary.")
                    return

        savedata = get_stock_savedata()
        save_files = sorted(savedata.get_files(), key=lambda f: f.name)[0:-2]

        print("Merging changes...")
        merged_entries = oead.byml.Array(
            sorted(
                {
                    entry["HashValue"].v: entry
                    for entry in [
                        *[
                            e for file in save_files for e in
                            oead.byml.from_binary(file.data)["file_list"][1]
                        ],
                        *new_entries["add"],
                    ] if entry not in new_entries["del"]
                }.values(),
                key=itemgetter("HashValue"),
            ))
        print("Creating and injecting new savedataformat.sarc...")
        new_savedata = oead.SarcWriter(
            endian=oead.Endianness.Big if util.get_settings("wiiu") else oead.
            Endianness.Little)
        num_files = ceil(len(merged_entries) / 8192)
        for i in range(num_files):
            end_pos = (i + 1) * 8192
            if end_pos > len(merged_entries):
                end_pos = len(merged_entries)
            data = oead.byml.to_binary(
                oead.byml.Hash({
                    "file_list":
                    oead.byml.Array([
                        {
                            "IsCommon": False,
                            "IsCommonAtSameAccount": False,
                            "IsSaveSecureCode": True,
                            "file_name": "game_data.sav",
                        },
                        oead.byml.Array(merged_entries[i * 8192:end_pos]),
                    ]),
                    "save_info":
                    oead.byml.Array([{
                        "directory_num": oead.S32(8),
                        "is_build_machine": True,
                        "revision": oead.S32(18203),
                    }]),
                }),
                big_endian=util.get_settings("wiiu"),
            )
            new_savedata.files[f"/saveformat_{i}.bgsvdata"] = data

        new_savedata.files[f"/saveformat_{num_files}.bgsvdata"] = oead.Bytes(
            savedata.get_file("/saveformat_6.bgsvdata").data)
        new_savedata.files[
            f"/saveformat_{num_files + 1}.bgsvdata"] = oead.Bytes(
                savedata.get_file("/saveformat_7.bgsvdata").data)

        del savedata
        new_save_bytes = new_savedata.write()[1]
        del new_savedata
        util.inject_file_into_sarc(
            "GameData/savedataformat.ssarc",
            util.compress(new_save_bytes),
            "Pack/Bootup.pack",
            create_sarc=True,
        )
        (util.get_master_modpack_dir() / "logs").mkdir(parents=True,
                                                       exist_ok=True)
        ((util.get_master_modpack_dir() / "logs" /
          "savedata.sarc").write_bytes(new_save_bytes))

        print("Updating RSTB...")
        rstable.set_size(
            "GameData/savedataformat.sarc",
            rstable.calculate_size("GameData/savedataformat.sarc",
                                   new_save_bytes),
        )
        del new_save_bytes

        slog_path.parent.mkdir(parents=True, exist_ok=True)
        with slog_path.open("w", encoding="utf-8") as l_file:
            l_file.write(xxhash.xxh64_hexdigest(str(new_entries)))
Example No. 25
    def perform_merge(self):
        force = self._options.get("force", False)
        glog_path = util.get_master_modpack_dir() / "logs" / "gamedata.log"

        modded_entries = self.consolidate_diffs(self.get_all_diffs())
        util.vprint("All gamedata diffs:")
        util.vprint(modded_entries)
        if not modded_entries:
            print("No gamedata merging necessary.")
            if glog_path.exists():
                glog_path.unlink()
            if (util.get_master_modpack_dir() / "logs" /
                    "gamedata.sarc").exists():
                (util.get_master_modpack_dir() / "logs" /
                 "gamedata.sarc").unlink()
            return
        if glog_path.exists() and not force:
            with glog_path.open("r") as l_file:
                if xxhash.xxh64_hexdigest(
                        str(modded_entries)) == l_file.read():
                    print("No gamedata merging necessary.")
                    return

        print("Loading stock gamedata...")
        gamedata = consolidate_gamedata(get_stock_gamedata())
        merged_entries = {
            data_type:
            oead.byml.Hash({entry["DataName"]: entry
                            for entry in entries})
            for data_type, entries in gamedata.items()
        }
        del gamedata

        print("Merging changes...")
        for data_type in {d for d in merged_entries if d in modded_entries}:
            util.dict_merge(
                merged_entries[data_type],
                modded_entries[data_type]["add"],
                shallow=True,
            )
            for entry in modded_entries[data_type]["del"]:
                try:
                    del merged_entries[data_type][entry]
                except KeyError:
                    continue

        merged_entries = oead.byml.Hash({
            data_type: oead.byml.Array({value
                                        for _, value in entries.items()})
            for data_type, entries in merged_entries.items()
        })
        print("Creating and injecting new gamedata.sarc...")
        new_gamedata = oead.SarcWriter(
            endian=oead.Endianness.Big if util.get_settings("wiiu") else oead.
            Endianness.Little)
        for data_type in merged_entries:
            num_files = ceil(len(merged_entries[data_type]) / 4096)
            for i in range(num_files):
                end_pos = (i + 1) * 4096
                if end_pos > len(merged_entries[data_type]):
                    end_pos = len(merged_entries[data_type])
                new_gamedata.files[
                    f"/{data_type}_{i}.bgdata"] = oead.byml.to_binary(
                        oead.byml.Hash({
                            data_type:
                            merged_entries[data_type][i * 4096:end_pos]
                        }),
                        big_endian=util.get_settings("wiiu"),
                    )
        new_gamedata_bytes = new_gamedata.write()[1]
        del new_gamedata
        util.inject_file_into_sarc(
            "GameData/gamedata.ssarc",
            util.compress(new_gamedata_bytes),
            "Pack/Bootup.pack",
            create_sarc=True,
        )
        (util.get_master_modpack_dir() / "logs").mkdir(parents=True,
                                                       exist_ok=True)
        (util.get_master_modpack_dir() / "logs" /
         "gamedata.sarc").write_bytes(new_gamedata_bytes)

        print("Updating RSTB...")
        rstable.set_size(
            "GameData/gamedata.sarc",
            rstable.calculate_size("GameData/gamedata.sarc",
                                   new_gamedata_bytes),
        )
        del new_gamedata_bytes

        glog_path.parent.mkdir(parents=True, exist_ok=True)
        with glog_path.open("w", encoding="utf-8") as l_file:
            l_file.write(xxhash.xxh64_hexdigest(str(modded_entries)))
Example No. 26
def compute_file_hash(file):
    return xxh64_hexdigest(file.read())
Example No. 27
def compute_hash_hex64(value):
    return xxh64_hexdigest(value)
Example No. 28
def hashit(dn, bloom_hashes, bloom_bits):
    a = int(xxhash.xxh64_hexdigest(dn, seed=0), 16)
    b = int(xxhash.xxh64_hexdigest(dn, seed=a), 16)
    idx = [(a + b * k) % bloom_bits
           for k in range(bloom_hashes)]  # get int for bit setting
    return idx
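
A minimal sketch of plugging hashit into a Bloom filter backed by a plain bytearray (the parameter values and DN strings below are arbitrary assumptions):

bloom_bits = 1 << 20          # size of the bit vector (arbitrary for this sketch)
bloom_hashes = 7              # number of derived indices per item (arbitrary)
bits = bytearray(bloom_bits // 8)

def bloom_add(dn):
    for idx in hashit(dn, bloom_hashes, bloom_bits):
        bits[idx // 8] |= 1 << (idx % 8)

def bloom_contains(dn):
    # May return a false positive, never a false negative.
    return all(bits[idx // 8] & (1 << (idx % 8))
               for idx in hashit(dn, bloom_hashes, bloom_bits))

bloom_add("cn=admin,dc=example,dc=com")
print(bloom_contains("cn=admin,dc=example,dc=com"))    # True
print(bloom_contains("cn=nobody,dc=example,dc=com"))   # almost certainly False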
Example No. 29
def get_volname_hash(volname):
    """XXHash based on Volume name"""
    return xxhash.xxh64_hexdigest(volname)
Example No. 30
    def cachefile(self, path):
        return os.path.join(self.cache_path, xxhash.xxh64_hexdigest(path))