示例#1
0
def hash_to_mem(file, print_progress=False):
    """Hash a file block-by-block and index the block hashes in memory.

    Reads *file* in chunks of the module-level ``block_size``, hashes each
    chunk with seeded xxh64, and returns both a mapping from each unique
    hash to the first block number where it appeared and the full ordered
    list of block hashes.

    :param file: path of the file to hash
    :param print_progress: when True, print a dot every 1000 blocks
    :return: {'dict': first-occurrence block index by hash,
              'list': ordered list of all block hashes}
    """
    block_num = 0
    # Renamed from ``dict``/``list`` so the builtins are not shadowed.
    first_seen = {}
    block_hashes = []
    start_time = time.time()
    with open(file, "rb") as f:
        # Read first block
        block = f.read(block_size)
        while block:
            # Seeded xxh64 replaces the (commented-out) sha256: much faster,
            # and cryptographic strength is not needed for dedup indexing.
            hex_dig = xxhash.xxh64_intdigest(block, 20181217)

            # Read next block
            block = f.read(block_size)

            # Record first occurrence only in the dict; always append to the list.
            first_seen.setdefault(hex_dig, block_num)
            block_hashes.append(hex_dig)

            block_num += 1

            # Print progress if enabled
            if print_progress and block_num % 1000 == 0:
                print('.', end='', flush=True)

    if print_progress:
        print()
    # The +0.1 guards against division by zero on very small/fast files.
    print(" Hashing speed: %8.3f MB/s" % (block_size * block_num / 1024 / (time.time() - start_time + 0.1) / 1024) )
    print(" Hashing took:  %d:%02d" % divmod(time.time() - start_time, 60) )
    return {'dict': first_seen, 'list': block_hashes}
示例#2
0
def diff_msyt(msyt: Path, hashes: dict, mod_out: Path, ref_dir: Path):
    """Diff a single .msyt text file against the stock hashes and reference.

    The temp file is deleted at the end in every path, as before.

    :param msyt: the temporary .msyt file produced for the mod
    :param hashes: map of canonical filename -> stock xxh64 digest
    :param mod_out: root directory the filename is expressed relative to
    :param ref_dir: directory holding the stock reference .msyt files
    :return: {} when excluded or unmodified, else {filename: changed entries}
    """
    diff = {}
    filename = msyt.relative_to(mod_out).as_posix()
    # Excluded texts are dropped outright.
    if any(ex in filename for ex in EXCLUDE_TEXTS):
        msyt.unlink()
        return {}
    data = msyt.read_bytes()
    xxh = xxhash.xxh64_intdigest(data)
    # Only changed files need parsing; a matching hash means no diff.
    if not (filename in hashes and hashes[filename] == xxh):
        text = data.decode("utf8")
        if filename not in hashes:
            # Entirely new file: every entry is part of the diff.
            diff[filename] = json.loads(text)["entries"]
        else:
            ref_text = (ref_dir / filename).read_text("utf-8")
            # Whitespace-insensitive comparison avoids false positives
            # from formatting-only differences.
            if "".join(text.split()) != "".join(ref_text.split()):
                ref_contents = json.loads(ref_text)
                contents = json.loads(text)
                diff[filename] = {
                    entry: value
                    for entry, value in contents["entries"].items()
                    if (entry not in ref_contents["entries"]
                        or value != ref_contents["entries"][entry])
                }
    msyt.unlink()
    return diff
示例#3
0
    def getnextblock(self) -> int:
        """Produce the next block of random bits from the generator.

        Tip:
            Each call advances the sequence exactly once; how the next
            index is derived depends on whether a cascade is active.

        Returns:
            A block of :attr:`BLOCK_SIZE_BITS` random bits as an int
        """
        # The hash input (derived from the sequence's seed) is constant for
        # the life of the sequence; the current index acts as the xxh64
        # seed for every draw, which keeps index generation simple and,
        # per prior testing, does not hurt value distribution.
        block = xxhash.xxh64_intdigest(self._hash_input, self._index)
        if self._cascading:
            # Cascade mode feeds each generated block forward as the next
            # index, so a single starting index spans multiple calls.
            self._index = block
        else:
            # Normal mode simply steps to the next index.
            self._index += 1
        return block
示例#4
0
    def hash(self) -> int:
        """
        Cached property containing the xxhash of the file

        :return: xxh64 integer digest of the file at ``self.path``
        """
        # NOTE(review): no caching decorator is visible here — presumably
        # applied where this method is declared; confirm.
        with open(self.path, "rb") as stream:
            contents = stream.read()
        return xxhash.xxh64_intdigest(contents)
示例#5
0
def _pack_sarc(folder: Path, tmp_dir: Path, hashes: dict):
    # Repack an unpacked SARC folder into an archive, skipping files whose
    # (decompressed) contents match the stock game dump, then replace the
    # folder on disk with the packed (possibly yaz0-compressed) bytes.
    packed = oead.SarcWriter(
        endian=oead.Endianness.Big
        if util.get_settings("wiiu")
        else oead.Endianness.Little
    )
    try:
        canon = util.get_canon_name(
            folder.relative_to(tmp_dir).as_posix(), allow_no_source=True
        )
        if canon not in hashes:
            raise FileNotFoundError("File not in game dump")
        stock_file = util.get_game_file(folder.relative_to(tmp_dir))
        try:
            old_sarc = oead.Sarc(util.unyaz_if_needed(stock_file.read_bytes()))
        except (RuntimeError, ValueError, oead.InvalidDataError):
            raise ValueError("Cannot open file from game dump")
        old_files = {f.name for f in old_sarc.get_files()}
    except (FileNotFoundError, ValueError):
        # No usable stock counterpart: pack every file in the folder.
        for file in {f for f in folder.rglob("**/*") if f.is_file()}:
            packed.files[file.relative_to(folder).as_posix()] = file.read_bytes()
    else:
        # Stock counterpart found: pack only files that are new or whose
        # decompressed contents hash differently from the stock copy.
        for file in {
            f
            for f in folder.rglob("**/*")
            if f.is_file() and not f.suffix in EXCLUDE_EXTS
        }:
            file_data = file.read_bytes()
            xhash = xxhash.xxh64_intdigest(util.unyaz_if_needed(file_data))
            file_name = file.relative_to(folder).as_posix()
            if file_name in old_files:
                old_hash = xxhash.xxh64_intdigest(
                    util.unyaz_if_needed(old_sarc.get_file(file_name).data)
                )
            # Short-circuit: old_hash is only read when file_name is in
            # old_files, i.e. when it was assigned just above.
            if file_name not in old_files or (xhash != old_hash):
                packed.files[file_name] = file_data
    finally:
        # NOTE(review): returning from a finally block swallows any
        # in-flight exception — hence the pylint pragma below.
        shutil.rmtree(folder)
        if not packed.files:
            return  # pylint: disable=lost-exception
        sarc_bytes = packed.write()[1]
        # Write the archive at the path the folder previously occupied;
        # ".s*" extensions (other than ".sarc") signal yaz0 compression.
        folder.write_bytes(
            util.compress(sarc_bytes)
            if (folder.suffix.startswith(".s") and not folder.suffix == ".sarc")
            else sarc_bytes
        )
示例#6
0
    def is_known(self, p):
        """Check a path, return True if it is known"""

        digest = xxh64_intdigest(p)
        # A hit in the unknown set promotes the path to known.
        if digest not in self.unknown:
            return digest in self.known
        self._add_known(digest, p)
        return True
示例#7
0
 def get_classless_hash(self) -> int:
     """Return a stable non-negative 63-bit hash over all slots except
     'versionclass', derived from a sorted-key JSON serialization."""
     state = {
         slot: getattr(self, slot)
         for slot in self.__slots__ if slot != 'versionclass'
     }
     serialized = json.dumps(state, sort_keys=True)
     # Mask to 63 bits so the result fits a signed 64-bit integer.
     return cast(int, xxhash.xxh64_intdigest(serialized)) & 0x7fffffffffffffff
示例#8
0
    def check_iter(self, paths):
        """Check paths from an iterable"""

        # failsafe for common dumb error
        if isinstance(paths, str):
            raise TypeError("expected iterable of strings, got a string")
        # Bind the attribute once; it is consulted for every path.
        unknown_hashes = self.unknown
        for candidate in paths:
            digest = xxh64_intdigest(candidate)
            if digest in unknown_hashes:
                self._add_known(digest, candidate)
示例#9
0
File: data.py  Project: arkhamsaiyan/BCML
def is_savedata_modded(savedata: oead.Sarc) -> bool:
    """Report whether any save-data file differs from the stock hashes.

    The last two entries of the sorted file list are skipped, as before.

    :param savedata: the save-data SARC archive to check
    :return: True if any checked file is missing from, or hashes
        differently than, the stock save-data hash table
    """
    hashes = get_savedata_hashes()
    sv_files = sorted(savedata.get_files(), key=lambda file: file.name)
    # Normalize names so lookups match the table's leading-slash style.
    fix_slash = "/" if not sv_files[0].name.startswith("/") else ""
    for svdata in sv_files[0:-2]:
        name = fix_slash + svdata.name
        # Return as soon as a modified file is found instead of hashing
        # every remaining file (original kept looping after the answer
        # was known; the result is identical).
        if name not in hashes or xxhash.xxh64_intdigest(svdata.data) != hashes[name]:
            return True
    return False
示例#10
0
def is_file_modded(name: str,
                   file: Union[bytes, Path],
                   count_new: bool = True) -> bool:
    """Report whether a game file differs from every known stock hash.

    :param name: canonical name of the file in the hash table
    :param file: the file's contents, or a path to read them from
    :param count_new: value returned for files absent from the table
    :return: True when the file's hash matches no stock hash for *name*
    """
    # Check table membership first so unknown files skip the read and
    # decompression work entirely (matches the sibling implementation).
    table = get_hash_table(get_settings("wiiu"))
    if name not in table:
        return count_new
    contents = (file if isinstance(file, bytes) else
                file.read_bytes() if isinstance(file, Path) else bytes(file))
    # yaz0 magic marks compressed content; hashes are of decompressed data.
    if contents[0:4] == b"Yaz0":
        contents = decompress(contents)
    fhash = xxhash.xxh64_intdigest(contents)
    return fhash not in table[name]
示例#11
0
    def get_resource(self, key: str) -> Tuple[str, int]:
        """Return resource for key

        Args:
            key: lookup key; hashed with the instance's seed

        Returns:
            name of resource, bucket
        """
        hashed = xxh64_intdigest(key, self.seed)
        bucket = self.anchor.get_bucket(hashed)
        return self.M[bucket], bucket
示例#12
0
File: util.py  Project: Endrr/BCML
def is_file_modded(name: str,
                   file: Union[bytes, Path],
                   count_new: bool = True) -> bool:
    """Report whether a game file differs from every known stock hash.

    :param name: canonical name of the file in the hash table
    :param file: the file's contents, or a path to read them from
    :param count_new: value returned for files absent from the table
    :return: True when the file's hash matches no stock hash for *name*
    :raises ValueError: if the file claims yaz0 compression but is invalid
    """
    table = get_hash_table(get_settings("wiiu"))
    # Unknown files skip the read/decompress work entirely.
    if name not in table:
        return count_new
    contents = (file if isinstance(file, bytes) else
                file.read_bytes() if isinstance(file, Path) else bytes(file))
    if contents[0:4] == b"Yaz0":
        try:
            contents = decompress(contents)
        except RuntimeError as err:
            raise ValueError(f"Invalid yaz0 file {name}") from err
    fhash = xxhash.xxh64_intdigest(contents)
    # Use the idiomatic membership operator (was `not fhash in ...`).
    return fhash not in table[name]
示例#13
0
    def __init__(self, number: int, ruledata: dict[str, Any]) -> None:
        """Build a rule from its parsed YAML mapping.

        Note: *ruledata* is mutated in place — its 'name' and 'namepat'
        entries are normalized before matchers/actions are generated.

        :param number: ordinal position of this rule in the ruleset
        :param ruledata: the rule's parsed YAML mapping
        """
        self.names = None        # explicit name list, set if 'name' present
        self.namepat = None      # name regex string, set if 'namepat' present
        self.rulesets = None     # set built from 'ruleset', if present
        self.norulesets = None   # set built from 'noruleset', if present
        self.number = number

        # The printable form of the rule and its hash identify the rule
        # textually (e.g. across runs).
        self.pretty = str(ruledata)
        self.texthash = xxhash.xxh64_intdigest(self.pretty)

        self._matchers = []
        self._actions = []

        # handle substitution of final name in name matchers
        if 'name' in ruledata:
            self.names = yaml_as_list(ruledata['name'])

            # DOLLAR0 placeholder in 'setname' is replaced by each matched
            # name (presumably a "$0" pattern — confirm where it's defined).
            if 'setname' in ruledata:
                self.names = [
                    DOLLAR0.sub(ruledata['setname'], name)
                    for name in self.names
                ]

            ruledata['name'] = self.names

        if 'namepat' in ruledata:
            # Strip newlines so multi-line YAML scalars form a single regex.
            self.namepat = ruledata['namepat'].replace('\n', '')

            if 'setname' in ruledata:
                self.namepat = DOLLAR0.sub(ruledata['setname'], self.namepat)

            ruledata['namepat'] = self.namepat

        if 'ruleset' in ruledata:
            self.rulesets = yaml_as_set(ruledata['ruleset'])

        if 'noruleset' in ruledata:
            self.norulesets = yaml_as_set(ruledata['noruleset'])

        # matchers: one per recognized keyword present in the rule data
        for keyword, generate_matcher in get_matcher_generators():
            if keyword in ruledata:
                self._matchers.append(generate_matcher(ruledata))

        # actions: likewise, one per recognized keyword
        for keyword, generate_action in get_action_generators():
            if keyword in ruledata:
                self._actions.append(generate_action(ruledata))
示例#14
0
    def __init__(self, key, n):
        """Build a 2**16-bucket lookup table of exponents of 4 mod key.p.

        :param key: object exposing a prime-like modulus ``p`` — presumably
            a discrete-log key; confirm against callers.
        :param n: number of consecutive powers of 4 to index
        """
        self.key = key
        self.n = n
        # Figure out sizes needed for array
        # Smallest unsigned array typecode whose bit width exceeds log2(n),
        # so every exponent 0..n-1 fits.
        for t in 'BHILQ':
            if array.array(t).itemsize * 8 > np.log(n) / np.log(2):
                typecode = t
                break

        y = Integer(1)
        table = [array.array(typecode) for _ in range(2**16)]
        for x in range(n):
            # Bucket each power y = 4**x (mod p) by a 16-bit slice of the
            # xxh64 hash of its decimal string.
            h = xxh64_intdigest(str(y)) % 2**16
            table[h].append(x)
            y = Integer(y) * Integer(4) % key.p
            if x % 1000000 == 0:
                print(x)  # progress indicator for long table builds
        self.table = table
示例#15
0
    def get_bucket(self, k: int) -> int:
        """Calculates bucket for key

        Starts at k mod M and follows the chain of removed buckets until a
        live one is found — presumably an AnchorHash-style lookup; confirm
        against the surrounding class.

        :param k: key, assumed to be uniform (already hashed)
        :return: assigned bucket
        """
        # uncomment next line if key not already hashed
        # k = xxh64_intdigest(bin(k), k)
        b = k % self.M
        while self.A[b] > 0:  # b is removed
            # next line is like random(seed=k,b)
            # could instead use: k = int(0xFFFFFFFFFFFFFFFF & (k * 2862933555777941757 + 1))
            # Deterministic re-hash: every caller walks the same chain for
            # the same key, which keeps assignments consistent.
            k = xxh64_intdigest(bin(k) + bin(b), k)
            h = k % self.A[b]
            while self.A[h] >= self.A[b]:  # b removed prior to h
                h = self.K[h]
            b = h
        return b
示例#16
0
    def create_list(modpath: str):
        """Build a {key: ModEntry} map for every file under *modpath*.

        Top-level files whose stem parses as hex are keyed by that number;
        nested files are keyed by the xxh64 of their normalized relpath.
        """
        modpath = path.normpath(modpath)
        entries = {}
        for candidate in iglob(f"{modpath}/*"):
            if not path.isfile(candidate):
                continue
            try:
                stem = path.splitext(path.basename(candidate))[0]
                hex_key = int(stem, 16)
                entries[hex_key] = ModEntry.create(candidate, hex_key)
            except ValueError:
                # Non-hex names (and ValueError from create) are skipped.
                pass
        for candidate in iglob(f"{modpath}/*/**/*", recursive=True):
            if not path.isfile(candidate):
                continue
            rel = path.relpath(candidate,
                               modpath).lower().replace('\\', '/')
            hashed_key = xxh64_intdigest(rel)
            entries[hashed_key] = ModEntry.create(candidate, hashed_key)

        return entries
示例#17
0
    def test_xxh64_overflow(self):
        """Seeds congruent modulo 2**64 must produce identical xxh64 state
        and digests, for both the streaming object and the one-shot APIs.

        The three pairs below are the original test's cases: (0, 2**64),
        (1, 2**64 + 1) and (2**65 - 1, 2**66 - 1); each pair is congruent
        mod 2**64. Parameterizing removes ~3x duplicated assertions.
        """
        s = 'I want an unsigned 64-bit seed!'
        for seed, overflowed in ((0, 2**64),
                                 (1, 2**64 + 1),
                                 (2**65 - 1, 2**66 - 1)):
            a = xxhash.xxh64(s, seed=seed)
            b = xxhash.xxh64(s, seed=overflowed)
            # The stored seed and all digest forms must agree.
            self.assertEqual(a.seed, b.seed)
            self.assertEqual(a.intdigest(), b.intdigest())
            self.assertEqual(a.hexdigest(), b.hexdigest())
            self.assertEqual(a.digest(), b.digest())
            # One-shot helpers must match the streaming object for both
            # the in-range and the overflowed seed.
            for sd in (seed, overflowed):
                self.assertEqual(a.intdigest(),
                                 xxhash.xxh64_intdigest(s, seed=sd))
                self.assertEqual(a.digest(), xxhash.xxh64_digest(s, seed=sd))
                self.assertEqual(a.hexdigest(),
                                 xxhash.xxh64_hexdigest(s, seed=sd))
示例#18
0
 def data_iterator():
     """Parse one GBK-encoded sample line into (slot_name, values) pairs.

     NOTE(review): this closes over ``line``, ``fea_sections`` and ``self``
     from an enclosing scope not visible here — only valid as a nested
     generator; confirm against the surrounding method.
     """
     fields = line.decode('gbk').encode('utf-8').strip('\r\n').split(
         ' ')
     # NOTE(review): a malformed line yields None but execution then falls
     # through and keeps parsing the bad fields — a ``return`` after the
     # yield looks intended; confirm with callers before changing.
     if len(fields) != 32:
         yield None
     label_ctr = int(fields[0])
     slots = []
     feature_fields = fields[1:]
     for i in range(0, len(fea_sections)):
         slot = []
         describe = fea_sections[i]['fea_des']
         fea_type = fea_sections[i]['fea_type']
         size = int(fea_sections[i]['max_sz'])
         value_list = feature_fields[i].split(',')
         if fea_type in ['sparse']:
             for value in value_list:
                 # why do hashing here? hashing should incorporate slotid
                 # Buckets each raw feature value into [0, size) via xxh64.
                 slot.append(xxhash.xxh64_intdigest(value) % size)
             # Empty sparse slots are padded with a single 0.
             if len(slot) == 0:
                 slot.append(0)
             slots.append(slot)
     # The CTR label rides along as the final slot.
     slots.append([label_ctr])
     yield zip(self.slot_name, slots)
示例#19
0
    def __init__(self, key, n):
        """Build a 2**16-bucket table of exponents of 4 mod key.p, stored
        as a rectangular 2-D numpy array (rows zero-padded to equal length).

        :param key: object exposing a prime-like modulus ``p``
        :param n: number of consecutive powers of 4 to index
        """
        self.key = key
        self.n = n
        # Figure out sizes needed for array
        # Smallest unsigned array typecode whose bit width exceeds log2(n).
        for t in 'BHILQ':
            if array.array(t).itemsize * 8 > np.log(n) / np.log(2):
                typecode = t
                break

        y = Integer(1)
        table = [array.array(typecode) for _ in range(2**16)]
        for x in range(n):
            # Bucket each power y = 4**x (mod p) by a 16-bit hash slice.
            h = xxh64_intdigest(str(y)) % 2**16
            table[h].append(x)
            y = Integer(y) * Integer(4) % key.p
            if x % 1000000 == 0:
                print(x)  # progress indicator for long table builds

        # Figure out equivalent numpy sizes
        s = array.array(typecode).itemsize * 8
        if s <= 8:
            nptype = np.uint8
        elif s <= 16:
            nptype = np.uint16
        elif s <= 32:
            nptype = np.uint32
        elif s <= 64:
            nptype = np.uint64
        else:
            raise TypeError("No numpy type large enough to hold array")
        # Pad every bucket to the longest bucket so the final array is
        # rectangular; ndarray.resize zero-fills in place.
        maxtable_length = max([len(t) for t in table])
        table_of_nps = []
        for t in table:
            t_np = np.array(t, dtype=nptype)
            t_np.resize(maxtable_length)
            table_of_nps.append(t_np)
        self.table = np.asarray(table_of_nps)
示例#20
0
    def check(self, p):
        """Check a single hash, print and add to known on match"""

        # Any output presumably happens inside _add_known — confirm there.
        digest = xxh64_intdigest(p)
        if digest in self.unknown:
            self._add_known(digest, p)
示例#21
0
File: binfile.py  Project: Morilli/CDTB
 def compute_hash(cls, s):
     """Return the xxh64 digest of *s*, lower-cased so lookups are
     case-insensitive."""
     lowered = s.lower()
     return xxh64_intdigest(lowered)
def xxhash_u64_v1(): # 100 loops, best of 100: 700 usec per loop
    # Benchmark body: hash the text span between each prepared match pair;
    # the digest itself is deliberately discarded.
    for left, right in prepare():
        _ = xxhash.xxh64_intdigest(TEXT[left.end():right.start()])
示例#23
0
 def __hashing_shingles(self, shingles):
     """Map every shingle to its xxh64 integer digest."""
     return list(map(xxhash.xxh64_intdigest, shingles))
示例#24
0
 def dlog4(self, q):
     """Look up x with 4**x == q (mod key.p) via the precomputed hash
     table; returns None implicitly when no candidate verifies."""
     bucket = self.table[xxh64_intdigest(str(q)) % 2**16]
     for exponent in bucket:
         # Buckets can hold hash collisions, so verify each candidate.
         if pow(Integer(4), int(exponent), self.key.p) == q:
             return exponent
示例#25
0
File: data.py  Project: arkhamsaiyan/BCML
def get_savedata_hashes() -> dict:
    """Map each stock save-data file name to its xxh64 digest.

    :return: {file name: xxh64 integer digest} for every file in the
        stock save-data archive

    Note: the return annotation was ``-> {}`` (an empty dict *instance*,
    not a type) — corrected to ``dict``.
    """
    savedata = get_stock_savedata()
    return {
        file.name: xxhash.xxh64_intdigest(file.data)
        for file in savedata.get_files()
    }
示例#26
0
def get_gamedata_hashes() -> Dict[str, int]:
    """Map each stock game-data file name to its xxh64 digest."""
    stock = get_stock_gamedata()
    hashes = {}
    for entry in stock.get_files():
        hashes[entry.name] = xxhash.xxh64_intdigest(entry.data)
    return hashes
示例#27
0
def murmur(x):
    # NOTE(review): despite the name, this hashes with xxh64, not
    # MurmurHash — the name is kept for callers.
    digest = xxhash.xxh64_intdigest(x)
    return np.uint64(digest)
示例#28
0
def compute_hash_int64(value):
    """Hash *value* with xxh64 and fold the unsigned 64-bit digest into
    signed 64-bit range via two's-complement reinterpretation."""
    unsigned = xxh64_intdigest(value)
    # XOR-then-subtract of the sign bit maps [0, 2**64) onto
    # [-2**63, 2**63) exactly as a C int64 cast would.
    return (unsigned ^ 0x8000000000000000) - 0x8000000000000000
示例#29
0
def murmur(x):
    """Return the xxh64 digest of *x* as a numpy uint64.

    NOTE(review): name says murmur, implementation is xxhash.
    """
    return np.uint64(xxhash.xxh64_intdigest(x))
示例#30
0
def key_to_hash(key):
    """Normalize a key: strings become a 40-bit xxh64 digest of their
    lower-cased form; anything else passes through unchanged."""
    if not isinstance(key, str):
        return key
    return xxh64_intdigest(key.lower()) & 0xffffffffff
示例#31
0
def hash_output(dat_array):
    """Return the xxh64 digest (seed 0) of the raw bytes backing
    *dat_array*'s buffer."""
    raw = dat_array.data.tobytes()
    return xxhash.xxh64_intdigest(raw, seed=0)
示例#32
0
# Parse the raw static header table: one whitespace-separated entry per
# line; keep only rows with at least an index and a header name, then
# sort by header name so groupby sees contiguous groups.
static_table = [line.split() for line in static_table_raw.splitlines()]
static_table = filter(lambda entry: len(entry) >= 2, static_table)
static_table = sorted(static_table, key=lambda x: x[1])

cases = ""

# Group rows by header name (column 1); each group becomes one case in
# the generated source, keyed on the header's xxh64 digest.
for header, entry in itertools.groupby(static_table, lambda x: x[1]):
    entry = list(entry)

    indices = [nested[0] for nested in entry]
    # Columns 2+ (when present) form the header's predefined value.
    values = [str.join(" ", nested[2:]) for nested in entry if len(nested) > 2]

    if len(values) == 0:
        # Header with no predefined values: a single-index case.
        index = entry[0][0]
        cases += no_values_template.format(xxhash.xxh64_intdigest(header),
                                           header, index)
    else:
        value_cases = ""

        # Nested cases keyed on the xxh64 digest of each predefined value.
        for index, value in zip(indices, values):
            value_cases += value_template.format(xxhash.xxh64_intdigest(value),
                                                 value, index)

        # indices[0] serves as the fallback when no value case matches.
        cases += values_template.format(xxhash.xxh64_intdigest(header), header,
                                        value_cases, indices[0])

find_index = find_index_template.format(cases)

encode_generated_template = """\
#include <bnl/http3/header.hpp>