def __init__(self, input_dim, output_dim, virtual_weight_dim, placeholders,
             layer_num, dropout=0., sparse_inputs=False, act=tf.nn.relu,
             bias=False, featureless=False, **kwargs):
    super(GraphConvolutionCompressed, self).__init__(**kwargs)

    if dropout:
        self.dropout = placeholders['dropout']
    else:
        self.dropout = 0.

    self.act = act
    self.support = placeholders['support']
    self.sparse_inputs = sparse_inputs
    self.featureless = featureless
    self.bias = bias
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.virtual_weight_dim = virtual_weight_dim
    self.seed = layer_num

    # helper variable for sparse dropout
    self.num_features_nonzero = placeholders['num_features_nonzero']

    with tf.variable_scope(self.name + '_vars'):
        for i in range(len(self.support)):
            # Use uniform weight initialization
            self.vars['weights_' + str(i)] = uniform([virtual_weight_dim],
                                                     name='weights_' + str(i))
        if self.bias:
            self.vars['bias'] = zeros([output_dim], name='bias')

    # build up hash mappings
    mappings = []
    signs = []
    for k in range(self.input_dim):
        mappings.append([])
        signs.append([])
        for j in range(self.output_dim):
            # Compute hash index
            virtual_weight_idx = \
                xxhash.xxh32_intdigest(str((k, j)), self.seed) % self.virtual_weight_dim
            hashed_sign = 1. - 2 * (xxhash.xxh32_intdigest(str((k, j)), self.seed + 1000) % 2)
            mappings[-1].append(virtual_weight_idx)
            signs[-1].append(hashed_sign)
    self.mappings = tf.stack(mappings)
    self.signs = tf.stack(signs)

    if self.logging:
        self._log_vars()
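# Hedged sketch (an assumption, not shown in the source): how the hashed
# mappings and signs above are typically consumed in the forward pass. The full
# (input_dim, output_dim) weight matrix is reconstituted by gathering entries
# of the small shared vector and applying the per-position signs, in the spirit
# of HashedNets-style weight sharing. The function name is illustrative.
import tensorflow as tf

def expand_virtual_weights(virtual_weights, mappings, signs):
    # virtual_weights: [virtual_weight_dim] trainable shared vector
    # mappings: [input_dim, output_dim] int indices into virtual_weights
    # signs: [input_dim, output_dim] values in {-1.0, +1.0}
    gathered = tf.gather(virtual_weights, mappings)  # [input_dim, output_dim]
    return gathered * tf.cast(signs, virtual_weights.dtype)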
def _gen_permutation(self) -> List[List[int]]:
    """generate permutations"""
    m = self.m
    permutation: List[List[int]] = []
    for i, name in enumerate(list(self.nodes)):
        offset = xxh32_intdigest(name, seed=const.MAGLEV_OFFSET_SEED) % m
        skip = xxh32_intdigest(name, seed=const.MAGLEV_SKIP_SEED) % (m - 1) + 1
        permutation.append([])
        for j in range(0, m):
            permutation[i].append((offset + j * skip) % m)
    return permutation
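# Hedged sketch (not in the source): how a Maglev permutation like the one
# above is typically consumed to fill the lookup table. Assumes m is prime, so
# each row of `permutation` is a full permutation of [0, m). Each node claims
# its next preferred slot in turn until every slot is owned.
def populate_maglev_table(permutation, n, m):
    next_idx = [0] * n   # per-node cursor into its permutation row
    table = [-1] * m     # slot -> node index; -1 means unclaimed
    filled = 0
    while filled < m:
        for i in range(n):
            # Advance past slots already claimed by other nodes.
            c = permutation[i][next_idx[i]]
            while table[c] >= 0:
                next_idx[i] += 1
                c = permutation[i][next_idx[i]]
            table[c] = i
            next_idx[i] += 1
            filled += 1
            if filled == m:
                break
    return table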
def check_hot(self, filename):
    # A file counts as hot only if every hashed counter reaches the threshold;
    # the minimum over the hash functions upper-bounds the true access count.
    for i in range(self.hashFuncNum):
        # xxh32 is 32-bit; dropping the low 7 bits maps it into 2**25 slots.
        hash_code = xxhash.xxh32_intdigest(filename, self.seeds[i]) // 2**7
        hot_val = self.hash_table[hash_code]
        if hot_val < self.threshold:
            return False
    return True
def __get_anchors(self, number: int):
    # Derive num_hash table positions for `number` by varying only the seed.
    number_str = str(number)
    anchors = []
    for seed in range(self.__num_hash):
        anchor = xxh32_intdigest(number_str, seed=seed)
        anchors.append(anchor % self.__length)
    return anchors
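# Hedged sketch (not from the source): per-seed anchors like the ones above are
# the k hash positions of a Bloom filter over a bit array. `length` and
# `num_hash` mirror the private attributes used in __get_anchors; the class is
# illustrative.
from xxhash import xxh32_intdigest

class TinyBloom:
    def __init__(self, length=1024, num_hash=4):
        self.bits = [False] * length
        self.length = length
        self.num_hash = num_hash

    def _anchors(self, number):
        s = str(number)
        return [xxh32_intdigest(s, seed=seed) % self.length
                for seed in range(self.num_hash)]

    def add(self, number):
        for pos in self._anchors(number):
            self.bits[pos] = True

    def might_contain(self, number):
        # False positives are possible; false negatives are not.
        return all(self.bits[pos] for pos in self._anchors(number))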
def test_xxh32_overflow(self):
    s = 'I want an unsigned 32-bit seed!'
    # Seeds are reduced modulo 2**32, so each pair below must produce
    # identical hashes.
    for seed_lo, seed_hi in ((0, 2**32), (1, 2**32 + 1),
                             (2**33 - 1, 2**34 - 1), (2**65 - 1, 2**66 - 1)):
        a = xxhash.xxh32(s, seed=seed_lo)
        b = xxhash.xxh32(s, seed=seed_hi)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        for seed in (seed_lo, seed_hi):
            self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=seed))
            self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=seed))
            self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=seed))
def __init__(self, name, **kwargs):
    super().__init__(name, **kwargs)
    self.description = kwargs.get("description", "")
    self.idx = kwargs.get("idx", [])
    self.perturbable = kwargs.get("perturbable", True)
    # TODO: (Verify) Could initialize the RNG right away, since cloudpickle
    # should still be able to pickle it
    self._rng_seed = xxhash.xxh32_intdigest(name)
    # RNG used for permuting this feature - see perturbations.py: 'feature.rng'
    self.rng = None
    for key, value in ATTRIBUTES.items():
        setattr(self, key, value)
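# Hedged sketch (an assumption): the deferred RNG is presumably materialized
# later from the name-derived seed, e.g. in perturbations.py, along the lines of
#
#     feature.rng = np.random.default_rng(feature._rng_seed)
#
# Seeding from xxh32_intdigest(name) keeps each feature's permutations
# reproducible across runs.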
def add_access(self, filename):
    self.counter += 1
    hot_flag = True
    for i in range(self.hashFuncNum):
        hash_code = xxhash.xxh32_intdigest(filename, self.seeds[i]) // 2**7
        hot_val = self.hash_table[hash_code]
        # The file is hot only if all of its counters reach the threshold
        # after this access.
        if hot_val + 1 < self.threshold:
            hot_flag = False
        self.hash_table[hash_code] = hot_val + 1
    if hot_flag:
        self.hot_access += 1
    self.total_access += 1
    # Periodically age the sketch so stale entries cool down.
    if self.counter % self.cycle == 0:
        self.update()
    return hot_flag
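# Hedged sketch (an assumption): update() is not shown in the source. A common
# aging policy for count-min-style hot/cold tracking is to halve every counter
# once per cycle so files that stop being accessed eventually fall below the
# threshold.
def update(self):
    for i in range(len(self.hash_table)):
        self.hash_table[i] //= 2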
def post(self, project_id):
    parser = reqparse.RequestParser()
    parser.add_argument('project-name', type=str, required=True,
                        help='project name')
    parser.add_argument('project-path', type=str, required=True,
                        help='project path')
    parser.add_argument('workflows', type=int, default=0,
                        help='number of connected workflows')
    parser.add_argument('models', type=int, default=0,
                        help='number of connected models')
    args = parser.parse_args()
    # The route's project_id is replaced by a deterministic hash of the
    # project name.
    project_id = str(xxh32_intdigest(args['project-name']))
    args['project-id'] = project_id
    db = get_plasma_db()
    project_collection = db.get_collection('projects')
    project_collection.insert(dict(args))
    response_data = {'project-id': project_id}
    response = generate_response(201, response_data)
    return response
def is_file_modded(
    self, file_name: str, data: Union[ByteString, int], flag_new: bool = True
) -> bool:
    """Checks a file to see if it has been modified. Automatically
    decompresses yaz0 data.

    Args:
        file_name (str): The canonical resource path of the file to check
        data (Union[ByteString, int]): Either the file data (as a bytes-like
            object) or an xxh32 hash as an int
        flag_new (bool, optional): Whether to flag new files (not in vanilla
            BOTW) as modified. Defaults to True.

    Returns:
        bool: True if the file's hash does not match any known hash of the
            original version, i.e. the file appears modified.
    """
    if file_name not in self._table:
        return flag_new
    if isinstance(data, int):
        return data not in self._table[file_name]
    if data[0:4] == b"Yaz0":
        data = decompress(data)
    return xxh32_intdigest(data) not in self._table[file_name]
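# Hedged usage sketch (illustrative names and paths): the checker accepts
# either raw bytes (yaz0-compressed or not) or a precomputed xxh32 digest.
#
#     with open("content/Actor/ActorInfo.product.sbyml", "rb") as f:
#         modded = table.is_file_modded("Actor/ActorInfo.product.sbyml", f.read())
#     # or, with a precomputed digest:
#     modded = table.is_file_modded("Actor/ActorInfo.product.sbyml", 0x12345678)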
def compute_hash_int32(value):
    # Reinterpret the unsigned 32-bit digest as a signed int32
    # (two's complement).
    n = xxh32_intdigest(value)
    return (n ^ 0x80000000) - 0x80000000
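# Worked example of the sign trick above: XOR-ing with 0x80000000 and then
# subtracting it maps an unsigned 32-bit value onto the two's-complement range.
assert (0x00000005 ^ 0x80000000) - 0x80000000 == 5       # small values pass through
assert (0xFFFFFFFF ^ 0x80000000) - 0x80000000 == -1      # top of range wraps negative
assert (0x80000000 ^ 0x80000000) - 0x80000000 == -2**31  # 2**31 becomes INT32_MIN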
def lookup(self, key: str) -> str:
    """lookup node for key"""
    hashed = xxh32_intdigest(key)
    return self.nodes[self.table[hashed % self.m]]
def hash_array(kmer):
    """Return a hash of a numpy array."""
    return xxhash.xxh32_intdigest(kmer.tobytes())
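# Hedged usage note: tobytes() serializes the array's raw buffer, so equal
# arrays with the same dtype and memory layout hash identically; a different
# dtype or byte order yields a different buffer and (almost surely) a
# different digest.
import numpy as np
import xxhash

a = np.arange(3, dtype=np.int64)
b = np.arange(3, dtype=np.int64)
assert xxhash.xxh32_intdigest(a.tobytes()) == xxhash.xxh32_intdigest(b.tobytes())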
def __init__(
    self,
    split,
    img_size=1344,
    batch_size=16,
    augment=False,
    hyp=None,
    rect=False,
    image_weights=False,
    cache_images=False,
    single_cls=False,
    stride=32,
    pad=0.0,
    prefix="",
):
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.stride = stride
    self.split = split
    self.imgs = []
    self.img_files = []
    self.labels = []
    self.dimensions = 0
    self.labels_map = {
        "table": 0,
        # "header": 1,
        # "para": 2,
    }
    db_client = MongoClient(os.getenv("MONGO_HOST", "localhost"))
    db = db_client[os.getenv("MONGO_DATABASE", "doc-store-dev")]
    file_idxs = db["bboxes"].distinct(
        "file_idx",
        {
            "audited": True,
            "block_type": {"$in": list(self.labels_map.keys())},
        },
    )
    for file_idx in file_idxs:
        try:
            data = load_json(file_idx)
        except Exception:
            print(f"error loading {file_idx}")
            continue
        # self.img_names.append(str(filename.name).split(".json")[0])
        if not self.dimensions:
            self.dimensions = len(data["metadata"]["features"]) + 1
        else:
            assert self.dimensions == len(data["metadata"]["features"]) + 1
        pages = db["bboxes"].distinct(
            "page_idx",
            {
                "file_idx": file_idx,
                "audited": True,
                "block_type": {"$in": list(self.labels_map.keys())},
            },
        )
        for page_idx in pages:
            if self.split and isinstance(self.split, int):
                # Deterministic train/test split: hash the (file, page) pair
                # and bucket it into one of ten shards.
                h = xxh32_intdigest(f"{file_idx}-{page_idx}")
                if self.split > 5:
                    if h % 10 >= self.split:
                        print("skipping current page for training")
                        continue
                else:
                    if h % 10 < (10 - self.split):
                        print("skipping current page for testing")
                        continue
            tokens = data["data"][page_idx]
            # HWC. We hard code the padding to 0, thus the image must be
            # square (H:1344, W:1344 by default).
            features = np.zeros(
                (self.img_size, self.img_size, self.dimensions),
                dtype=np.float32,
            )
            # make features, HWF
            for token in tokens:
                # x1, y1, x2, y2 = xyxy_to_training(token["position"]["xyxy"])
                x1, y1, x2, y2 = [round(x) for x in token["position"]["xyxy"]]
                # token position mask
                features[y1:y2, x1:x2, 0] = 1
                for i, feature in enumerate(token["features"]):
                    features[y1:y2, x1:x2, i + 1] = feature
            # make labels
            page_labels = []
            for label in db["bboxes"].find({
                    "file_idx": file_idx,
                    "page_idx": page_idx,
                    "audited": True,
                    "block_type": {"$in": list(self.labels_map.keys())},
            }):
                label_type = self.labels_map[label["block_type"]]
                try:
                    xyxy = correct_box(features, label["bbox"])
                except Exception:
                    continue
                label_coords = xyxy_to_training(
                    xyxy, dim=(self.img_size, self.img_size)
                )
                labeled = [label_type] + label_coords
                page_labels.append(np.array(labeled))
            if page_labels:
                self.imgs.append(features)
                self.img_files.append(f"file:{file_idx} page:{page_idx + 1}")
                self.labels.append(page_labels)
                print(
                    f"{len(page_labels)} labels for document {file_idx}, "
                    f"page {page_idx + 1}"
                )
    print(
        f"Found {len(self.imgs)} images with "
        f"{sum([len(x) for x in self.labels])} labels"
    )
    # label found, label missing, label empty, label corrupted, total label.
    self.shapes = np.array(
        [[x.shape[0], x.shape[1]] for x in self.imgs], dtype=np.float64
    )
    # convert to np array
    self.labels = [np.array(x) for x in self.labels]
    n = len(self.imgs)  # number of images
    bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
    nb = bi[-1] + 1  # number of batches
    self.batch = bi  # batch index of image
    self.n = n
    self.indices = range(n)
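# Hedged sketch isolating the split rule above: hashing the (file, page) key
# buckets each page into one of ten deterministic shards, so train/test
# membership is stable across runs without a stored split file. The function
# name is illustrative.
from xxhash import xxh32_intdigest

def in_train_split(file_idx, page_idx, split=8):
    # With split=8, pages hashing into buckets 0..7 (~80%) are kept for training.
    return xxh32_intdigest(f"{file_idx}-{page_idx}") % 10 < split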
def hash(self, serialized_item):
    return xxh32_intdigest(serialized_item, seed=self.__seed)
elif len(ps) == 0:
    continue
# Disabled de-duplication path below: rows are currently inserted
# unconditionally.
"""
found = False
hsh = xxh32_intdigest(ps)
query = [r for r in table.where(f"xhashx == {hsh}")]
if len(query) >= 1:
    for r in query:
        if r['password'] == ps:
            r['src'] |= 1
            r['cnt'] += 1
            found = True
            break
if not found:
"""
pswd['xhashx'] = xxh32_intdigest(ps)
pswd["password"] = ps
pswd['src'] = 1 << source
pswd.append()
working += 1
# except UnicodeDecodeError:
table.flush()
# os.remove(DIRNAME + '/' + fname)
except:
    print("calling except")
    table.flush()
    h5file.close()
def fast_hash(x):
    return xxhash.xxh32_intdigest(x)
def test_xxh32_intdigest(self):
    self.assertEqual(xxhash.xxh32_intdigest('a'), 1426945110)
    self.assertEqual(xxhash.xxh32_intdigest('a', 0), 1426945110)
    self.assertEqual(xxhash.xxh32_intdigest('a', 1), 4111757423)
    self.assertEqual(xxhash.xxh32_intdigest('a', 2**32 - 1), 3443684653)
def hash_torch(x):
    # Convert the tensor to numpy and hash its raw byte buffer.
    buf = judo.to_numpy(x).tobytes()
    return xxhash.xxh32_intdigest(buf)