Example #1
    def __init__(self, input_dim, output_dim, virtual_weight_dim, placeholders,
                 layer_num, dropout=0., sparse_inputs=False, act=tf.nn.relu,
                 bias=False, featureless=False, **kwargs):
        super(GraphConvolutionCompressed, self).__init__(**kwargs)

        if dropout:
            self.dropout = placeholders['dropout']
        else:
            self.dropout = 0.

        self.act = act
        self.support = placeholders['support']
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        self.bias = bias
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.virtual_weight_dim = virtual_weight_dim
        self.seed = layer_num

        # helper variable for sparse dropout
        self.num_features_nonzero = placeholders['num_features_nonzero']

        with tf.variable_scope(self.name + '_vars'):
            for i in range(len(self.support)):
                # Use uniform weight initialization
                self.vars['weights_' + str(i)] = uniform([virtual_weight_dim],
                                                         name='weights_' + str(i))
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        # build up hash mappings
        mappings = []
        signs = []
        for k in range(self.input_dim):
            mappings.append([])
            signs.append([])
            for j in range(self.output_dim):
                # Hash (k, j) into an index over the shared virtual weight vector
                virtual_weight_idx = \
                    xxhash.xxh32_intdigest(str((k, j)), self.seed) % self.virtual_weight_dim
                # Map the parity of a second hash to a sign in {-1., +1.}
                hashed_sign = 1. - 2. * (xxhash.xxh32_intdigest(str((k, j)), self.seed + 1000) % 2)
                mappings[-1].append(virtual_weight_idx)
                signs[-1].append(hashed_sign)
        self.mappings = tf.stack(mappings)
        self.signs = tf.stack(signs)

        if self.logging:
            self._log_vars()
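
A minimal sketch, assuming a HashedNets-style forward pass (the layer's actual call method is not shown here), of how the mappings and signs built above could expand the shared virtual weight vector into a dense input_dim x output_dim matrix:

    import tensorflow as tf

    def expand_hashed_weights(virtual_weights, mappings, signs):
        # Each (k, j) entry reuses virtual_weights[mappings[k, j]],
        # flipped by the precomputed sign in {-1., +1.}
        return tf.gather(virtual_weights, mappings) * signs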
Example #2
    def _gen_permutation(self) -> List[List[int]]:
        """generate permutations"""

        m = self.m
        permutation: List[List[int]] = []

        for i, name in enumerate(list(self.nodes)):
            offset = xxh32_intdigest(name, seed=const.MAGLEV_OFFSET_SEED) % m
            skip = xxh32_intdigest(name,
                                   seed=const.MAGLEV_SKIP_SEED) % (m - 1) + 1
            permutation.append([])

            for j in range(m):
                permutation[i].append((offset + j * skip) % m)

        return permutation
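
A standalone sketch (seeds and names here are illustrative) of why each row above is a permutation: when m is prime, every skip in [1, m-1] is coprime with m, so offset + j * skip mod m visits each slot exactly once:

    from xxhash import xxh32_intdigest

    m = 7  # Maglev table sizes are chosen prime
    name = "node-a"
    offset = xxh32_intdigest(name, seed=1) % m
    skip = xxh32_intdigest(name, seed=2) % (m - 1) + 1
    row = [(offset + j * skip) % m for j in range(m)]
    assert sorted(row) == list(range(m))  # every slot appears exactly once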
Example #3
 def check_hot(self, filename):
     for i in range(self.hashFuncNum):
         # Drop the low 7 bits: the 32-bit digest becomes a 25-bit table index
         hash_code = xxhash.xxh32_intdigest(filename, self.seeds[i]) // 2**7
         hot_val = self.hash_table[hash_code]
         if hot_val < self.threshold:
             return False
     return True
Example #4
    def __get_anchors(self, number: int):
        number_str = str(number)
        anchors = []
        for seed in range(self.__num_hash):
            # Distinct seeds emulate independent hash functions
            anchor = xxh32_intdigest(number_str, seed=seed)
            anchors.append(anchor % self.__length)

        return anchors
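
The anchors are independent table positions in the style of a Bloom filter: seeding xxh32 with 0..num_hash-1 stands in for that many hash functions. A quick illustration (sizes are arbitrary):

    from xxhash import xxh32_intdigest

    num_hash, length = 3, 64
    anchors = [xxh32_intdigest("42", seed=s) % length for s in range(num_hash)]
    assert all(0 <= a < length for a in anchors)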
Example #5
    def test_xxh32_overflow(self):
        s = 'I want an unsigned 32-bit seed!'
        a = xxhash.xxh32(s, seed=0)
        b = xxhash.xxh32(s, seed=2**32)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=0))
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=2**32))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=0))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=2**32))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=0))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=2**32))

        a = xxhash.xxh32(s, seed=1)
        b = xxhash.xxh32(s, seed=2**32+1)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=1))
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=2**32+1))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=1))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=2**32+1))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=1))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=2**32+1))

        a = xxhash.xxh32(s, seed=2**33-1)
        b = xxhash.xxh32(s, seed=2**34-1)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=2**33-1))
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=2**34-1))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=2**33-1))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=2**34-1))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=2**33-1))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=2**34-1))

        a = xxhash.xxh32(s, seed=2**65-1)
        b = xxhash.xxh32(s, seed=2**66-1)
        self.assertEqual(a.seed, b.seed)
        self.assertEqual(a.intdigest(), b.intdigest())
        self.assertEqual(a.hexdigest(), b.hexdigest())
        self.assertEqual(a.digest(), b.digest())
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=2**65-1))
        self.assertEqual(a.intdigest(), xxhash.xxh32_intdigest(s, seed=2**66-1))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=2**65-1))
        self.assertEqual(a.digest(), xxhash.xxh32_digest(s, seed=2**66-1))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=2**65-1))
        self.assertEqual(a.hexdigest(), xxhash.xxh32_hexdigest(s, seed=2**66-1))
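
The behavior these assertions pin down: xxh32 reduces its seed modulo 2**32, so any two seeds that agree in their low 32 bits yield identical digests. Condensed:

    import xxhash

    s = 'I want an unsigned 32-bit seed!'
    assert xxhash.xxh32_intdigest(s, seed=2**32) == xxhash.xxh32_intdigest(s, seed=0)
    assert xxhash.xxh32_intdigest(s, seed=2**32 + 1) == xxhash.xxh32_intdigest(s, seed=1)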
Example #6
 def __init__(self, name, **kwargs):
     super().__init__(name, **kwargs)
     self.description = kwargs.get("description", "")
     self.idx = kwargs.get("idx", [])
     self.perturbable = kwargs.get("perturbable", True)
     # TODO: (Verify) Could initialize the RNG right away, since cloudpickle should still be able to pickle it
     self._rng_seed = xxhash.xxh32_intdigest(name)
     self.rng = None  # RNG used for permuting this feature - see perturbations.py: 'feature.rng'
     for key, value in ATTRIBUTES.items():
         setattr(self, key, value)
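
Hashing the feature name gives a seed that is stable across runs and processes, so the deferred RNG is reproducible. A sketch of how it might later be created (numpy is an assumption; the source only stores the seed):

    import numpy as np
    import xxhash

    seed = xxhash.xxh32_intdigest("age")  # "age" is an illustrative feature name
    rng = np.random.default_rng(seed)     # same name -> same permutations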
Example #7
 def add_access(self, filename):
     self.counter += 1
     hot_flag = True
     for i in range(self.hashFuncNum):
         # Same 25-bit table indexing as check_hot
         hash_code = xxhash.xxh32_intdigest(filename, self.seeds[i]) // 2**7
         hot_val = self.hash_table[hash_code]
         if hot_val + 1 < self.threshold:
             hot_flag = False
         self.hash_table[hash_code] = hot_val + 1
     if hot_flag:
         self.hot_access += 1
     self.total_access += 1
     if self.counter % self.cycle == 0:
         self.update()
     return hot_flag
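
A minimal wiring sketch for the counter shared by check_hot and add_access (field names follow the snippets; sizes and threshold are assumptions). Integer-dividing a 32-bit digest by 2**7 leaves a 25-bit index, so the table needs 2**25 counters:

    import xxhash

    class HotFileSketch:
        def __init__(self, hash_func_num=4, threshold=8):
            self.hashFuncNum = hash_func_num
            self.seeds = list(range(hash_func_num))  # assumed: one seed per hash
            self.hash_table = [0] * 2**25            # digest // 2**7 fits here
            self.threshold = threshold

        def check_hot(self, filename):
            # Hot only if every hashed counter has reached the threshold
            return all(
                self.hash_table[xxhash.xxh32_intdigest(filename, s) // 2**7]
                >= self.threshold
                for s in self.seeds
            )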
Example #8
 def post(self, project_id):
     parser = reqparse.RequestParser()
     parser.add_argument('project-name', type=str,
                         required=True, help='project name')
     parser.add_argument('project-path', type=str,
                         required=True, help='project path')
     parser.add_argument('workflows', type=int, default=0,
                         help='number of connected workflows')
     parser.add_argument('models', type=int, default=0,
                         help='number of connected models')
     args = parser.parse_args()
     project_id = str(xxh32_intdigest(args['project-name']))
     args['project-id'] = project_id
     db = get_plasma_db()
     project_collection = db.get_collection('projects')
     project_collection.insert(dict(args))
     response_data = {'project-id': project_id}
     response = generate_response(201, response_data)
     return response
Example #9
    def is_file_modded(
        self, file_name: str, data: Union[ByteString, int], flag_new: bool = True
    ) -> bool:
        """Checks a file to see if it has been modified. Automatically decompresses yaz0 data.

        Args:
            file_name (str): The canonical resource path of the file to check
            data (Union[ByteString, int]): Either the file data (as a byteslike object) or an xxh32
                hash as an int
            flag_new (bool, optional): Whether to flag new files (not in vanilla BOTW) as modified.
                Defaults to True.

        Returns:
            bool: True if the file's hash does not match any known hash for the
                original version, i.e. the file has been modified.
        """
        if file_name not in self._table:
            return flag_new
        else:
            if isinstance(data, int):
                return data not in self._table[file_name]
            if data[0:4] == b"Yaz0":
                data = decompress(data)
            return xxh32_intdigest(data) not in self._table[file_name]
Example #10
def compute_hash_int32(value):
    n = xxh32_intdigest(value)
    # XOR the sign bit, then subtract 2**31: unsigned 32-bit -> signed 32-bit
    return (n ^ 0x80000000) - 0x80000000
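
The XOR-and-subtract reinterprets the unsigned 32-bit digest as a two's-complement signed value: digests with the high bit set come out negative, the rest are unchanged. For example:

    assert (0xFFFFFFFF ^ 0x80000000) - 0x80000000 == -1
    assert (0x7FFFFFFF ^ 0x80000000) - 0x80000000 == 2**31 - 1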
Example #11
    def lookup(self, key: str) -> str:
        """lookup node for key"""

        hashed = xxh32_intdigest(key)
        return self.nodes[self.table[hashed % self.m]]
Example #12
def hash_array(kmer):
    """Return a hash of a numpy array."""
    return xxhash.xxh32_intdigest(kmer.tobytes())
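
Usage note: tobytes() serializes only the raw buffer, so arrays with identical bytes but different shape hash the same. A quick check:

    import numpy as np
    import xxhash

    a = np.arange(4, dtype=np.int32)
    b = a.reshape(2, 2)
    assert xxhash.xxh32_intdigest(a.tobytes()) == xxhash.xxh32_intdigest(b.tobytes())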
Example #13
    def __init__(
        self,
        split,
        img_size=1344,
        batch_size=16,
        augment=False,
        hyp=None,
        rect=False,
        image_weights=False,
        cache_images=False,
        single_cls=False,
        stride=32,
        pad=0.0,
        prefix="",
    ):
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights

        self.stride = stride
        self.split = split

        self.imgs = []
        self.img_files = []
        self.labels = []
        self.dimensions = 0

        self.labels_map = {
            "table": 0,
            # "header": 1,
            # "para": 2,
        }

        db_client = MongoClient(os.getenv("MONGO_HOST", "localhost"))
        db = db_client[os.getenv("MONGO_DATABASE", "doc-store-dev")]

        file_idxs = db["bboxes"].distinct(
            "file_idx",
            {
                "audited": True,
                "block_type": {
                    "$in": list(self.labels_map.keys())
                }
            },
        )

        for file_idx in file_idxs:
            try:
                data = load_json(file_idx)
            except Exception:
                print(f"error loading {file_idx}")
                continue

            # self.img_names.append(str(filename.name).split(".json")[0])
            if not self.dimensions:
                self.dimensions = len(data["metadata"]["features"]) + 1
            else:
                assert self.dimensions == len(data["metadata"]["features"]) + 1

            pages = db["bboxes"].distinct(
                "page_idx",
                {
                    "file_idx": file_idx,
                    "audited": True,
                    "block_type": {
                        "$in": list(self.labels_map.keys())
                    },
                },
            )
            for page_idx in pages:
                # Deterministic train/test split: hash the (file, page) pair
                # and bucket pages by the digest modulo 10
                if self.split and isinstance(self.split, int):
                    h = xxh32_intdigest(f"{file_idx}-{page_idx}")
                    if self.split > 5:
                        if h % 10 >= self.split:
                            print("skipping current page for training")
                            continue
                    else:
                        if h % 10 < (10 - self.split):
                            print("skipping current page for testing")
                            continue

                tokens = data["data"][page_idx]

                # HWC layout.
                # Padding is hard-coded to 0, so the image must be square
                # (H = 1344, W = 1344 by default)
                features = np.zeros(
                    (self.img_size, self.img_size, self.dimensions),
                    dtype=np.float32,
                )

                # make features, HWF
                for token in tokens:
                    # x1, y1, x2, y2 = xyxy_to_training(token["position"]["xyxy"])
                    x1, y1, x2, y2 = [
                        round(x) for x in token["position"]["xyxy"]
                    ]

                    # token position mask
                    features[y1:y2, x1:x2, 0] = 1

                    for i, feature in enumerate(token["features"]):
                        features[y1:y2, x1:x2, i + 1] = feature

                # make labels
                page_labels = []
                for label in db["bboxes"].find({
                        "file_idx": file_idx,
                        "page_idx": page_idx,
                        "audited": True,
                        "block_type": {
                            "$in": list(self.labels_map.keys())
                        },
                }):
                    label_type = self.labels_map[label["block_type"]]

                    try:
                        xyxy = correct_box(features, label["bbox"])
                    except Exception:
                        continue

                    label_coords = xyxy_to_training(xyxy,
                                                    dim=(self.img_size,
                                                         self.img_size))

                    labeled = [label_type] + label_coords

                    page_labels.append(np.array(labeled))

                if page_labels:
                    self.imgs.append(features)

                    self.img_files.append(f"file:{file_idx} page:{page_idx+1}")

                    self.labels.append(page_labels)
                    print(
                        f"{len(page_labels)} labels for document {file_idx}, page {page_idx+1}"
                    )

        print(
            f"Found {len(self.imgs)} images with {sum([len(x) for x in self.labels])} labels"
        )

        # image shapes (height, width) for each page, as float64
        self.shapes = np.array([[x.shape[0], x.shape[1]] for x in self.imgs],
                               dtype=np.float64)
        # convert to np array
        self.labels = [np.array(x) for x in self.labels]

        n = len(self.imgs)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index (np.int is removed in modern NumPy)
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)
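
The split logic above is deterministic hash bucketing: each (file, page) pair lands in one of ten buckets, and the value of split selects which buckets are kept. A condensed sketch (the helper name is illustrative):

    from xxhash import xxh32_intdigest

    def keep_page(file_idx, page_idx, split):
        h = xxh32_intdigest(f"{file_idx}-{page_idx}") % 10
        if split > 5:             # training: keep the first `split` buckets
            return h < split
        return h >= 10 - split    # testing: keep the complementary buckets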
Example #14
 def hash(self, serialized_item):
     return xxh32_intdigest(serialized_item, seed=self.__seed)
Example #15
                    elif len(ps) == 0:
                        continue
                    # Disabled dedup lookup, kept below as a string literal:
                    """
                        found = False
                        hsh = xxh32_intdigest(ps)
                        query = [r for r in table.where(f"xhashx == {hsh}")]
                        if len(query) >= 1:
                            for r in query:
                                if r['password'] == ps:
                                    r['src'] |= 1
                                    r['cnt'] += 1
                                    found = True
                                    break
                                    
                        if not found:"""
                    pswd['xhashx'] = xxh32_intdigest(ps)
                    pswd["password"] = ps
                    pswd['src'] = 1 << source

                    pswd.append()

                    working += 1
            #except UnicodeDecodeError:

            table.flush()
            #os.remove(DIRNAME + '/' +  fname)

    except Exception:
        print("calling except")
        table.flush()
        h5file.close()
Example #16
 def fast_hash(x):
     return xxhash.xxh32_intdigest(x)
Example #17
 def test_xxh32_intdigest(self):
     self.assertEqual(xxhash.xxh32_intdigest('a'), 1426945110)
     self.assertEqual(xxhash.xxh32_intdigest('a', 0), 1426945110)
     self.assertEqual(xxhash.xxh32_intdigest('a', 1), 4111757423)
     self.assertEqual(xxhash.xxh32_intdigest('a', 2**32 - 1), 3443684653)
Example #18
 def hash_torch(x):
     # Hash the tensor's raw bytes via numpy ('data' avoids shadowing the builtin 'bytes')
     data = judo.to_numpy(x).tobytes()
     return xxhash.xxh32_intdigest(data)