示例#1
0
 def test_lookup_nan(self, writable):
     # GH#21688 ensure we can deal with readonly memory views
     xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
     xs.setflags(write=writable)
     m = ht.Float64HashTable()
     m.map_locations(xs)
     tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))
示例#2
0
def unique1d(values):
    """
    Return the unique entries of ``values`` using a hash table chosen
    from ``values.dtype``.

    Dispatch, in order:

    - floating: ``Float64HashTable``; the result is wrapped in a
      float64 ndarray
    - datetime64 / timedelta64: ``Int64HashTable``; the result is viewed
      back as ``'M8[ns]'`` / ``'m8[ns]'``
    - signed integer: ``Int64HashTable``
    - unsigned integer: ``UInt64HashTable``
    - anything else: ``StringHashTable`` when ``lib.infer_dtype``
      reports ``'string'``, otherwise ``PyObjectHashTable``
    """
    dtype = values.dtype

    if np.issubdtype(dtype, np.floating):
        hasher = htable.Float64HashTable(len(values))
        distinct = hasher.unique(_ensure_float64(values))
        return np.array(distinct, dtype=np.float64)

    if np.issubdtype(dtype, np.datetime64):
        hasher = htable.Int64HashTable(len(values))
        return hasher.unique(_ensure_int64(values)).view('M8[ns]')

    if np.issubdtype(dtype, np.timedelta64):
        hasher = htable.Int64HashTable(len(values))
        return hasher.unique(_ensure_int64(values)).view('m8[ns]')

    if np.issubdtype(dtype, np.signedinteger):
        hasher = htable.Int64HashTable(len(values))
        return hasher.unique(_ensure_int64(values))

    if np.issubdtype(dtype, np.unsignedinteger):
        hasher = htable.UInt64HashTable(len(values))
        return hasher.unique(_ensure_uint64(values))

    # a string hash table is cheaper than the generic object one
    if lib.infer_dtype(values) == 'string':
        table_cls = htable.StringHashTable
    else:
        table_cls = htable.PyObjectHashTable

    return table_cls(len(values)).unique(_ensure_object(values))
示例#3
0
 def test_add_signed_zeros(self):
     # GH#21866 inconsistent hash-function for float64
     # default hash-function would lead to different hash-buckets
     # for 0.0 and -0.0 if there are more than 2^30 hash-buckets
     # but this would mean 16GB
     N = 4  # 12 * 10**8 would trigger the error, if you have enough memory
     m = ht.Float64HashTable(N)
     m.set_item(0.0, 0)
     m.set_item(-0.0, 0)
     assert len(m) == 1  # 0.0 and -0.0 are equivalent
示例#4
0
 def test_add_different_nans(self):
     # GH#21866 inconsistent hash-function for float64
     # create different nans from bit-patterns:
     NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
     NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
     assert NAN1 != NAN1
     assert NAN2 != NAN2
     # default hash function would lead to different hash-buckets
     # for NAN1 and NAN2 even if there are only 4 buckets:
     m = ht.Float64HashTable()
     m.set_item(NAN1, 0)
     m.set_item(NAN2, 0)
     assert len(m) == 1  # NAN1 and NAN2 are equivalent