Python BloomFilter примеры использования

Язык программирования: Python

Пространство имен/Пакет: bloom

Метод/Функция: BloomFilter

Примеров на hotexamples.com: 9

Python BloomFilter - 9 примеров найдено. Это лучшие примеры Python кода для bloom.BloomFilter, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

def test_bloom_filter():
    """Check add/contains on a BloomFilter built with bits=20, hashes=3.

    'hello' is inserted first and must be reported present while 'hi' is
    still reported absent; after 'hi' is inserted too, both must be
    reported present.
    """
    flt = bloom.BloomFilter(bits=20, hashes=3)

    # Stage 1: only 'hello' has been inserted.
    flt.add('hello')
    hello_present = flt.contains('hello')
    assert hello_present, 'BloomFilter failed to add item "hello"'
    hi_present = flt.contains('hi')
    assert not hi_present, 'BloomFilter failed to deny item "hi"'

    # Stage 2: both items inserted; checked in the same order as inserted.
    flt.add('hi')
    expectations = (
        ('hello', 'BloomFilter failed to add item "hello"'),
        ('hi', 'BloomFilter failed to add item "hi"'),
    )
    for item, failure_message in expectations:
        assert flt.contains(item), failure_message

Пример #2

Показать файл

Файл: my_Model.py Проект: cmjhaha886/Learned-Index-Structures

    h = .02  # step size in the mesh

    # we create an instance of SVM and fit our data. We do not scale our
    # data since we want to plot the support vectors
    C = 0.2  # SVM regularization parameter

    rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, Y)
    print('RBF: ')
    start = datetime.datetime.now()
    y_pred = rbf_svc.predict(X)
    end = datetime.datetime.now()
    print('========== Learned Bloom filter result =============')
    print("Learned Bloom average predict time: ", (end - start))
    y_label = [int(i) for i in Y]
    conf_matrix = confusion_matrix(y_label, y_pred)
    print(conf_matrix)
    # print(classification_report(y_label, y_pred))

    print('========== Traditional Bloom filter result =========')
    bloom = bloom.BloomFilter(len(X), fpr_b)
    for i in range(len(X)):
        if Y[i] == 1:
            bloom.add(X[i][0])
    result = []
    start = datetime.datetime.now()
    y_bloom = [bloom.check(x[0]) for x in X]
    end = datetime.datetime.now()
    print(bloom.size)
    print("Traditional Bloom average predict time: ", (end - start))
    print(confusion_matrix(y_label, y_bloom))

Пример #3

Показать файл

Файл: checkbloom.py Проект: 5l1v3r1/bloomHIBP

#!/usr/bin/env python
# Author Dario Clavijo 2017
# GPlv3

# Used for checking "Have I Been Pwned" passwords against a Bloom filter

import bloom
import sys
import hashlib

#bf = bloom.BloomFilter(array_size=(1024**3)*8,do_hashing=True)
# The first CLI argument is handed to the filter as its filename
# (presumably a previously saved filter -- confirm against bloom.BloomFilter).
bf = bloom.BloomFilter(filename=sys.argv[1],
                       array_size=(1024**2) * 512,
                       do_hashing=False,
                       slice_bits=120,
                       slices=7,
                       ishex=True)

# hashlib only accepts bytes. On Python 3 sys.argv holds text, so it is
# encoded first; on Python 2 the argument is already a byte string and is
# hashed unchanged.
candidate = sys.argv[2]
if not isinstance(candidate, bytes):
    candidate = candidate.encode('utf-8')

# Print whether the SHA-1 hex digest of the second CLI argument is reported
# by the filter. print(...) with one argument works on Python 2 and 3 alike.
print(bf.check(hashlib.sha1(candidate).hexdigest()))

Пример #4

Показать файл

#!/usr/bin/env python
# Author Dario Clavijo 2017
# GPlv3

import bloom
import sys
import fileinput

# First CLI argument: multiplier applied to 1024**2 to get array_size.
SIZEMB = int(sys.argv[1])
bf = bloom.BloomFilter(array_size=(1024**2) * SIZEMB,
                       do_hashing=False,
                       slice_bits=120,
                       slices=7,
                       ishex=True)

new = 0
seen = 0
# The input file (second CLI argument) is only read, so it is opened
# read-only; the context manager closes it even if processing fails midway.
with open(sys.argv[2], 'r') as fp:
    for line in fp:
        # Strip the trailing newline/whitespace; str.rstrip() cannot fail,
        # so no exception handling is needed around it.
        h = line.rstrip()
        # Kept as an explicit comparison on purpose: update()'s return type
        # is not visible here, and `not bf.update(h)` would additionally
        # count a None result as "new".
        if bf.update(h) == False:  # noqa: E712
            new += 1
        else:
            seen += 1
        # Running totals are reported after every input line.
        print("new:%d seen:%d" % (new, seen))

Пример #5

Показать файл

Файл: zhihupachong.py Проект: zhao750456695/cvpro

import jsonpath
import json
import redis
import bloom
import pymysql.cursors

# ===== Connect to the database
# Opens a connection to the "zhihu" database on 127.0.0.1 and creates a
# cursor on it. The credentials are hard-coded in this call.
conn = pymysql.connect(host="127.0.0.1",
                       user="******",
                       passwd="root",
                       db="zhihu",
                       charset='utf8',
                       use_unicode=True)
cursor = conn.cursor()

# NOTE(review): the positional arguments look like (error rate, capacity) --
# confirm against bloom.BloomFilter's signature.
bf = bloom.BloomFilter(0.001, 100000000)

# Swaps the default HTTPS context factory for the unverified one, so HTTPS
# requests that rely on the default context skip certificate verification.
ssl._create_default_https_context = ssl._create_unverified_context

# ===== Set the request headers
# Pool of User-Agent strings; one is picked at random below.
# Every entry must end with a comma: adjacent string literals are implicitly
# concatenated, which previously fused the first two entries into one.
# NOTE(review): the first two entries carry a literal 'User-Agent:' prefix and
# the second has no closing parenthesis -- confirm whether that is intended.
ua = [
    'User-Agent:Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TheWorld)',
    'User-Agent:Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
]
thisua = random.choice(ua)
headers = {"User-Agent": thisua}
headers1 = {
    'Cache-Control': 'max-age=0',
    'User-Agent': random.choice(ua),

Пример #6

Показать файл

Файл: mkbloom.py Проект: ribolzisalvador/fastBloomFilter

import sys
import bloom

# First CLI argument: path the newly created filter is saved to.
filename = sys.argv[1]

# Second CLI argument: multiplier applied to 1024**3 to get array_size.
try:
    Gigs = int(sys.argv[2])
except (IndexError, ValueError):
    # A missing size argument is reported the same way as a non-numeric one.
    # print(...) with one argument works on both Python 2 and Python 3.
    print("Please input the correct number of Gigabytes of RAM to be used.")
    # sys.exit is used because the bare `exit` helper is only injected by the
    # site module and is not guaranteed to exist.
    sys.exit(1)

# Non-positive sizes are skipped: nothing is created or saved.
if Gigs > 0:
    bf = bloom.BloomFilter(array_size=Gigs * (1024**3),
                           do_bkp=False,
                           do_hashing=False,
                           fast=False)
    # Nothing is added before saving.
    bf.save(filename)

Пример #7

Показать файл

Файл: loadbloom.py Проект: ribolzisalvador/fastBloomFilter

#!/usr/bin/env python
# Author Dario Clavijo 2017
# GPlv3

import bloom
import sys

# Build a filter with the library defaults and add every line of the input
# file (first CLI argument), with trailing whitespace stripped.
bf = bloom.BloomFilter()

# The context manager closes the file even if bf.add() raises part-way
# through, which the manual open()/close() pair did not guarantee.
with open(sys.argv[1], 'r') as fp:
    for line in fp:
        bf.add(line.rstrip())

# Save the populated filter to the path given as the second CLI argument.
bf.save(sys.argv[2])

Пример #8

Показать файл

Файл: mkbloom.py Проект: isenbek/bloomfilter

import sys
import bloom

# Optional second CLI argument: the filter's array_size. Falls back to
# 5 * 1024**3 when the argument is absent or not an integer. Only those two
# failures are caught, so KeyboardInterrupt/SystemExit are no longer swallowed.
try:
    array_size = int(sys.argv[2])
except (IndexError, ValueError):
    array_size = (1024**3) * 5

bf = bloom.BloomFilter(array_size=array_size,
                       do_bkp=False,
                       do_hashing=False,
                       bitshuffle=False)
# The target path (first CLI argument) is set on the filter before save(),
# which is called without arguments.
bf.filename = sys.argv[1]
bf.save()

Пример #9

Показать файл

        with open(sample, 'r') as sample_fh:
            content = sample_fh.read()

        # Get text from HTML content
        words = html.fromstring(content).text_content().replace("\n", "")
        words = re.findall(r"[\w]+", words)
        # Remove all punctuation etc., convert words to lower and delete
        # duplicates
        words = list(set([word.lower() for word in words]))

        # Remove common words
        words = remove_common_words(words)
        # Stemming to reduce the number of words
        words = list(set([p.stem(word, 0, len(word)-1) for word in words]))

        tmp_filter = bloom.BloomFilter(capacity=len(words),
                                       error_rate=error_rate)
        for word in words:
            tmp_filter.add(word)

        filters.append(tmp_filter.buckets)

        pages.append({"title": re.search(r"@title=(.*)\n", content).group(1),
                      "url": sample[3:]})

    # First Int32 is length
    filters_to_write = struct.pack("<i", len(filters))
    # Then comes the length of each filter
    for i in filters:
        filters_to_write += struct.pack("<i", len(i))
    # Finally comes the filters themselves
    for i in filters: