Python LSH.update примеры использования

Язык программирования: Python

Пространство имен/Пакет: snapy

Класс/Тип: LSH

Метод/Функция: update

Примеров на hotexamples.com: 4

Python LSH.update — найдено 4 примера. Это лучшие примеры кода на Python для snapy.LSH.update, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

LSH(19)

adjacency_list(6)

query(5)

update(4)

contains(2)

edge_list(2)

remove(2)

Пример #1

Показать файл

Файл: test_lsh.py Проект: pingpingmintra/SnaPy

def test_initialize_from_empty_lsh():
    """Check that an LSH built without arguments is configured by its first update.

    Before the update the band/permutation settings are ``None`` and both
    internal bucket mappings are empty; afterwards the stored labels match
    ``labels`` and the settings read 100 permutations / 50 bands.
    """
    model = LSH()

    # Freshly constructed: nothing configured, nothing stored.
    assert model.no_of_bands is None
    assert model._buckets == defaultdict(list)
    assert model._i_bucket == defaultdict(list)
    assert model.permutations is None

    model.update(minhash, labels)

    # The keys of the per-label mapping must come back in insertion order.
    stored_labels = list(model._i_bucket)
    assert stored_labels == labels
    assert model.permutations == 100
    assert model.no_of_bands == 50

Пример #2

Показать файл

def test_initialize_from_empty_lsh():
    """Check that an empty LSH picks up settings and bucket contents on update.

    Verifies the unconfigured starting state, then after ``update`` checks the
    stored labels, two specific bucket entries, and the resulting
    permutation/band counts.
    """
    model = LSH()

    # Freshly constructed: nothing configured, nothing stored.
    assert model.no_of_bands is None
    assert model._buckets == defaultdict(list)
    assert model._i_bucket == defaultdict(list)
    assert model.permutations is None

    model.update(minhash, labels)

    stored_labels = list(model._i_bucket)
    assert stored_labels == labels

    # Known bucket hashes and the labels expected to land in each of them.
    expected_members = {
        4466445138223010106: [1, 8],
        -3939654010681976230: [1, 4, 8],
    }
    bucket_map = model._buckets
    for bucket_hash, members in expected_members.items():
        assert bucket_map[bucket_hash] == members

    assert model.permutations == 100
    assert model.no_of_bands == 50

Пример #3

Показать файл

def test_update_lsh():
    """Check which updates an already-populated LSH rejects and which it accepts.

    Re-submitting the original ``minhash``/``labels`` and submitting a MinHash
    built with ``permutations=10`` must both raise ``ValueError``; a MinHash
    built with default settings and fresh labels must be appended.
    """
    model = LSH(minhash, labels)

    # Feeding the same signatures/labels a second time is rejected
    # (presumably because the labels are already present).
    with pytest.raises(ValueError):
        model.update(minhash, labels)

    extra_texts = [
        'Jupiter is primarily composed of hydrogen with a quarter of its mass being helium',
        'Jupiter moving out of the inner Solar System would have allowed the formation of inner planets.'
    ]
    extra_labels = [11, 12]

    # A MinHash with a different permutation count than the model is rejected.
    mismatched = MinHash(extra_texts, permutations=10)
    with pytest.raises(ValueError):
        model.update(mismatched, extra_labels)

    # A MinHash built with default settings is accepted and appended.
    matching = MinHash(extra_texts)
    model.update(matching, extra_labels)

    assert model.permutations == 100
    stored_labels = list(model._i_bucket)
    assert stored_labels == labels + [11, 12]

Пример #4

Показать файл

# Generate MinHash signatures for new texts and add those texts to the LSH model.
# NOTE(review): `lsh`, `minhash`, `n_gram`, `permutations`, `hash_bits` and `seed`
# are not defined in this snippet; they are assumed to come from earlier setup code.
new_text = [
    'Jupiter is primarily composed of hydrogen with a quarter of its mass being helium',
    'Jupiter moving out of the inner Solar System would have allowed the formation of inner planets.',
]

new_labels = ['new_doc1', 'new_doc2']

# 1. Create MinHash signatures for the new texts, reusing the existing
#    n_gram / permutations / hash_bits / seed settings.
new_minhash = MinHash(new_text,
                      n_gram=n_gram,
                      permutations=permutations,
                      hash_bits=hash_bits,
                      seed=seed)

# 2. Update the LSH model with the new signatures, then print what the model
#    contains so the update can be checked.
lsh.update(new_minhash, new_labels)
print(lsh.contains())

# Print the adjacency list of all documents.
print(lsh.adjacency_list())

# Print the edge list of documents flagged as duplicates (e.g. for plotting a
# text-similarity graph).
print(lsh.edge_list())

# Remove the text with label 6 from the model and print the contents again.
# Per the original author's note, removing a label that is not in the model
# results in an error.
lsh.remove(6)
print(lsh.contains())

# Shape of the signature matrix produced by MinHash; the original note describes
# it as n*m (n = text rows, m = selected permutations).
# NOTE(review): this is a bare expression, so its value is only displayed in an
# interactive/notebook session; in a script it has no visible effect.
minhash.signatures.shape