import unittest

import numpy as np
from sentence_transformers import SentenceTransformer


class ComputeMultiProcessTest(unittest.TestCase):
    def setUp(self):
        self.model = SentenceTransformer('paraphrase-distilroberta-base-v1')

    def test_multi_gpu_encode(self):
        # Start a multi-process pool; 'cpu' workers are used here so the
        # test also runs on machines without CUDA devices
        pool = self.model.start_multi_process_pool(['cpu', 'cpu'])
        sentences = ["This is sentence {}".format(i) for i in range(1000)]

        # Compute the embeddings using the multi-process pool
        emb = self.model.encode_multi_process(sentences, pool, chunk_size=50)
        self.model.stop_multi_process_pool(pool)
        assert emb.shape == (len(sentences), 768)

        # The pooled result must match single-process encoding
        emb_normal = self.model.encode(sentences)
        diff = np.max(np.abs(emb - emb_normal))
        print("Max multi proc diff", diff)
        assert diff < 0.001
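The test above uses 'cpu' workers so it runs anywhere, but the same pool API targets GPUs by passing device strings. A minimal sketch, assuming two CUDA devices are available (the device names are illustrative):

from sentence_transformers import SentenceTransformer

if __name__ == '__main__':
    model = SentenceTransformer('paraphrase-distilroberta-base-v1')
    # Spawn one worker process per listed device
    pool = model.start_multi_process_pool(['cuda:0', 'cuda:1'])
    emb = model.encode_multi_process(["A sentence", "Another sentence"], pool)
    model.stop_multi_process_pool(pool)
    print(emb.shape)  # (2, 768)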
"""
This example starts multiple processes (1 per GPU), which encode
sentences in parallel. This gives a near linear speed-up when encoding
large text collections.
"""
import logging

from sentence_transformers import SentenceTransformer, LoggingHandler

logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])

# Important: shield your code with if __name__ == '__main__'. Otherwise,
# CUDA runs into issues when spawning new processes.
if __name__ == '__main__':
    # Create a large list of 100k sentences
    sentences = ["This is sentence {}".format(i) for i in range(100000)]

    # Define the model
    model = SentenceTransformer('paraphrase-distilroberta-base-v1')

    # Start the multi-process pool on all available CUDA devices
    pool = model.start_multi_process_pool()

    # Compute the embeddings using the multi-process pool
    emb = model.encode_multi_process(sentences, pool)
    print("Embeddings computed. Shape:", emb.shape)

    # Optional: stop the processes in the pool
    model.stop_multi_process_pool(pool)
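Throughput on large collections depends on how the work is split across workers. encode_multi_process accepts a chunk_size (sentences sent to a worker per job, seen as chunk_size=50 in the test above) and, like encode, a batch_size (sentences per forward pass). A sketch with illustrative values:

from sentence_transformers import SentenceTransformer

if __name__ == '__main__':
    model = SentenceTransformer('paraphrase-distilroberta-base-v1')
    sentences = ["This is sentence {}".format(i) for i in range(100000)]
    pool = model.start_multi_process_pool()
    # Larger chunks reduce inter-process overhead; the values here are
    # illustrative, not tuned
    emb = model.encode_multi_process(sentences, pool, batch_size=32, chunk_size=500)
    model.stop_multi_process_pool(pool)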
import unittest

import numpy as np
from sentence_transformers import SentenceTransformer


class ComputeEmbeddingsTest(unittest.TestCase):
    def setUp(self):
        self.model = SentenceTransformer('paraphrase-distilroberta-base-v1')

    def test_encode_token_embeddings(self):
        """
        Test that encode(output_value='token_embeddings') works
        """
        sent = ["Hello Word, a test sentence",
                "Here comes another sentence",
                "My final sentence",
                "Sentences",
                "Sentence five five five five five five five"]
        emb = self.model.encode(sent, output_value='token_embeddings', batch_size=2)
        assert len(emb) == len(sent)
        # Each sentence must yield one embedding per input token
        for s, e in zip(sent, emb):
            assert len(self.model.tokenize([s])['input_ids'][0]) == e.shape[0]

    def test_encode_single_sentences(self):
        # Single sentence
        emb = self.model.encode("Hello Word, a test sentence")
        assert emb.shape == (768,)
        assert abs(np.sum(emb) - 7.9811716) < 0.001

        # Single sentence as list
        emb = self.model.encode(["Hello Word, a test sentence"])
        assert emb.shape == (1, 768)
        assert abs(np.sum(emb) - 7.9811716) < 0.001

        # Sentence list
        emb = self.model.encode(["Hello Word, a test sentence",
                                 "Here comes another sentence",
                                 "My final sentence"])
        assert emb.shape == (3, 768)
        assert abs(np.sum(emb) - 22.968266) < 0.001

    def test_encode_normalize(self):
        emb = self.model.encode(["Hello Word, a test sentence",
                                 "Here comes another sentence",
                                 "My final sentence"],
                                normalize_embeddings=True)
        assert emb.shape == (3, 768)
        # Normalized embeddings must have unit L2 norm
        for norm in np.linalg.norm(emb, axis=1):
            assert abs(norm - 1) < 0.001

    def test_encode_tuple_sentences(self):
        # Input a sentence tuple
        emb = self.model.encode([("Hello Word, a test sentence", "Second input for model")])
        assert emb.shape == (1, 768)
        assert abs(np.sum(emb) - 9.503508) < 0.001

        # List of sentence tuples
        emb = self.model.encode([("Hello Word, a test sentence", "Second input for model"),
                                 ("My second tuple", "With two inputs"),
                                 ("Final tuple", "final test")])
        assert emb.shape == (3, 768)
        assert abs(np.sum(emb) - 32.14627) < 0.001

    def test_multi_gpu_encode(self):
        # Start a multi-process pool; 'cpu' workers keep the test runnable without CUDA
        pool = self.model.start_multi_process_pool(['cpu', 'cpu'])
        sentences = ["This is sentence {}".format(i) for i in range(1000)]

        # Compute the embeddings using the multi-process pool
        emb = self.model.encode_multi_process(sentences, pool, chunk_size=50)
        self.model.stop_multi_process_pool(pool)
        assert emb.shape == (1000, 768)

        # The pooled result must match single-process encoding
        emb_normal = self.model.encode(sentences)
        diff = np.sum(np.abs(emb - emb_normal))
        assert diff < 0.001
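To run these test classes directly with the standard unittest runner, an entry point can be appended; as with the example script above, the __main__ guard matters here because test_multi_gpu_encode spawns worker processes. A minimal sketch:

# Shield the entry point, since the multi-process test spawns workers
if __name__ == '__main__':
    unittest.main()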