def get_one_vec(**kwargs):
    """Embed one text with ``text2vec`` and return the normalized result as a list.

    Keyword Args:
        text: Input passed to ``text2vec``. Defaults to ``'默认'``.
        is_lite: When truthy, only columns 0, 4, 8, ..., 764 of the
            embedding are kept before normalization. Defaults to ``0``.

    Returns:
        ``normalize(...)`` applied to the (optionally reduced) embedding,
        converted with ``.tolist()``.
    """
    lite_mode = kwargs.get('is_lite', 0)
    embedding = text2vec(kwargs.get('text', '默认'))
    if lite_mode:
        # Every 4th column out of 768: 64 * 3 = 192 dimensions.
        # NOTE(review): assumes text2vec returns a 2-D array with at least
        # 765 columns -- confirm against its definition.
        embedding = embedding[:, range(0, 768, 4)]
    return normalize(embedding).tolist()
def get_batch_vec(**kwargs):
    """Embed a batch of texts with ``texts2vec`` and return normalized lists.

    Keyword Args:
        texts: Input passed to ``texts2vec``. Defaults to ``['默认']``.
        is_lite: When truthy, only columns 0, 4, 8, ..., 764 of the
            embeddings are kept before normalization. Defaults to ``0``.

    Returns:
        ``normalize(...)`` applied to the (optionally reduced) embeddings,
        converted with ``.tolist()``.
    """
    lite_mode = kwargs.get('is_lite', 0)
    embeddings = texts2vec(kwargs.get('texts', ['默认']))
    if lite_mode:
        # Every 4th column out of 768: 64 * 3 = 192 dimensions.
        # NOTE(review): assumes texts2vec returns a 2-D array with at least
        # 765 columns -- confirm against its definition.
        embeddings = embeddings[:, range(0, 768, 4)]
    return normalize(embeddings).tolist()
def sent2vec(sent, w2v, tokenizer=str.split):
    """Return the normalized mean of the word vectors of a sentence.

    Args:
        sent: Sentence handed to ``tokenizer``.
        w2v: Mapping-like object indexed as ``w2v[token]`` to obtain each
            token's vector.
        tokenizer: Callable that splits ``sent`` into tokens. Defaults to
            ``str.split`` (whitespace splitting).

    Returns:
        ``normalize(...)`` applied to the element-wise mean (axis 0) of the
        stacked token vectors.

    Raises:
        ValueError: From ``np.vstack`` when the tokenizer yields no tokens.
        Lookup errors from ``w2v[token]`` propagate unchanged.
    """
    token_vectors = [w2v[token] for token in tokenizer(sent)]
    # np.vstack replaces np.row_stack, a deprecated alias that simply
    # forwards to vstack (DeprecationWarning since NumPy 2.0).
    stacked = np.vstack(token_vectors)
    return normalize(stacked.mean(0))