Python generate_embedding示例，utils.generate_embedding Python示例

示例#1

0

显示文件

文件： SBERT_WK.py 项目： TheNeuromancer/SentEmb

def batcher(params, batch):
    """Embed a batch of tokenised sentences.

    Each element of ``batch`` is joined with single spaces into one string,
    encoded with ``params['tokenizer']``, then truncated or zero-padded to
    exactly ``params['max_seq_length']`` ids.  The ids and the matching
    attention masks are sent through ``params['model']`` and element ``[1]``
    of the model output is handed to the embedding strategy selected by
    ``params['embed_method']``.

    Args:
        params: Mapping that provides ``'model'``, ``'tokenizer'``,
            ``'max_seq_length'`` and ``'embed_method'``; it is also passed
            on unchanged to the strategy's ``embed`` call.
        batch: Iterable of sentences, each an iterable of string tokens.

    Returns:
        Whatever ``embed_method.embed(params, all_layer_embedding)`` returns.
    """
    model = params['model']
    texts = [' '.join(tokens) for tokens in batch]

    tokenizer = params['tokenizer']
    encoded = [tokenizer.encode(text, add_special_tokens=True) for text in texts]

    features_input_ids = []
    features_mask = []
    for token_ids in encoded:
        limit = params['max_seq_length']
        # Slicing is a no-op for short sentences and truncates long ones.
        kept = token_ids[:limit]
        filler = [0] * (limit - len(kept))
        row_ids = kept + filler
        # 1 marks a real token, 0 marks padding.
        row_mask = [1] * len(kept) + filler

        # Every row must end up exactly max_seq_length long.
        assert len(row_ids) == params['max_seq_length']
        assert len(row_mask) == params['max_seq_length']

        features_input_ids.append(row_ids)
        features_mask.append(row_mask)

    ids_tensor = torch.tensor(features_input_ids, dtype=torch.long)
    mask_tensor = torch.tensor(features_mask, dtype=torch.long)
    ids_tensor = ids_tensor.to(device)
    mask_tensor = mask_tensor.to(device)

    model.zero_grad()

    with torch.no_grad():
        # NOTE(review): element [1] is presumably the tuple of per-layer
        # hidden states; confirm against the model's output layout.
        features = model(input_ids=ids_tensor, attention_mask=mask_tensor)[1]

    # Stack the tensors along a new leading axis, then swap the first two
    # axes so each tensor's original first axis comes first.
    stacked = torch.stack(features).permute(1, 0, 2, 3)
    all_layer_embedding = stacked.cpu().numpy()

    embed_method = utils.generate_embedding(params['embed_method'],
                                            features_mask)
    return embed_method.embed(params, all_layer_embedding)

示例#2

0

显示文件

文件： sen_emb.py 项目： sts-sadr/SBERT-WK-Sentence-Embedding

        # Length Check
        assert len(sent_ids) == params['max_seq_length']
        assert len(sent_mask) == params['max_seq_length']

        features_input_ids.append(sent_ids)
        features_mask.append(sent_mask)

    batch_input_ids = torch.tensor(features_input_ids, dtype=torch.long)
    batch_input_mask = torch.tensor(features_mask, dtype=torch.long)
    batch = [batch_input_ids.to(device), batch_input_mask.to(device)]

    inputs = {"input_ids": batch[0], "attention_mask": batch[1]}
    model.zero_grad()

    with torch.no_grad():
        features = model(**inputs)[1]

    features = [layer_emb.cpu().numpy() for layer_emb in features]
    all_layer_embedding = []
    for i in range(features[0].shape[0]):
        all_layer_embedding.append(
            np.array([layer_emb[i] for layer_emb in features]))

    embed_method = utils.generate_embedding(params['embed_method'],
                                            features_mask)
    embedding = embed_method.embed(params, all_layer_embedding)

    similarity = embedding[0].dot(embedding[1]) / np.linalg.norm(
        embedding[0]) / np.linalg.norm(embedding[1])
    print('The similarity between these two sentences are (from 0-1):',
          similarity)

示例#3

0

显示文件

        left = ya != 0 or yc != 0
        right = yb != 0 or yd != 0
        left_eye_and_nose = ya != 0 or ye != 0
        right_eye_and_nose = yb != 0 or ye != 0
        left_mouth_and_nose = yc != 0 or ye != 0
        right_mouth_and_nose = yd != 0 or ye != 0
        if eye is True or mouth is True or left is True or right is True or left_eye_and_nose is True or right_eye_and_nose is True or left_mouth_and_nose is True or right_mouth_and_nose is True:
            likely_face = True

        name = ''
        iobject = transform.resize(image=iobject, output_shape=[112, 112])
        iobject = np.mean(iobject, axis=-1, keepdims=True)
        iobject = np.concatenate([iobject, iobject, iobject], axis=-1)
        iobject = np.array(iobject, dtype='int32')
        embedding1d = generate_embedding(interpreter=interpreter3,
                                         input_details=input_details3,
                                         output_details=output_details3,
                                         pix=iobject)
        oid = recognize(embedding2d=embedding2d, embedding1d=embedding1d)
        name = '' if oid == -1 else ID_map[oid]

        if likely_face is not True and name == '':
            continue

        scale = edge / 112
        ya, xa = y1 + int(scale * ya), x1 + int(scale * xa)
        yb, xb = y1 + int(scale * yb), x1 + int(scale * xb)
        yc, xc = y1 + int(scale * yc), x1 + int(scale * xc)
        yd, xd = y1 + int(scale * yd), x1 + int(scale * xd)
        ye, xe = y1 + int(scale * ye), x1 + int(scale * xe)

        bboxes.append(

示例#4

0

显示文件

# Build one reference embedding per identity: every image found in the
# identity's folder is embedded twice (as loaded and flipped left-right),
# and the mean of all those embeddings is stored in row i of embedding2d.
for i in range(total_ids):
    ID = IDs[i]
    id_dir = ids_dir + '/' + ID
    # Drop the macOS folder-metadata entry so it is not read as an image.
    file_names = [entry for entry in listdir(id_dir) if entry != '.DS_Store']
    embeddings = []
    for j, file_name in enumerate(file_names):
        pix = io.imread(id_dir + '/' + file_name)
        # Average over the last axis and repeat the result three times
        # (presumably grey-scale replicated across three channels).
        pix = np.mean(pix, axis=-1, keepdims=True)
        pix = np.concatenate([pix] * 3, axis=-1)
        pix = np.array(pix, dtype='int32')

        # First pass embeds the image as loaded, second pass its mirror.
        for mirrored in (False, True):
            if mirrored:
                pix = np.fliplr(pix)
            embedding = generate_embedding(interpreter=interpreter,
                                           input_details=input_details,
                                           output_details=output_details,
                                           pix=pix)
            embeddings.append(embedding)

    stacked = np.array(embeddings, dtype='float32')
    embedding2d[i] = np.mean(stacked, axis=0)

np.save(output_path + '/embedding2d.npy', embedding2d)