Exemplo n.º 1
0
        # Pairs labelled 'neutral' get a target score of 2/3.
        # NOTE(review): the score for any other label is assigned before this
        # point, outside the visible code — confirm it is always set.
        if gold_label == 'neutral':
            float_gold_label = 2 / 3.0

        # Bundle the two sentences of this row with the float target score.
        inp_example = InputExample(texts=[row['sentence1'], row['sentence2']], label=float_gold_label)

        # Collect the example in the test set.
        test_samples.append(inp_example)

# Debug output: how many test examples were collected.
print(len(test_samples))
# Label line ('Primeiro' is Portuguese for 'First').
# NOTE(review): the label is followed by a dump of *every* training sample,
# and only then by the first sample — confirm the full dump is intended.
print('Primeiro: ')
for t in train_samples:
    print(t)
# Print the first training sample on its own.
print(train_samples[0])
# Open the gzip-compressed dataset in text mode (UTF-8) and parse it as
# tab-separated rows keyed by the header line. QUOTE_NONE makes the reader
# treat quote characters as ordinary text instead of field delimiters.
with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
    # The reader must be created inside the `with` block: a `with` statement
    # without an indented body is an IndentationError.
    # NOTE(review): DictReader pulls lines lazily from fIn, so the rows have to
    # be consumed before this block exits; the consuming loop is not visible
    # in this part of the file.
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)

# Debug output: size of what the model's `fc1` returns for `x`.
# NOTE(review): `x` is not defined in the visible code; presumably a tensor
# batch prepared earlier — confirm.
print(word_embedding_model.fc1(x).size())


import math

from sentence_transformers import losses

from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Training batches are drawn, shuffled, from the first 100000 training samples.
# Alternative cap kept for reference: train_samples[:80000].
train_dataloader = DataLoader(
    train_samples[:100000],
    batch_size=train_batch_size,
    shuffle=True,
)

# Training objective: cosine-similarity loss computed with the model above.
train_loss = losses.CosineSimilarityLoss(model=word_embedding_model)


# Development set: Measure correlation between cosine score and gold labels
logging.info("Read SNLI benchmark dev dataset")