Example #1
from sklearn.preprocessing import LabelEncoder

# create a new label encoder
le = LabelEncoder()
# encode train set labels
y_train = le.fit_transform(y_train)  # EX1
# encode test set labels with the mapping already fit on the train set
# (transform, not fit_transform, so train/test integers stay consistent)
y_test = le.transform(y_test)  # EX1
# number of distinct classes found by the encoder
n_classes = le.classes_.size
#print("EX1 answer printing")
#print("First 10 unencoded labels from the training set are: ")
#print(le.inverse_transform(y_train[:10]))
#print("First 10 encoded labels from the training set are: ")
#print(y_train[:10])
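# Why transform (not fit_transform) on the test labels: fitting once fixes
# the string-to-integer mapping, and transform reuses it. A tiny sketch with
# made-up labels (illustrative, not part of the original script):
_demo_le = LabelEncoder()
_demo_le.fit_transform(["negative", "neutral", "positive"])  # learns mapping
_demo_ids = _demo_le.transform(["positive", "negative"])     # reuses it
assert list(_demo_le.inverse_transform(_demo_ids)) == ["positive", "negative"]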

# Define our PyTorch-based Dataset
train_set = SentenceDataset(X_train, y_train, word2idx)
# ------------ #
#     EX2      #
# ------------ #
# EX2
# print first 10 tokenized training examples
#print("EX2 printing")
#for i in range(10):
#    print(train_set.data[i])

# ------------ #
#     EX3      #
# ------------ #
# EX3
# print("EX3 printing")
# for i in range(5):
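# SentenceDataset is defined elsewhere in the project. A minimal sketch of
# what such a Dataset typically does (tokenize each sentence, map tokens to
# indices via word2idx, zero-pad to a fixed length) might look like the
# following; the max length, tokenizer, and <unk> handling are assumptions,
# so it is left commented out to avoid shadowing the project's real class:
#
# import numpy as np
# from torch.utils.data import Dataset
# from nltk.tokenize import TweetTokenizer
#
# class SentenceDataset(Dataset):
#     def __init__(self, X, y, word2idx, max_len=40):
#         self.data = [TweetTokenizer().tokenize(s) for s in X]
#         self.labels = y
#         self.word2idx = word2idx
#         self.max_len = max_len
#
#     def __len__(self):
#         return len(self.data)
#
#     def __getitem__(self, idx):
#         tokens = self.data[idx][: self.max_len]
#         ids = [self.word2idx.get(t, self.word2idx.get("<unk>", 0))
#                for t in tokens]
#         ids += [0] * (self.max_len - len(ids))  # zero-pad to max_len
#         return np.array(ids), self.labels[idx], len(tokens)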
Example #2
import sys

import torch
from torch.utils.data import DataLoader
from sklearn.preprocessing import LabelEncoder
from nltk.tokenize import TweetTokenizer

# DATASET, DEVICE, SentenceDataset, word2idx and load_Semeval2017A are
# assumed to be defined or imported elsewhere in the project.

# Load the raw data
if DATASET == "Semeval2017A":
    _, _, X_test, y_test = load_Semeval2017A()
else:
    raise ValueError("Invalid dataset")

# Convert data labels from strings to integers
# create a new label encoder
le = LabelEncoder()
# encode test set labels; only the test split is loaded in this script, so
# we fit on it (LabelEncoder sorts class names, so the mapping matches the
# one learned at training time as long as every class appears here)
y_test = le.fit_transform(y_test)  # EX1
# number of distinct classes found by the encoder
n_classes = le.classes_.size

# Define our PyTorch-based Dataset
test_set = SentenceDataset(X_test, y_test, word2idx)
# Tokenize the raw test sentences with NLTK's TweetTokenizer
tweetToken = TweetTokenizer()
text = [tweetToken.tokenize(example) for example in X_test]
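# TweetTokenizer keeps tweet-specific tokens ("@mentions", "#hashtags",
# emoticons) as single tokens. Quick illustration (not original code):
# tweetToken.tokenize("@user loved it :) #nlp")
# -> ['@user', 'loved', 'it', ':)', '#nlp']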

# Define our PyTorch-based DataLoader (explicit batch size of 1, no shuffle;
# these are also the DataLoader defaults)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
# Load the trained model from the path given on the command line.
model = torch.load(sys.argv[1]).to(DEVICE)
# Define criterion for evaluation.
loss_function = torch.nn.CrossEntropyLoss()

model.eval()
# IMPORTANT: in evaluation mode we don't need gradients,
# so everything runs under torch.no_grad()
data = []
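# A minimal sketch of the evaluation loop that typically follows. The batch
# unpacking assumes SentenceDataset yields (inputs, labels, lengths) triples
# and that the model takes (inputs, lengths); both are assumptions about the
# project's code, not confirmed by this snippet:
#
# total_loss = 0.0
# predictions = []
# with torch.no_grad():
#     for inputs, labels, lengths in test_loader:
#         inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
#         outputs = model(inputs, lengths)                   # forward pass only
#         total_loss += loss_function(outputs, labels).item()
#         predictions.append(outputs.argmax(dim=1).item())   # batch size is 1
# print("avg test loss:", total_loss / len(test_loader))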