        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        num_labels=2
    )
)
model.to(DEVICE)

# Zero out the embedding vectors for the padding and unknown tokens
# so they contribute nothing to the input representation.
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
model.bert.embeddings.word_embeddings.weight.data[PAD_IDX] = torch.zeros(512)
model.bert.embeddings.word_embeddings.weight.data[UNK_IDX] = torch.zeros(512)

print(f'Parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}')

optimizer = RMSprop(model.parameters(), lr=0.000005)

# Running counters for loss and accuracy reporting.
itr = 1
epochs = 20
total_loss = 0
total_len = 0
total_correct = 0

for epoch in range(epochs):
    model.train()
    print(f"Epoch {epoch + 1}/{epochs}")
    for batch in train_iterator:
        optimizer.zero_grad()
        outputs = model(batch.text, labels=batch.label)
        loss, logits = outputs
        max_position_embeddings=512,
        intermediate_size=1024,
        hidden_size=512,
        num_attention_heads=8,
        num_hidden_layers=6,
        type_vocab_size=5,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        num_labels=2
    )
)
model.to(device)

print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} parameters.')

optimizer = RMSprop(model.parameters(), lr=1e-6)

# Running counters for loss and accuracy reporting.
itr = 1
epochs = 30
total_loss = 0
total_len = 0
total_correct = 0

model.train()
for epoch in range(epochs):
    for batch in train_iterator:
        optimizer.zero_grad()
        outputs = model(batch.text, labels=batch.label)
        loss, logits = outputs
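Both listings break off inside the inner batch loop, right after unpacking the (loss, logits) tuple. A minimal sketch of how such a training step is typically completed, assuming the counters initialized above (itr, total_loss, total_len, total_correct) are meant to track running loss and accuracy, could look like the following; it is not the original continuation, and it assumes `import torch.nn.functional as F` earlier in the script:

        # Backpropagate and take an optimizer step.
        loss.backward()
        optimizer.step()

        # Track running loss and accuracy with the counters set up earlier.
        # (assumption: `import torch.nn.functional as F` is present)
        pred = torch.argmax(F.softmax(logits, dim=1), dim=1)
        total_correct += pred.eq(batch.label).sum().item()
        total_len += len(batch.label)
        total_loss += loss.item()
        itr += 1

Because softmax is monotonic, taking the argmax of the raw logits would give the same predictions; the softmax is kept only to mirror the common tutorial pattern.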