예제 #1
0
            'ACCUMULATION_STEPS': ACCUMULATION_STEPS,
            'LEARN_RATE': LEARN_RATE,
            'EPOCHS': EPOCHS,
            'WARMUP_STEPS': WARMUP_STEPS,
            'SEQUENCE_LENGTH': SEQUENCE_LENGTH,
        }, config_file, sort_keys=True, indent=4, separators=(',', ': '))

# Instantiate tokenizer, config and model.
# The value returned by load_model is indexed positionally below:
# [0] is used as the tokenizer class, [1] as the config class and
# [2] as the model class.
MODEL_CLASS = load_model(MODEL_PREFIX)
TOKENIZER = MODEL_CLASS[0].from_pretrained(MODEL_NAME)
# The config is created with three output labels.
CONFIG = MODEL_CLASS[1].from_pretrained(MODEL_NAME, num_labels=3)
MODEL = MODEL_CLASS[2].from_pretrained(MODEL_NAME, config=CONFIG)

# Build the training data: the 'train' split of every language in LANGS is
# loaded up front (star-unpacking evaluates the generator eagerly), chained
# into one stream, tokenized to SEQUENCE_LENGTH and wrapped by dataset().
train_examples = chain(*(load_semeval(DATASET, 'train', lang) for lang in LANGS))
train_features = tokenize(train_examples, TOKENIZER, SEQUENCE_LENGTH)
train_dataset = dataset(train_features)
train_sampler = RandomSampler(train_dataset)
# NOTE: train_dataset is rebound here from the raw dataset to the loader that
# batches it. drop_last=True presumably discards a trailing partial batch
# (torch DataLoader semantics -- the import is not visible here, confirm).
train_dataset = DataLoader(
    train_dataset,
    sampler=train_sampler,
    batch_size=TRAIN_BATCH_SIZE,
    drop_last=True,
)

# Launch training. All arguments are positional, so their order must match
# the signature of training(), which is defined elsewhere.
validation_sets = val_datasets(TOKENIZER, SEQUENCE_LENGTH)
training(
    train_dataset,
    validation_sets,
    MODEL,
    EXPERIMENT,
    LEARN_RATE,
    WARMUP_STEPS,
    TRAIN_BATCH_SIZE,
    EPOCHS,
    ACCUMULATION_STEPS,
)
예제 #2
0
                'LEARN_RATE': LEARN_RATE,
                'EPOCHS': EPOCHS,
                'WARMUP_STEPS': WARMUP_STEPS,
                'SEQUENCE_LENGTH': SEQUENCE_LENGTH,
            },
            config_file,
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

# Model setup: load_model yields an indexable group of classes, used here as
# (tokenizer class, config class, model class) in that order.
MODEL_CLASS = load_model(MODEL_PREFIX)
TOKENIZER = MODEL_CLASS[0].from_pretrained(MODEL_NAME)
CONFIG = MODEL_CLASS[1].from_pretrained(MODEL_NAME, num_labels=3)  # 3 labels
MODEL = MODEL_CLASS[2].from_pretrained(MODEL_NAME, config=CONFIG)

# Training data: load the 'train' split for each language, concatenate the
# splits, tokenize them to SEQUENCE_LENGTH and wrap the result in dataset().
per_language_splits = [load_semeval(DATASET, 'train', lang) for lang in LANGS]
train_dataset = dataset(
    tokenize(chain(*per_language_splits), TOKENIZER, SEQUENCE_LENGTH)
)
train_sampler = RandomSampler(train_dataset)
# The name train_dataset is reused for the loader that serves batches of
# TRAIN_BATCH_SIZE drawn through train_sampler. drop_last=True presumably
# drops an incomplete final batch (torch DataLoader semantics -- confirm
# against the imports, which are not visible here).
train_dataset = DataLoader(
    train_dataset,
    sampler=train_sampler,
    batch_size=TRAIN_BATCH_SIZE,
    drop_last=True,
)

# Run training. The call is purely positional; keep the argument order in
# sync with the definition of training() elsewhere in the project.
training(
    train_dataset,
    val_datasets(TOKENIZER, SEQUENCE_LENGTH),
    MODEL,
    EXPERIMENT,
    LEARN_RATE,
    WARMUP_STEPS,
    TRAIN_BATCH_SIZE,
    EPOCHS,
    ACCUMULATION_STEPS,
)