def train(all_args, model, tokenizer, train_dataset, valid_dataset, ignore_index):
    """Train a GPT-2 summarization model and log necessary details.

    Args:
        all_args: argparse object with train-loop settings (batch_size,
            num_workers, lr, device, num_train_epochs,
            gradient_accumulation_steps, max_grad_norm, model_dir).
        model: GPT-2 model to finetune.
        tokenizer: GPT-2 tokenizer (used when generating sample summaries).
        train_dataset: GPT21024Dataset object with training data.
        valid_dataset: GPT21024Dataset object with validation data.
        ignore_index: token id not considered in loss calculation.
    """
    # Build the training dataloader with random sampling.
    train_sampler = RandomSampler(train_dataset)
    train_dl = DataLoader(train_dataset, sampler=train_sampler,
                          batch_size=all_args.batch_size,
                          num_workers=all_args.num_workers)

    # Loss, optimizer and LR scheduler; the padding token is excluded from the loss.
    loss_fct = CrossEntropyLoss(ignore_index=ignore_index)
    optimizer = AdamW(model.parameters(), lr=all_args.lr)
    scheduler = get_linear_schedule_with_warmup(optimizer, 100, 80000)

    global_step = 0
    model.zero_grad()
    set_seed(all_args)
    for epoch_number in range(1, all_args.num_train_epochs + 1):
        epoch_iterator = tqdm(train_dl, desc="Training")
        for step, batch in enumerate(epoch_iterator):
            # Language modeling: inputs and labels are the same token sequence.
            inputs, labels = torch.tensor(batch['article']), torch.tensor(batch['article'])
            inputs = inputs.to(all_args.device)
            labels = labels.to(all_args.device)
            attention_mask = torch.tensor(batch['attention_mask']).to(all_args.device)
            model.train()
            logits = model(inputs, attention_mask=attention_mask)[0]
            # Separator-token index for every sequence in the batch.
            index = batch['sum_idx']
            loss = 0
            for idx, logs, labs in zip(index, logits, labels):
                # Only tokens to the right of the separator (the reference
                # summary) contribute to the loss; predictions and labels are
                # shifted by one position relative to each other.
                shift_logits = logs[idx:-1, :]
                shift_labels = labs[idx + 1:]
                loss += loss_fct(shift_logits, shift_labels)
            # Normalize by the number of gradient-accumulation steps and by
            # the batch size.
            loss = loss / all_args.gradient_accumulation_steps / index.shape[0]
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), all_args.max_grad_norm)
            if (step + 1) % all_args.gradient_accumulation_steps == 0:
                # One optimizer step per gradient_accumulation_steps batches.
                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1
                print("updates passed: %d\tloss: %f" % (global_step, loss.item()), end='\n\n')
                if (step + 1) % (500 * all_args.gradient_accumulation_steps) == 0:
                    # Every 500 optimizer steps: print sample summaries and
                    # compute metrics on the validation set.
                    print('After', global_step + 1, 'updates: ', end='\n\n')
                    evaluate(all_args, model, valid_dataset, ignore_index)
                    generate_sample(valid_dataset, tokenizer, model, num=2,
                                    eval_step=True, device=all_args.device)
                    watch_metrics(all_args, model, tokenizer, valid_dataset,
                                  num=50, mode='val')
        # Save the finetuned model after every epoch; exist_ok makes reruns
        # with the same model_dir not crash on an existing directory.
        new_model_dir = os.path.join(all_args.model_dir, str(epoch_number))
        os.makedirs(new_model_dir, exist_ok=True)
        model.save_pretrained(new_model_dir)
def train(model, provider, optimizer, criterion, iterations, device, model_comb, scheduler=None):
    """Train a music language model and periodically dump MIDI samples.

    Args:
        model: network producing logits over the token vocabulary.
        provider: batch source with get_batch() and to_midi() methods.
        optimizer: torch optimizer for the model parameters.
        criterion: loss taking log-probabilities and target indices
            (e.g. NLLLoss, since log_softmax is applied here).
        iterations: total number of training iterations.
        device: torch device the batches are moved to.
        model_comb: subdirectory name used for the sample MIDI files.
        scheduler: optional LR scheduler, stepped once per iteration.

    Returns:
        List of mean losses, one entry per 1000 iterations (possibly
        truncated if interrupted with Ctrl-C).
    """
    mean_losses = []
    losses = []
    try:
        for iteration in tqdm(range(iterations)):
            if scheduler is not None:
                # NOTE(review): scheduler.step() before optimizer.step() is
                # the pre-1.1 PyTorch ordering; newer versions expect the
                # opposite order — confirm intended behavior.
                scheduler.step()
            data, targets = provider.get_batch()
            data = torch.tensor(data=data, dtype=torch.long).to(device)
            targets = torch.tensor(data=targets, dtype=torch.long).to(device)
            model.zero_grad()
            target_preds = F.log_softmax(model(data), dim=1)
            loss = criterion(target_preds, targets)
            loss.backward()
            optimizer.step()
            losses.append(loss.cpu().item())
            if len(losses) == 1000:
                # Record the running mean every 1000 iterations.
                mean_losses.append(np.mean(losses))
                losses = []
            if iteration % 100000 == 0 or iteration == iterations - 1:
                # Periodically (and on the final iteration) generate and
                # save a MIDI sample for inspection.
                with torch.no_grad():
                    sample = generate_sample(model, provider, device=device,
                                             length=100, temperature=1.0)
                    midi = provider.to_midi(sample)
                    midi.write(f'../music/{model_comb}/sample_{iteration}.midi')
    except KeyboardInterrupt:
        # Allow manual interruption; return the losses collected so far.
        pass
    return mean_losses
def get_mean_hamming(data, test_count=10, size=50):
    """Return the mean Hamming distance between Viterbi paths and true states.

    Runs `test_count` experiments: sample an observation/state sequence of
    length `size` from the HMM described by `data`, decode the observations
    with Viterbi, and count positions where the decoded path disagrees with
    the true hidden states.
    """
    test_results = []
    # range() instead of xrange(): the latter is a NameError on Python 3.
    for _ in range(test_count):
        observations, result_states = generate_sample(data, size=size)
        prob, path = viterbi(observations, data.start_probability,
                             data.transition_probability,
                             data.emission_probability)
        # Hamming distance: number of mismatched positions.
        mismatches = sum(1 for a, b in zip(path, result_states) if a != b)
        test_results.append(mismatches)
    return np.array(test_results).mean()
def gen():
    # Thin zero-argument wrapper: forwards module-level globals to
    # generate_sample. Presumably used where a no-arg callable is required
    # (e.g. a dataset/generator factory) — TODO confirm against the caller.
    return generate_sample(data_tr_sampled, tokenizer, qids_name_tr, voc_size_mnt, voc_size_ent, BATCH, nlines)
# NOTE(review): incomplete fragment — the interior of a GPT-2 training loop
# (gradient accumulation, clipping, TensorBoard logging, periodic evaluation)
# followed by the truncated start of `evaluate` (its docstring is cut off).
# Left byte-identical because the enclosing definitions are cut at both edges.
loss = loss/args.gradient_accumulation_steps loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) tr_loss += loss.item() if (step + 1) % args.gradient_accumulation_steps == 0: optimizer.step() scheduler.step() # Update learning rate schedule model.zero_grad() global_step += 1 writer.add_scalar('lr', scheduler.get_lr()[0], global_step) writer.add_scalar('loss', (tr_loss - logging_loss)/args.gradient_accumulation_steps, global_step) logging_loss = tr_loss print("loss:", loss.item(), end='\n\n') if (step + 1)/args.gradient_accumulation_steps == 1.0: print('After 1st update: ', end='\n\n') generate_sample(valid_dataset, tokenizer, num=2, eval_step=False,device=args.device) if (step + 1) % (10*args.gradient_accumulation_steps) == 0: results = evaluate(args, model, valid_dataset, ignore_index, global_step) for key, value in results.items(): writer.add_scalar('eval_{}'.format(key), value, global_step) print('After', global_step+1,'updates: ', end='\n\n') generate_sample(valid_dataset, tokenizer, num=2, eval_step=True,device=args.device) def evaluate(args, model, eval_dataset, ignore_index, global_step=None): """ Returns perplexity score on validation dataset. Args: args: dict that contains all the necessary information passed by user while training
def train(args, model, tokenizer, train_dataset, valid_dataset, ignore_index):
    """Train a GPT/GPT-2 summarization model and log details to TensorBoard.

    Args:
        args: object with the settings passed by the user while training
            (batch_size, num_workers, lr, device, num_train_epochs,
            gradient_accumulation_steps, max_grad_norm).
        model: finetuned GPT/GPT-2 model.
        tokenizer: GPT/GPT-2 tokenizer (used when generating samples).
        train_dataset: GPT21024Dataset object for training data.
        valid_dataset: GPT21024Dataset object for validation data.
        ignore_index: token id not considered in loss calculation.
    """
    writer = SummaryWriter('./logs')
    train_sampler = RandomSampler(train_dataset)
    train_dl = DataLoader(train_dataset, sampler=train_sampler,
                          batch_size=args.batch_size,
                          num_workers=args.num_workers)
    # Padding token is excluded from the loss.
    loss_fct = CrossEntropyLoss(ignore_index=ignore_index)
    optimizer = AdamW(model.parameters(), lr=args.lr)
    scheduler = WarmupLinearSchedule(optimizer, 100, 80000)
    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = tnrange(int(args.num_train_epochs), desc="Epoch")
    set_seed(args)
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dl, desc="Training")
        for step, batch in enumerate(epoch_iterator):
            # Language modeling: inputs and labels are the same token sequence.
            inputs, labels = torch.tensor(batch['article']), torch.tensor(batch['article'])
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            logits = model(inputs)[0]
            # Index of the separator token. NOTE(review): .item() assumes
            # batch_size == 1 — confirm with the dataloader configuration.
            idx = batch['sum_idx'].item()
            # Only consider loss on the reference summary (right of the
            # separator), just like seq2seq models; shift by one position.
            shift_logits = logits[..., idx:-1, :].contiguous()
            shift_labels = labels[..., idx + 1:].contiguous()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                            shift_labels.view(-1))
            loss = loss / args.gradient_accumulation_steps
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # update learning-rate schedule
                model.zero_grad()
                global_step += 1
                writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                writer.add_scalar('loss',
                                  (tr_loss - logging_loss) / args.gradient_accumulation_steps,
                                  global_step)
                logging_loss = tr_loss
                print("loss:", loss.item(), end='\n\n')
                # Exact integer test instead of the original float-division
                # check `(step + 1) / gas == 1.0` — equivalent for positive
                # ints, without relying on float equality. Fires on the first
                # optimizer update of each epoch (step resets per epoch).
                if (step + 1) == args.gradient_accumulation_steps:
                    print('After 1st update: ', end='\n\n')
                    generate_sample(valid_dataset, tokenizer, model, num=2, eval_step=False)
                if (step + 1) % (10 * args.gradient_accumulation_steps) == 0:
                    # Every 10 optimizer updates: evaluate and log metrics,
                    # then print two sample summaries.
                    results = evaluate(args, model, valid_dataset, ignore_index, global_step)
                    for key, value in results.items():
                        writer.add_scalar('eval_{}'.format(key), value, global_step)
                    print('After', global_step + 1, 'updates: ', end='\n\n')
                    generate_sample(valid_dataset, tokenizer, model, num=2, eval_step=True)
# NOTE(review): incomplete fragment — near-duplicate of another training-loop
# fragment in this file (this variant omits the `device=` keyword in the
# generate_sample calls) and ends with the truncated start of `evaluate`.
# Left byte-identical because the enclosing definitions are cut at both edges.
loss = loss/args.gradient_accumulation_steps loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) tr_loss += loss.item() if (step + 1) % args.gradient_accumulation_steps == 0: optimizer.step() scheduler.step() # Update learning rate schedule model.zero_grad() global_step += 1 writer.add_scalar('lr', scheduler.get_lr()[0], global_step) writer.add_scalar('loss', (tr_loss - logging_loss)/args.gradient_accumulation_steps, global_step) logging_loss = tr_loss print("loss:", loss.item(), end='\n\n') if (step + 1)/args.gradient_accumulation_steps == 1.0: print('After 1st update: ', end='\n\n') generate_sample(valid_dataset, tokenizer, num=2, eval_step=False) if (step + 1) % (10*args.gradient_accumulation_steps) == 0: results = evaluate(args, model, valid_dataset, ignore_index, global_step) for key, value in results.items(): writer.add_scalar('eval_{}'.format(key), value, global_step) print('After', global_step+1,'updates: ', end='\n\n') generate_sample(valid_dataset, tokenizer, num=2, eval_step=True) def evaluate(args, model, eval_dataset, ignore_index, global_step=None): """ Returns perplexity score on validation dataset. Args: args: dict that contains all the necessary information passed by user while training
# NOTE(review): incomplete fragment — begins mid-statement (the tail of a
# continuous-feature list whose opening bracket is outside this view), then
# declares candlestick-pattern categorical columns, builds a RandomForest and
# runs utils.generate_sample / utils.check_distribution. Left byte-identical
# because the leading statement is cut off.
'volume', 'numberOfTrades', 'var_ema', 'var_bollinger', 'var_stoch', 'RSI' ] categorical_columns = [ 'rsi_indicator', 'stoch_indicator', 'CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE', 'CDL3OUTSIDE', 'CDL3STARSINSOUTH', 'CDL3WHITESOLDIERS', 'CDLABANDONEDBABY', 'CDLADVANCEBLOCK', 'CDLBELTHOLD', 'CDLBREAKAWAY', 'CDLCLOSINGMARUBOZU', 'CDLCONCEALBABYSWALL', 'CDLCOUNTERATTACK', 'CDLDARKCLOUDCOVER', 'CDLDOJI', 'CDLDOJISTAR', 'CDLDRAGONFLYDOJI', 'CDLENGULFING', 'CDLEVENINGDOJISTAR', 'CDLEVENINGSTAR', 'CDLGAPSIDESIDEWHITE', 'CDLGRAVESTONEDOJI', 'CDLHAMMER', 'CDLHANGINGMAN', 'CDLHARAMI', 'CDLHARAMICROSS', 'CDLHIGHWAVE', 'CDLHIKKAKE', 'CDLHIKKAKEMOD', 'CDLHOMINGPIGEON', 'CDLIDENTICAL3CROWS', 'CDLINNECK', 'CDLINVERTEDHAMMER', 'CDLKICKING', 'CDLKICKINGBYLENGTH', 'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU', 'CDLMATCHINGLOW', 'CDLMATHOLD', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR', 'CDLONNECK', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS', 'CDLSEPARATINGLINES', 'CDLSHOOTINGSTAR', 'CDLSHORTLINE', 'CDLSPINNINGTOP', 'CDLSTALLEDPATTERN', 'CDLSTICKSANDWICH', 'CDLTAKURI', 'CDLTASUKIGAP', 'CDLTHRUSTING', 'CDLTRISTAR', 'CDLUNIQUE3RIVER', 'CDLUPSIDEGAP2CROWS', 'CDLXSIDEGAP3METHODS' ] model = RandomForestClassifier(n_estimators=60, max_depth=7, min_samples_leaf=5) SAMPLE_SIZE = utils.generate_sample(df_staging, df_master) utils.check_distribution(SAMPLE_SIZE, df_staging, df_master, mode, model, continuous_columns, categorical_columns)
def run_on_test_data(data, size=10):
    """Sample a test observation sequence and run the model on it.

    Draws an observation sequence of length `size` from `data` (the true
    hidden states are discarded) and returns a pair of (model output for
    the observations, the observations themselves).
    """
    sampled_obs, _states = generate_sample(data, size=size)
    prediction = run_on_seq(sampled_obs)
    return prediction, sampled_obs
shutil.copy('opt.py', os.path.join(result, 'opt.py')) # setup models generator = Generator(opt.n_charactor) discriminator = Discriminator() if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() generator.to_gpu() discriminator.to_gpu() # setup optimizers opt_g = set_opt(generator, chainer.optimizers.Adam(opt.lr_gen, beta1=0.5), chainer.optimizer.GradientClipping(10), chainer.optimizer.WeightDecay(0.0001)) opt_d = set_opt(discriminator, chainer.optimizers.Adam(opt.lr_dis, beta1=0.5), chainer.optimizer.GradientClipping(10), chainer.optimizer.WeightDecay(0.0001)) # setup trainer updater = DFGUpdater(opt_g, opt_d, train_iter, args.gpu) trainer = chainer.training.Trainer(updater, opt.trigger, out=result) # setup extensions trainer.extend(extensions.LogReport(trigger=(100, 'iteration'))) trainer.extend(extensions.PrintReport( ['iteration', 'loss/recon', 'loss/h', 'loss/gan/gen', 'loss/gan/dis']), trigger=(100, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=100)) trainer.extend(generate_sample(generator, valid_iter, args.gpu)) trainer.run()