def test_play_the_game_less_badly():
    bad_seeds_01_env = BadSeeds01(
        seed_count=5, bad_seed_count=3, max_episode_length=2 * 2 + 3 * 3 + 1
    )

    # measure the good seeds twice
    # measure the bad seeds three times
    for time_i, seed_i in enumerate(
        concatv(
            take(
                n=2 * len(bad_seeds_01_env.good_seeds),
                seq=cycle(bad_seeds_01_env.good_seed_indices),
            ),
            take(
                n=3 * len(bad_seeds_01_env.bad_seeds),
                seq=cycle(bad_seeds_01_env.bad_seed_indices),
            ),
        )
    ):
        next_state, terminal, reward = bad_seeds_01_env.execute(actions=seed_i)
        assert next_state[time_i, seed_i] != 0.0
        assert terminal is False
        assert reward == 0.0

    # measure the first good seed again
    next_state, terminal, reward = bad_seeds_01_env.execute(
        actions=bad_seeds_01_env.good_seed_indices[0]
    )
    assert next_state[-1, bad_seeds_01_env.good_seed_indices[0]] != 0.0
    assert terminal is True
    # reward is the number of times the least-measured seed was measured
    assert reward == 2.0
def test_play_the_game_less_badly():
    bad_seeds_03_env = BadSeeds03(
        seed_count=5, bad_seed_count=3, max_episode_length=3 + 2 * 2 + 3 * 3 + 1
    )

    # measure the good seeds twice
    # measure the bad seeds three times
    for time_i, seed_i in enumerate(
        concatv(
            take(
                n=2 * len(bad_seeds_03_env.good_seeds),
                seq=cycle(bad_seeds_03_env.good_seed_indices),
            ),
            take(
                n=3 * len(bad_seeds_03_env.bad_seeds),
                seq=cycle(bad_seeds_03_env.bad_seed_indices),
            ),
        )
    ):
        time_i += 3
        next_state, terminal, reward = bad_seeds_03_env.execute(actions=seed_i)
        assert bad_seeds_03_env.history_array[time_i, seed_i] != 0.0
        assert terminal is False
        assert reward == 0.0

    measurement_counts, measured_seed_counts = count_measurements(
        bad_seeds_03_env.history_array
    )
    expected_measurement_counts = np.zeros_like(measurement_counts)
    expected_measurement_counts[0, bad_seeds_03_env.good_seed_indices] = 5
    expected_measurement_counts[0, bad_seeds_03_env.bad_seed_indices] = 6
    assert np.all(measurement_counts == expected_measurement_counts)

    # measure the first good seed again
    next_state, terminal, reward = bad_seeds_03_env.execute(
        actions=bad_seeds_03_env.good_seed_indices[0]
    )
    print(f"history:\n{bad_seeds_03_env.history_array}")
    measurement_counts, measured_seed_counts = count_measurements(
        bad_seeds_03_env.history_array
    )
    print(f"measurement_counts: {measurement_counts}")
    assert next_state[-1, bad_seeds_03_env.good_seed_indices[0]] != 0.0
    assert terminal is True
    # reward is the number of times the least-measured seed was measured
    assert reward == 6.0
    expected_measurement_counts[0, bad_seeds_03_env.good_seed_indices[0]] += 1
    assert np.all(measurement_counts == expected_measurement_counts)
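# Note: count_measurements is used by the test above but not defined in this
# snippet. The sketch below is a hypothetical reconstruction, assuming it
# counts the nonzero entries per seed (column) of the environment's history
# array and returns a (1, seed_count)-shaped count array plus the number of
# seeds measured at least once.
import numpy as np


def count_measurements(history_array):
    # one row per time step, one column per seed; nonzero means "measured"
    measurement_counts = np.count_nonzero(history_array, axis=0).reshape(1, -1)
    # how many distinct seeds have been measured at least once
    measured_seed_counts = int(np.count_nonzero(measurement_counts))
    return measurement_counts, measured_seed_counts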
def optimize_steps(steps):
    """
    Optimize steps.

    Currently only optimizes per step type. See the :func:`_optimizer`
    decorator for more information on how to register an optimizer.

    :param pbag steps: Collection of steps.
    :return: a pbag of steps.
    """
    def grouping_fn(step):
        step_type = type(step)
        if step_type in _optimizers:
            return step_type
        else:
            return "unoptimizable"

    steps_by_type = groupby(grouping_fn, steps)
    unoptimizable = steps_by_type.pop("unoptimizable", [])
    omg_optimized = concat(
        _optimizers[step_type](steps)
        for step_type, steps in steps_by_type.iteritems())
    return pbag(concatv(omg_optimized, unoptimizable))
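# The docstring above refers to an _optimizer decorator and an _optimizers
# registry that are not shown in this snippet. A minimal sketch of how such a
# registry could be populated, assuming _optimizers maps a step type to a
# function that rewrites all steps of that type (illustrative only, not the
# library's actual decorator):
_optimizers = {}


def _optimizer(step_type):
    """Register the decorated function as the optimizer for ``step_type``."""
    def decorator(optimizer_fn):
        _optimizers[step_type] = optimizer_fn
        return optimizer_fn
    return decorator

# Hypothetical usage: collapse all steps of type SomeStep into a single step.
# @_optimizer(SomeStep)
# def optimize_some_steps(steps):
#     return [merge_some_steps(steps)]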
def test_concatv():
    assert list(concatv([], [], [])) == []
    assert (list(take(5, concatv(['a', 'b'], range(1000000000))))
            == ['a', 'b', 0, 1, 2])
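# For context: concatv is the variadic counterpart of toolz's concat, and both
# return lazy iterators, which is why chaining a billion-element range above is
# cheap until items are actually consumed. A small self-contained illustration:
from toolz import concat, concatv, take

# concat takes one iterable of iterables; concatv takes the iterables directly.
assert list(concat([[1, 2], (3,)])) == [1, 2, 3]
assert list(concatv([1, 2], (3,))) == [1, 2, 3]

# Both are lazy: nothing is materialized until iteration.
lazy = concatv("ab", range(10**9))
assert list(take(3, lazy)) == ["a", "b", 0]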
import logging

from toolz import itertoolz

from msvdd_bloc import regexes, tokenize
from msvdd_bloc.resumes import education
from msvdd_bloc.resumes import parse_utils

LOGGER = logging.getLogger(__name__)

#######################
## CRF-BASED PARSING ##
#######################

FIELD_SEP_TEXTS = {
    sep for sep in itertoolz.concatv(
        education.constants.FIELD_SEPS,
        education.constants.FIELD_SEP_DTS,
        education.constants.FIELD_SEP_SMS,
        education.constants.LEFT_BRACKETS,
        education.constants.RIGHT_BRACKETS,
    )
}
ITEM_SEP_TEXTS = set(education.constants.FIELD_SEP_SMS)
INSTITUTION_TEXTS = {
    "university", "college", "institute", "department", "dept.", "high",
    "school", "academy",
}
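# These sets are plain lookup lexicons; a typical use is a cheap membership
# test while featurizing tokens for the CRF. The helpers below are purely
# illustrative and are not the package's actual feature functions:
def token_is_field_sep(token_text):
    # True if the token is one of the separator/bracket strings collected above
    return token_text in FIELD_SEP_TEXTS


def token_looks_like_institution(token_text):
    # case-insensitive match against the institution keyword lexicon
    return token_text.lower() in INSTITUTION_TEXTS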
import logging

from toolz import itertoolz

from msvdd_bloc import regexes, tokenize
from msvdd_bloc.resumes import constants
from msvdd_bloc.resumes import parse_utils
from msvdd_bloc.resumes import work

LOGGER = logging.getLogger(__name__)

FIELD_SEP_TEXTS = {
    sep for sep in itertoolz.concatv(
        work.constants.FIELD_SEPS,
        work.constants.FIELD_SEP_DTS,
        work.constants.FIELD_SEP_SMS,
        constants.LEFT_BRACKETS,
        constants.RIGHT_BRACKETS,
    )
}
COMPANY_TEXTS = set(
    text.lower()
    for text in work.constants.COMPANY_TYPES + work.constants.COMPANY_MODIFIERS
)
POSITION_TEXTS = set(
    text.lower()
    for text in work.constants.POSITION_LEVELS + work.constants.POSITION_TYPES
)


def parse_lines(lines, tagger=None):
def main():
    # parameters
    data_dir = '../input/'
    bert_model = '../bert-large-wwm-uncased'  # replace this with the path to your own large wwm model
    # bert_model = 'bert-base-uncased'
    # bert_model = './oldtoxic'  # pretrained weights fine-tuned on the old toxic data; download: https://www.kaggle.com/qinhui1999/old-toxic-bert-v2
    task_name = 'MyPro'
    output_dir = 'checkpoints/'
    model_save_pth = 'checkpoints/bert_large_wwm.pth'
    max_seq_length = 220
    do_train = True
    do_eval = True
    do_lower_case = True
    train_batch_size = 56
    eval_batch_size = 200
    learning_rate = 1e-5
    num_train_epochs = 1
    warmup_proportion = 0.05
    no_cuda = False
    local_rank = -1
    seed = 42
    gradient_accumulation_steps = 8
    optimize_on_cpu = False
    fp16 = False
    save_checkpoints_steps = 50000
    loss_scale = 128

    # processors that prepare the model inputs; the ones on GitHub are probably all for English
    processors = {'mypro': MyPro}

    if local_rank == -1 or no_cuda:
        device = torch.device(
            "cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", local_rank)
        n_gpu = 1
        torch.distributed.init_process_group(backend='nccl')
        if fp16:
            logger.info(
                "16-bits training currently not supported in distributed training"
            )
            fp16 = False  # (see https://github.com/pytorch/pytorch/pull/13496)
    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu,
                bool(local_rank != -1))

    if gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(gradient_accumulation_steps))

    train_batch_size = int(train_batch_size / gradient_accumulation_steps)

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)

    if not do_train and not do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if os.path.exists(output_dir) and os.listdir(output_dir):
        # raise ValueError("Output directory ({}) already exists and is not empty.".format(output_dir))
        print('The checkpoint directory already exists...')
    else:
        os.makedirs(output_dir, exist_ok=True)

    task_name = task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = BertTokenizer.from_pretrained(bert_model,
                                              do_lower_case=do_lower_case)
    # print("tokenizer", tokenizer)
    train_examples = None
    num_train_steps = None
    if do_train:
        train_examples = processor.get_train_examples(data_dir)
        num_train_steps = int(
            len(train_examples) / train_batch_size /
            gradient_accumulation_steps * num_train_epochs)

    # Prepare model
    # model = BertForSequenceClassification.from_pretrained(bert_model, num_labels=2,
    #     cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(local_rank))
    model = ToxicModel(bert_model, device)
    # You can unfreeze the last layer of bert by calling set_trainable(model.bert.encoder.layer[23], True)
    # set_trainable(model.bert, False)
    # freeze the embedding layer
    # set_trainable(model.bert.embeddings, False)
    # set_trainable(model.bert.encoder.layer[11], True)
    # set_trainable(model.head, True)
    # model.load_state_dict(torch.load('checkpoints/bert_classification_2epoch.pth')['state_dict'])
    if fp16:
        model.half()
    model.to(device)
    if local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    if fp16:
        param_optimizer = [
            (n, param.clone().detach().to('cpu').float().requires_grad_())
            for n, param in model.named_parameters()
        ]
    elif optimize_on_cpu:
        param_optimizer = [
            (n, param.clone().detach().to('cpu').requires_grad_())
            for n, param in model.named_parameters()
        ]
    else:
        param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in param_optimizer
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay_rate': 0.01
    }, {
        'params': [
            p for n, p in param_optimizer
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay_rate': 0.0
    }]
    t_total = num_train_steps
    if local_rank != -1:
        t_total = t_total // torch.distributed.get_world_size()
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=learning_rate,
                         warmup=warmup_proportion,
                         t_total=t_total)

    global_step = 0
    if do_train:
        if os.path.exists('train.token_new_cleaned_wwm.npy'):
            train_features = np.load('train.token_new_cleaned_wwm.npy',
                                     allow_pickle=True)
        else:
            parallel = Parallel(300, backend="multiprocessing", verbose=5)
            train_features = list(
                concatv(*parallel(
                    delayed(convert_examples_to_features)(
                        example, label_list, max_seq_length, tokenizer)
                    for example in list(partition_all(300, train_examples)))))
            train_features = np.asarray(train_features)
            np.save('train.token_new_cleaned_wwm', train_features)

        logger.info("***** Running training *****")
        logger.info(" Num examples = %d", len(train_examples))
        logger.info(" Batch size = %d", train_batch_size)
        logger.info(" Num steps = %d", num_train_steps)
        torch.cuda.empty_cache()
        all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                     dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                      dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                       dtype=torch.long)
        print('y_aux', np.asarray([f.y_aux for f in train_features]).shape)
        all_label_ids = torch.tensor(np.hstack([
            np.asarray([f.label_id for f in train_features]),
            np.asarray([f.y_aux for f in train_features])
        ]), dtype=torch.float32)
        train_data = TensorDataset(all_input_ids, all_input_mask,
                                   all_segment_ids, all_label_ids)
        if local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(
            train_data,
            sampler=train_sampler,
            batch_size=train_batch_size,
            num_workers=2,
            pin_memory=True,
        )
        # model.load_state_dict(torch.load('checkpoints/bert_large_wwm.pth')['state_dict'])
        # model.load_state_dict(torch.load('checkpoints/0_80000_iterations.pth')['state_dict'])
        model.train()
        best_score = 0
        flags = 0
        torch.cuda.empty_cache()
        '''
        model.load_state_dict(torch.load('checkpoints/0_20000_iterations.pth')['model'])
        optimizer.load_state_dict(torch.load('checkpoints/0_20000_iterations.pth')['optimizer'])
        old_iter = int(torch.load('checkpoints/0_20000_iterations.pth')['iteration'])
        '''
        old_iter = -1
        for i_epoch in trange(int(num_train_epochs), desc="Epoch"):
            torch.cuda.empty_cache()
            iteration = 0  # counter
            save_point = save_checkpoints_steps  # checkpoint every save_checkpoints_steps iterations
            for step, batch in enumerate(
                    tqdm(train_dataloader, desc="Iteration")):
                if iteration <= old_iter:
                    iteration += 1
                    continue
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss = model(input_ids, segment_ids, input_mask, label_ids)
                torch.cuda.empty_cache()
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if fp16 and loss_scale != 1.0:
                    # rescale loss for fp16 training
                    # see https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
                    loss = loss * loss_scale
                if gradient_accumulation_steps > 1:
                    loss = loss / gradient_accumulation_steps
                loss.backward()

                if (step + 1) % gradient_accumulation_steps == 0:
                    if fp16 or optimize_on_cpu:
                        if fp16 and loss_scale != 1.0:
                            # scale down gradients for fp16 training
                            for param in model.parameters():
                                if param.grad is not None:
                                    param.grad.data = param.grad.data / loss_scale
                        is_nan = set_optimizer_params_grad(
                            param_optimizer,
                            model.named_parameters(),
                            test_nan=True)
                        if is_nan:
                            logger.info(
                                "FP16 TRAINING: Nan in gradients, reducing loss scaling"
                            )
                            loss_scale = loss_scale / 2
                            model.zero_grad()
                            continue
                        optimizer.step()
                        copy_optimizer_params_to_model(
                            model.named_parameters(), param_optimizer)
                    else:
                        optimizer.step()
                    model.zero_grad()

                # Save model
                if iteration % save_point == 0 and iteration > 0:
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }
                    checkpoint_path = os.path.join(
                        output_dir,
                        '{}_{}_iterations.pth'.format(i_epoch, iteration))
                    torch.save(checkpoint, checkpoint_path)
                    logging.info('Model saved to {}'.format(checkpoint_path))
                    val(model, processor, data_dir, max_seq_length,
                        eval_batch_size, label_list, tokenizer, device)
                iteration += 1

        checkpoint = {
            'state_dict': model.state_dict(),
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(checkpoint, model_save_pth)

    val(model, processor, data_dir, max_seq_length, eval_batch_size,
        label_list, tokenizer, device)
    test(model, processor, data_dir, max_seq_length, eval_batch_size,
         label_list, tokenizer, device)
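# The feature-conversion step in main() chunks the training examples with
# partition_all, maps each chunk through joblib's Parallel/delayed, and then
# flattens the per-chunk result lists with concatv. A minimal sketch of that
# chunk-map-flatten pattern, using a toy worker in place of
# convert_examples_to_features:
from joblib import Parallel, delayed
from toolz import concatv, partition_all


def square_chunk(chunk):
    # stand-in for convert_examples_to_features: turns one chunk into a list
    return [x * x for x in chunk]


examples = list(range(10))
chunks = list(partition_all(3, examples))  # [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9,)]
per_chunk = Parallel(n_jobs=2)(delayed(square_chunk)(c) for c in chunks)
flat = list(concatv(*per_chunk))           # flattened back into one list
assert flat == [x * x for x in examples]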