# Set the random seed for reproducible experiments random.seed(args.seed) torch.manual_seed(args.seed) # Set the logger utils.set_logger(os.path.join(args.model_dir, 'train.log')) logging.info('device: {}'.format(args.device)) logging.info('Hyper params:%r' % args.__dict__) # Create the input data pipeline logging.info('Loading the datasets...') bl = BatchLoader(args) ## Load train and dev data train_data = bl.load_data('train.json') dev_data = bl.load_data('dev.json') ## Train data ner_train_data, re_train_data = bl.build_data(train_data, is_train=True) train_bls = bl.batch_loader(ner_train_data, re_train_data, args.ner_max_len, args.re_max_len, args.batch_size, is_train=True) num_batchs_per_task = [len(train_bl) for train_bl in train_bls] logging.info( 'num of batch per task for train: {}'.format(num_batchs_per_task)) train_task_ids = sum([[i] * num_batchs_per_task[i] for i in range(len(num_batchs_per_task))], []) shuffle(train_task_ids) ## Dev data ner_dev_data, _ = bl.build_data(dev_data, is_train=False) dev_bl = bl.batch_loader(ner_dev_data,