def load_train_val_examples(args):
    """Load labeled NER records and split them into train/val examples.

    Records are read from
    ``train_data_generator(args.train_file, args.eval_file)`` as
    ``(guid, text, _, entities)`` tuples, converted to examples by
    ``load_ner_labeled_examples`` and then divided by
    ``split_train_eval_examples``.

    Args:
        args: Options object. The attributes read here are ``train_file``,
            ``eval_file``, ``allow_overlap``, ``num_augements``, ``seg_len``,
            ``seg_backoff``, ``train_rate`` and ``fold``.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    # The third field of every record is ignored by this loader.
    lines = [
        {'guid': guid, 'text': text, 'entities': entities}
        for guid, text, _, entities in train_data_generator(
            args.train_file, args.eval_file)
    ]

    # Overlap is forced off whenever augmentation is requested.
    allow_overlap = False if args.num_augements > 0 else args.allow_overlap

    train_base_examples = load_ner_labeled_examples(
        lines,
        ner_labels,
        seg_len=args.seg_len,
        seg_backoff=args.seg_backoff,
        num_augements=args.num_augements,
        allow_overlap=allow_overlap,
    )

    train_examples, val_examples = split_train_eval_examples(
        train_base_examples,
        train_rate=args.train_rate,
        fold=args.fold,
        shuffle=True,
    )

    logger.info(f"Loaded {len(train_examples)} train examples, "
                f"{len(val_examples)} val examples.")
    return train_examples, val_examples
def load_train_val_examples(args,
                            train_data_generator,
                            glue_labels,
                            shuffle=True,
                            train_rate=0.9,
                            num_augments=0):
    """Load examples via ``load_glue_examples`` and split them into train/val.

    Args:
        args: Options object. The attributes read here are ``train_file``,
            ``train_sample_rate``, ``train_rate`` and ``fold``.
        train_data_generator: Passed through to ``load_glue_examples``.
        glue_labels: Accepted but not used by this function.
        shuffle: Forwarded to ``split_train_eval_examples``.
        train_rate: Accepted but not used; the split ratio is taken from
            ``args.train_rate``.
        num_augments: Accepted but not used by this function.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    examples = load_glue_examples(train_data_generator, args.train_file)

    # A sample rate below 1.0 keeps only the leading fraction of the examples.
    if args.train_sample_rate < 1.0:
        examples = examples[:int(len(examples) * args.train_sample_rate)]

    # Split into training and validation sets.
    # theta provides the split_train_eval_examples helper.
    from theta.utils import split_train_eval_examples
    split = split_train_eval_examples(
        examples,
        train_rate=args.train_rate,
        fold=args.fold,
        shuffle=shuffle,
    )
    train_examples, val_examples = split

    logger.info(f"Loaded {len(train_examples)} train examples, "
                f"{len(val_examples)} val examples.")
    return train_examples, val_examples
def load_train_val_examples(args):
    """Load examples via ``load_glue_examples`` and split them into train/val.

    ``train_data_generator`` is not a parameter; it is resolved from the
    enclosing scope.

    Args:
        args: Options object. The attributes read here are ``train_file``,
            ``train_rate`` and ``fold``.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    examples = load_glue_examples(train_data_generator, args.train_file)

    # Split into training and validation sets.
    # theta provides the split_train_eval_examples helper.
    from theta.utils import split_train_eval_examples
    split = split_train_eval_examples(
        examples,
        train_rate=args.train_rate,
        fold=args.fold,
        shuffle=True,
    )
    train_examples, val_examples = split

    logger.info(f"Loaded {len(train_examples)} train examples, "
                f"{len(val_examples)} val examples.")
    return train_examples, val_examples
def load_train_val_examples(args, seg_len=0, seg_backoff=0):
    """Load the base training examples and split them into train/val sets.

    ``train_data_generator``, ``train_base_file`` and ``fold`` are not
    parameters; they are resolved from the enclosing scope.

    Args:
        args: Options object. It is passed to ``load_examples`` and read here
            for ``train_rate`` and ``seed``.
        seg_len: Forwarded to ``load_examples``.
        seg_backoff: Forwarded to ``load_examples``.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    base_examples = load_examples(
        args,
        train_data_generator,
        train_base_file,
        seg_len=seg_len,
        seg_backoff=seg_backoff,
    )

    # The split is seeded from args.seed.
    split = split_train_eval_examples(
        base_examples,
        train_rate=args.train_rate,
        fold=fold,
        shuffle=True,
        random_state=args.seed,
    )
    train_examples, val_examples = split

    logger.info(
        f"Loaded {len(train_examples)} train examples, {len(val_examples)} val examples."
    )
    return train_examples, val_examples