示例#1
0
def load_train_val_examples(args):
    """Load labeled NER records and split them into train/validation sets.

    Records are pulled from
    ``train_data_generator(args.train_file, args.eval_file)``, turned into
    ``{'guid', 'text', 'entities'}`` dicts, converted by
    ``load_ner_labeled_examples`` and finally divided by
    ``split_train_eval_examples``.

    Args:
        args: Settings object. Attributes read here: ``train_file``,
            ``eval_file``, ``allow_overlap``, ``num_augements``,
            ``seg_len``, ``seg_backoff``, ``train_rate`` and ``fold``.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    records = []
    source = train_data_generator(args.train_file, args.eval_file)
    for guid, text, _, entities in source:
        # NOTE(review): the LabeledText instance is never used afterwards;
        # the construction is kept in case it validates its arguments --
        # confirm and drop if it does not.
        LabeledText(guid, text, entities)
        records.append({
            'guid': guid,
            'text': text,
            'entities': entities,
        })

    # Overlapping is forced off whenever augmentation is requested.
    requested_overlap = args.allow_overlap
    allow_overlap = False if args.num_augements > 0 else requested_overlap

    conversion_options = {
        'seg_len': args.seg_len,
        'seg_backoff': args.seg_backoff,
        'num_augements': args.num_augements,
        'allow_overlap': allow_overlap,
    }
    base_examples = load_ner_labeled_examples(records, ner_labels,
                                              **conversion_options)

    split = split_train_eval_examples(base_examples,
                                      train_rate=args.train_rate,
                                      fold=args.fold,
                                      shuffle=True)
    train_examples, val_examples = split

    logger.info(f"Loaded {len(train_examples)} train examples, "
                f"{len(val_examples)} val examples.")
    return train_examples, val_examples
示例#2
0
def load_train_val_examples(args,
                            train_data_generator,
                            glue_labels,
                            shuffle=True,
                            train_rate=0.9,
                            num_augments=0):
    """Load GLUE-style examples, optionally subsample, then split them.

    Args:
        args: Settings object. Attributes read here: ``train_file``,
            ``train_sample_rate``, ``train_rate`` and ``fold``.
        train_data_generator: Passed through to ``load_glue_examples``
            together with ``args.train_file``.
        glue_labels: Accepted but not referenced in this function.
        shuffle: Forwarded to ``split_train_eval_examples``.
        train_rate: Accepted but not referenced; the split uses
            ``args.train_rate`` instead.
        num_augments: Accepted but not referenced in this function.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    examples = load_glue_examples(train_data_generator, args.train_file)

    # Keep only the leading fraction of the data when a sample rate below
    # 1.0 is configured.
    sample_rate = args.train_sample_rate
    if sample_rate < 1.0:
        keep_count = int(len(examples) * sample_rate)
        examples = examples[:keep_count]

    # Split into training and validation sets using the helper that
    # theta provides.
    from theta.utils import split_train_eval_examples
    split = split_train_eval_examples(examples,
                                      train_rate=args.train_rate,
                                      fold=args.fold,
                                      shuffle=shuffle)
    train_examples, val_examples = split

    logger.info(f"Loaded {len(train_examples)} train examples, "
                f"{len(val_examples)} val examples.")
    return train_examples, val_examples
示例#3
0
def load_train_val_examples(args):
    """Load GLUE-style examples and split them into train/validation sets.

    Args:
        args: Settings object. Attributes read here: ``train_file``,
            ``train_rate`` and ``fold``.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples`` (called with ``shuffle=True``).
    """
    examples = load_glue_examples(train_data_generator, args.train_file)

    # Split into training and validation sets using the helper that
    # theta provides.
    from theta.utils import split_train_eval_examples
    split_options = {
        'train_rate': args.train_rate,
        'fold': args.fold,
        'shuffle': True,
    }
    split = split_train_eval_examples(examples, **split_options)
    train_examples, val_examples = split

    logger.info(f"Loaded {len(train_examples)} train examples, "
                f"{len(val_examples)} val examples.")
    return train_examples, val_examples
示例#4
0
def load_train_val_examples(args, seg_len=0, seg_backoff=0):
    """Load the base training examples and split them into train/val sets.

    Args:
        args: Settings object. Passed to ``load_examples``; ``train_rate``
            and ``seed`` are also read here for the split.
        seg_len: Forwarded to ``load_examples``.
        seg_backoff: Forwarded to ``load_examples``.

    Returns:
        The ``(train_examples, val_examples)`` pair produced by
        ``split_train_eval_examples``.
    """
    # NOTE(review): `train_base_file` and `fold` are not bound inside this
    # function; presumably they are module-level names -- confirm.
    base_examples = load_examples(
        args,
        train_data_generator,
        train_base_file,
        seg_len=seg_len,
        seg_backoff=seg_backoff,
    )

    split = split_train_eval_examples(
        base_examples,
        train_rate=args.train_rate,
        fold=fold,
        shuffle=True,
        random_state=args.seed,
    )
    train_examples, val_examples = split

    summary = f"Loaded {len(train_examples)} train examples, {len(val_examples)} val examples."
    logger.info(summary)
    return train_examples, val_examples