Exemplo n.º 1
0
            return 'turn to'
        elif v == 'work':
            return 'work with'
        else:
            return v

    while True:

        # random choices
        slot2filler = {
            'name': random.choice(names),
            'nn1': random.choice(nouns_s),
            'nn2': random.choice(animates),
            'vbd': random.choice(verbs_past),
            'vbd2': random.choice(verbs_past),  # used in template2 only
            'vb': random.choice(verbs_base),  # used in template 1 only
        }

        slot2filler['vb'] = add_preposition_after_vb(slot2filler['vb'])

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good


if __name__ == '__main__':
    # Print every collected sentence with a right-aligned counter.
    # The counter is position // 2 + 1, so each two consecutive sentences
    # share one 1-based number.
    collected = collect_unique_pairs(main)
    for position, sentence in enumerate(collected):
        pair_number = position // 2 + 1
        print(f'{pair_number:>12,}', sentence)
Exemplo n.º 2
0
        f'Making test sentences for {phenomenon} {paradigm} with vocab={configs.Data.vocab_name}'
    )
    print(
        '***************************************************************************************'
    )

    try:
        paradigm_module = importlib.import_module(
            f'zorro.{phenomenon}.{paradigm}')
    except RuntimeError as e:
        print(e)
        print(f'Skipping {paradigm}')
        continue

    # generate sentences once, in order to save the same sentences to two locations
    sentences = list(collect_unique_pairs(paradigm_module.main))
    assert sentences

    # capitalize proper nouns for case-sensitive models like roberta-base
    sentences = [capitalize_names_in_sentence(s) for s in sentences]

    # TODO save info about each sentence's template in the text file saved locally

    # save each file in repository, and also on shared drive
    for out_path in [
            Path("../sentences") / configs.Data.vocab_name /
            f'{phenomenon}-{paradigm}.txt',
            SECONDARY_OUT_PATH / configs.Data.vocab_name /
            f'{phenomenon}-{paradigm}.txt',
    ]:
        if not out_path.parent.is_dir():