示例#1
0
def cli_main():
    """Command-line entry point: parse args, resolve the YAML config path, run main().

    ``--yaml_file`` is resolved relative to this script's directory (with a
    ``.yml`` suffix appended); ``--out_file`` is user-expanded when given.
    """
    import argparse
    arg_parser = argparse.ArgumentParser(
        description=
        "Downloading/Decompressing CodeSearchNet dataset(s) or Tree-Sitter Library(ies)"
    )
    arg_parser.add_argument(
        "--yaml_file",
        "-f",
        type=str,
        help="load {language}.yml for train",
        default='config/csn_feng/ruby',
    )
    arg_parser.add_argument(
        '--out_file',
        '-o',
        type=str,
        help='output generated file',
        default=None,
    )
    cli_args = arg_parser.parse_args()
    # Config file lives next to this script; the CLI value omits the .yml suffix.
    yaml_file = os.path.join(os.path.dirname(__file__), f"{cli_args.yaml_file}.yml")
    if cli_args.out_file is None:
        out_file = None
    else:
        out_file = recursive_expanduser(cli_args.out_file)
    LOGGER.info('Load arguments in {}'.format(yaml_file))
    args = load_yaml(yaml_file)
    LOGGER.info(args)
    main(args, out_file)
示例#2
0
def load_state(model_path):
    """Restore a checkpoint from *model_path* and prepare the model for inference.

    Rebuilds the task and model from the checkpoint's stored args, loads the
    weights, optionally casts to fp16 and moves to the last CUDA device, and
    switches the model to eval mode.

    Returns:
        (args, task, model, use_cuda) tuple.
    """
    checkpoint = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = checkpoint["args"]
    # Normalize any user-home paths stored inside the checkpointed args.
    args = recursive_expanduser(recursive_contractuser(args))
    task = tasks.setup_task(args)  # also loads the source/target dictionaries
    model = task.build_model(args)
    model.load_state_dict(checkpoint["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if args['common']['fp16'] and use_cuda:
        model.half()
    if use_cuda:
        torch.cuda.empty_cache()
        # Pin inference to the last visible GPU.
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    del checkpoint  # release CPU copy of the weights
    return args, task, model, use_cuda
示例#3
0
def main(model_path, input):
    """Load the model at *model_path* and complete the given *input* snippet.

    Returns the decoded completion produced by the task's sequence completor.
    """
    LOGGER.info('Load model from {}'.format(model_path))
    checkpoint = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = checkpoint["args"]
    # Normalize any user-home paths stored inside the checkpointed args.
    args = recursive_contractuser(args, old_cache_name='.ncc')
    args = recursive_expanduser(args)
    task = tasks.setup_task(args)  # also loads the source/target dictionaries
    model = task.build_model(args)
    model.load_state_dict(checkpoint["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        # Pin inference to the last visible GPU.
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    if args['common']['fp16'] and use_cuda:
        model.half()

    sample = task.encode_input(input)
    if use_cuda:
        sample = utils.move_to_cuda(sample)
    generator = task.sequence_completor
    net_output = generator.complete(models=[model], sample=sample)
    return task.decode_output(net_output)
示例#4
0
from dataset.codexglue.code_to_text import (
    LANGUAGES,
    MODES,
)
from ncc import tasks
from ncc.data import (
    Dictionary,
    indexed_dataset,
)
from ncc.utils.file_ops.yaml_io import recursive_expanduser
from ncc.utils.file_ops import file_io
from ncc.utils.path_manager import PathManager

if __name__ == '__main__':
    # FIX: `os` is used below but does not appear in this file's import block;
    # import it locally so the script runs standalone.
    import os

    # Merge per-language docstring datasets into one multilingual mmap dataset.
    task = tasks.get_task('multilingual_denoising')
    base_dir = recursive_expanduser(
        '~/ncc_data/codexglue/code_to_text/multilingual_denoising/data-mmap')

    # Shared vocabulary used by every language split.
    dict_file = os.path.join(base_dir, 'dict.jsonl')
    vocab = task.load_dictionary(dict_file)

    for mode in MODES:
        dst_file = os.path.join(base_dir, 'docstring', f"{mode}.docstring.spm")
        PathManager.mkdir(os.path.dirname(dst_file))
        # mmap builder; every per-language file is appended into it.
        ds = indexed_dataset.make_builder(f'{dst_file}.mmap',
                                          impl='mmap',
                                          vocab_size=len(vocab))
        for lang in LANGUAGES:
            src_file = os.path.join(base_dir, lang, f"{mode}.docstring.spm")
            ds.merge_file_(src_file)
        ds.finalize(f'{dst_file}.idx')
示例#5
0
def cli_main(model_path, input, kwargs='{}'):
    """Normalize CLI inputs and forward them to :func:`main`.

    *kwargs* is a JSON string of extra keyword options; ``topk`` defaults to 5
    when absent.
    """
    model_path = recursive_expanduser(model_path)
    # Collapse all whitespace runs in the snippet to single spaces.
    input = re.sub(r'\s+', ' ', input).strip()
    options = ujson.loads(kwargs)
    options.setdefault('topk', 5)
    return main(model_path, input, **options)