Example No. 1
def transformer(*args, **kwargs):
    """
    Transformer model from `"Attention Is All You Need" (Vaswani et al., 2017)
    <https://arxiv.org/abs/1706.03762>`_.
    """
    parser = options.get_interactive_generation_parser()
    model = TransformerModel.from_pretrained(parser, *args, **kwargs)
    return model
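A hedged usage sketch for the wrapper above: stock fairseq's `from_pretrained` takes a model directory and a checkpoint file name, so, assuming this fork forwards those after the parser, a call might look like the following (the paths are hypothetical placeholders):

# 'checkpoints/' and 'model.pt' are placeholders, not real paths.
model = transformer('checkpoints/', checkpoint_file='model.pt')
model.eval()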
Example No. 2
def fconv(*args, **kwargs):
    """
    A fully convolutional model, i.e. a convolutional encoder and a
    convolutional decoder, as described in `"Convolutional Sequence to Sequence
    Learning" (Gehring et al., 2017) <https://arxiv.org/abs/1705.03122>`_.
    """
    parser = options.get_interactive_generation_parser()
    model = FConvModel.from_pretrained(parser, *args, **kwargs)
    return model
Example No. 3
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument('--prompts', type=str, default=None, required=True)
    parser.add_argument('--output', type=str, default=None, required=True)
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--samples-per-prompt', type=int, default=1)

    args = options.parse_args_and_arch(parser)

    np.random.seed(args.seed)
    utils.set_torch_seed(args.seed)

    main(args)
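Because `options.parse_args_and_arch` reads `sys.argv`, an entry point like this can also be driven programmatically. A minimal sketch, where the data directory, checkpoint, and file names are hypothetical placeholders:

import sys

sys.argv = [sys.argv[0], 'data-bin/', '--path', 'checkpoint.pt',
            '--seed', '1',
            '--prompts', 'prompts.txt', '--output', 'samples.txt']
cli_main()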
Example No. 4
def cli_main():
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    distributed_utils.call_main(convert_namespace_to_omegaconf(args), main)
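Here `convert_namespace_to_omegaconf` regroups the flat argparse namespace into fairseq's structured Hydra config, so `main` receives grouped, typed fields instead of a flat namespace. A short illustration (field names follow fairseq's dataclass config):

cfg = convert_namespace_to_omegaconf(args)
print(cfg.common.seed)       # was args.seed
print(cfg.generation.beam)   # was args.beam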
Example No. 5
def cli_main():
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    distributed_utils.call_main(args, main)
Example No. 6
    def __init__(self):
        parser = options.get_interactive_generation_parser()
        args = options.parse_args_and_arch(parser)
        cfg = convert_namespace_to_omegaconf(args)
        utils.import_user_module(cfg.common)

        if cfg.interactive.buffer_size < 1:
            cfg.interactive.buffer_size = 1
        if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None:
            cfg.dataset.batch_size = 1

        assert (not cfg.generation.sampling
                or cfg.generation.nbest == cfg.generation.beam
                ), "--sampling requires --nbest to be equal to --beam"
        assert (not cfg.dataset.batch_size
                or cfg.dataset.batch_size <= cfg.interactive.buffer_size
                ), "--batch-size cannot be larger than --buffer-size"

        use_cuda = torch.cuda.is_available() and not cfg.common.cpu

        # Setup task, e.g., translation
        task = tasks.setup_task(cfg.task)

        # Load ensemble
        models, _model_args = checkpoint_utils.load_model_ensemble(
            utils.split_paths(cfg.common_eval.path),
            task=task,
            suffix=cfg.checkpoint.checkpoint_suffix,
            strict=(cfg.checkpoint.checkpoint_shard_count == 1),
            num_shards=cfg.checkpoint.checkpoint_shard_count,
        )

        # Set dictionaries
        src_dict = task.source_dictionary
        tgt_dict = task.target_dictionary

        # Optimize ensemble for generation
        for model in models:
            if model is None:
                continue
            if cfg.common.fp16:
                model.half()
            if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
                model.cuda()
            model.prepare_for_inference_(cfg)

        # Initialize generator
        generator = task.build_generator(models, cfg.generation)

        # Handle tokenization and BPE
        tokenizer = encoders.build_tokenizer(cfg.tokenizer)
        bpe = encoders.build_bpe(cfg.bpe)

        # Load alignment dictionary for unknown word replacement
        # (None if no unknown word replacement, empty if no path to align dictionary)
        align_dict = utils.load_align_dict(cfg.generation.replace_unk)

        max_positions = utils.resolve_max_positions(
            task.max_positions(), *[model.max_positions() for model in models])
        if cfg.interactive.buffer_size > 1:
            logger.info("Sentence buffer size: %s",
                        cfg.interactive.buffer_size)

        self.context = {
            'bpe': bpe,
            'tokenizer': tokenizer,
            'cfg': cfg,
            'task': task,
            'max_positions': max_positions,
            'use_cuda': use_cuda,
            'generator': generator,
            'models': models,
            'src_dict': src_dict,
            'tgt_dict': tgt_dict,
            'align_dict': align_dict,
        }
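A hedged sketch of how the stored context can encode one input line before generation, mirroring fairseq's interactive.py (the helper below is illustrative, not part of the class above):

def encode_line(ctx, line):
    # ctx is the dict stored in self.context above.
    if ctx['tokenizer'] is not None:
        line = ctx['tokenizer'].encode(line)
    if ctx['bpe'] is not None:
        line = ctx['bpe'].encode(line)
    # Map to source-dictionary indices without growing the vocabulary.
    return ctx['src_dict'].encode_line(line, add_if_not_exist=False).long()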
Example No. 7
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument(
        "--prefix-length",
        type=int,
        default=1,
        help="Prompt prefix length (including <s>)",
    )
    parser.add_argument(
        "--duration-scale",
        type=float,
        default=1,
        help="Multiply durations by the given scaler",
    )
    parser.add_argument("--debug",
                        action="store_true",
                        help="Process only the first batch")
    parser.add_argument("--n_hypotheses", type=int, default=1)
    parser.add_argument("--filter-names", type=str, default=None)
    parser.add_argument("--max-length",
                        type=int,
                        default=200,
                        help="Maximal produced length")

    parser.add_argument("--teacher-force-tokens",
                        action="store_true",
                        default=False)
    parser.add_argument("--teacher-force-duration",
                        action="store_true",
                        default=False)
    parser.add_argument("--teacher-force-f0",
                        action="store_true",
                        default=False)

    parser.add_argument("--copy-target", action="store_true", default=False)
    parser.add_argument("--min-length", type=int, default=None)
    parser.add_argument("--f0-discretization-bounds", type=str, default=None)
    parser.add_argument("--dequantize-prosody", action="store_true")
    parser.add_argument("--batch-explosion-rate", type=int, default=1)

    parser.add_argument(
        "--metric",
        choices=["continuation", "teacher_force_everything", "correlation"],
        required=True,
    )

    parser.add_argument("--wandb", action="store_true")
    parser.add_argument("--wandb-project-name", type=str, default="eslm")
    parser.add_argument("--wandb-tags", type=str, default="")
    parser.add_argument("--wandb-run-name", type=str, default="")

    parser.add_argument("--T-token", type=float, default=1.0)
    parser.add_argument("--T-duration", type=float, default=1.0)
    parser.add_argument("--T-f0", type=float, default=1.0)

    parser.add_argument("--n-workers", type=int, default=1)

    parser.add_argument("--eval-subset",
                        type=str,
                        default="valid",
                        choices=["valid", "test"])

    args = options.parse_args_and_arch(parser)

    assert args.prefix_length >= 1, \
        "Prefix length includes bos token <s>, hence the minimum is 1."
    assert args.temperature >= 0.0, "T must be non-negative!"

    if args.dequantize_prosody:
        assert args.f0_discretization_bounds

    world_size = args.n_workers or torch.cuda.device_count()
    if world_size > 1:
        import random

        mp.set_start_method("spawn", force=True)
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = str(random.randint(10_000, 50_000))

        mp.spawn(
            main,
            nprocs=world_size,
            args=(
                world_size,
                args,
            ),
            join=True,
        )
    else:
        main(rank=0, world_size=world_size, args=args)
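Note that `mp.spawn` prepends the worker's rank to the tuple given via `args`, so the spawned `main` must accept `(rank, world_size, args)`. A minimal worker skeleton under that assumption (the NCCL setup is an assumption, not shown in the source):

import torch
import torch.distributed as dist

def main(rank, world_size, args):
    # MASTER_ADDR/MASTER_PORT were exported by cli_main before spawning.
    dist.init_process_group('nccl', rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)  # one GPU per worker (assumption)
    ...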
Example No. 8
def cli_main():
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    main(args)
Example No. 9
    def from_checkpoint(self,
                        checkpoint,
                        roberta_cache_path=None,
                        inspector=None):
        '''
        Initialize model from checkpoint
        '''

        # load fairseq task
        parser = options.get_interactive_generation_parser()
        options.add_optimization_args(parser)
        args = options.parse_args_and_arch(parser, input_args=['--data dummy'])

        # Read extra arguments
        model_folder = os.path.dirname(checkpoint.split(':')[0])
        # config with fairseq-preprocess and fairseq-train args
        config_json = f'{model_folder}/config.json'
        assert os.path.isfile(config_json), \
            "Model trained with v0.3.0 or above?"
        with open(config_json) as fid:
            extra_args = json.loads(fid.read())
        prepro_args = extra_args['fairseq_preprocess_args']
        train_args = extra_args['fairseq_train_args']
        # extra args by hand
        args.source_lang = 'en'
        args.target_lang = 'actions'
        args.path = checkpoint
        args.roberta_cache_path = roberta_cache_path
        dim = train_args['--pretrained-embed-dim'][0]
        args.model_overrides = \
            "{'pretrained_embed_dim':%s, 'task': 'translation'}" % dim
        assert bool(args.left_pad_source), "Only left pad supported"

        # dictionaries
        src_dict_path = f'{model_folder}/dict.{args.source_lang}.txt'
        tgt_dict_path = f'{model_folder}/dict.{args.target_lang}.txt'
        assert os.path.isfile(src_dict_path), \
            f"Missing {src_dict_path}.\nModel trained with v0.3.0 or above?"\
            "\ncheck scripts/stack-transformer/update_model_to_v0.3.0.sh"
        assert os.path.isfile(tgt_dict_path), \
            f"Missing {tgt_dict_path}.\nModel trained with v0.3.0 or above?"\
            "\ncheck scripts/stack-transformer/update_model_to_v0.3.0.sh"
        src_dict = Dictionary.load(src_dict_path)
        tgt_dict = Dictionary.load(tgt_dict_path)

        use_cuda = torch.cuda.is_available() and not args.cpu

        # Override the task to ensure compatibility with old models
        # TODO: Task may not be even needed
        task = TranslationTask(args, src_dict, tgt_dict)
        model = load_models(args, task, use_cuda)

        # Load RoBERTa
        embeddings = PretrainedEmbeddings(
            name=prepro_args['--pretrained-embed'][0],
            bert_layers=[int(x) for x in prepro_args['--bert-layers']]
            if '--bert-layers' in prepro_args else None,
            model=load_roberta(name=prepro_args['--pretrained-embed'][0],
                               roberta_cache_path=args.roberta_cache_path,
                               roberta_use_gpu=use_cuda))

        print("Finished loading models")

        # State machine variables
        machine_rules = f'{model_folder}/train.rules.json'
        assert os.path.isfile(machine_rules), f"Missing {machine_rules}"
        machine_type = prepro_args['--machine-type'][0]

        return self(model,
                    machine_rules,
                    machine_type,
                    src_dict,
                    tgt_dict,
                    use_cuda,
                    embeddings=embeddings,
                    inspector=inspector)
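The class that owns `from_checkpoint` is not shown in this excerpt. Assuming a classmethod-style binding, a hypothetical call might be (the class name and checkpoint path are placeholders):

amr_parser = StackTransformerParser.from_checkpoint(
    'DATA/models/checkpoint_best.pt')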
Example No. 10
def fconv_self_att(*args, **kwargs):
    parser = options.get_interactive_generation_parser()
    model = FConvModelSelfAtt.from_pretrained(parser, *args, **kwargs)
    return model
Example No. 11
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument(
        "--prefix-length",
        type=int,
        default=1,
        help="Prompt prefix length (including <s>)",
    )
    parser.add_argument("--output", type=str, default=None, required=True)
    parser.add_argument("--debug",
                        action="store_true",
                        help="Process only the first batch")
    parser.add_argument(
        "--ignore-durations",
        action="store_true",
        help="If set, the duration stream is ignored",
    )
    parser.add_argument("--max-length",
                        type=int,
                        default=200,
                        help="Maximal produced length")
    parser.add_argument("--code-type",
                        choices=["cpc_km100", "hubert"],
                        default="cpc_km100")
    parser.add_argument("--max-samples", type=int, default=None)
    parser.add_argument("--prompt-duration-scaler", type=float, default=1.0)
    parser.add_argument("--teacher-force-tokens",
                        action="store_true",
                        default=False)
    parser.add_argument("--teacher-force-duration",
                        action="store_true",
                        default=False)
    parser.add_argument("--teacher-force-f0",
                        action="store_true",
                        default=False)
    parser.add_argument("--filter-names", type=str, default=None)
    parser.add_argument(
        "--match-duration",
        action="store_true",
        help="Do not produce sequences longer that ground-truth",
    )
    parser.add_argument(
        "--cut-prompt",
        action="store_true",
        help="Remove prompt from the produced audio",
    )
    parser.add_argument("--short-curcuit",
                        action="store_true",
                        help="Use 'target' as a sample")
    parser.add_argument("--f0-discretization-bounds", type=str, default=None)

    parser.add_argument("--batch-explosion-rate", type=int, default=1)

    parser.add_argument("--T-token", type=float, default=1.0)
    parser.add_argument("--T-duration", type=float, default=1.0)
    parser.add_argument("--T-f0", type=float, default=1.0)

    parser.add_argument("--subset",
                        type=str,
                        default="valid",
                        choices=["test", "valid"])

    args = options.parse_args_and_arch(parser)

    assert args.prefix_length >= 1, \
        "Prefix length includes bos token <s>, hence the minimum is 1."
    assert all(t >= 0 for t in (args.T_token, args.T_f0, args.T_duration)), \
        "T must be non-negative!"

    world_size = torch.cuda.device_count()
    if world_size > 1:
        import random

        mp.set_start_method("spawn", force=True)
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = str(random.randint(10_000, 50_000))

        print(
            f"Using {world_size} devices, master port {os.environ['MASTER_PORT']}"
        )

        mp.spawn(
            main,
            nprocs=world_size,
            args=(
                world_size,
                args,
            ),
            join=True,
        )
    else:
        main(rank=0, world_size=world_size, args=args)
Example No. 12
def setup(source_lang, target_lang):
    # Build the fairseq CLI arguments programmatically (notebook-style).
    sys.argv = sys.argv[:1] + [
        '--path', 'model/checkpoints_' + source_lang + '_' + target_lang + '.pt',
        'model/',
        '--beam', '5',
        '--source-lang', source_lang,
        '--target-lang', target_lang,
        '--tokenizer', 'space',
        '--bpe', 'bert',
        '--bpe-vocab-file', 'model/dict.' + source_lang + '.txt',
        # '--no-repeat-ngram-size', '2',
    ]

    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    
    utils.import_user_module(args)

    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    # logger.info(args)  # prints the full argument namespace (verbose)

    use_cuda = torch.cuda.is_available() and not args.cpu
    
    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    logger.info('loading model(s) from {}'.format(args.path))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        args.path.split(os.pathsep),
        arg_overrides=eval(args.model_overrides),
        task=task,
        suffix=getattr(args, "checkpoint_suffix", ""),
    )

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()
        if use_cuda:
            model.cuda()

    # Initialize generator
    generator = task.build_generator(models, args)
    
    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    max_positions = utils.resolve_max_positions(
        task.max_positions(),
        *[model.max_positions() for model in models]
    )

    if args.buffer_size > 1:
        logger.info('Sentence buffer size: %s', args.buffer_size)
    
    return args, task, max_positions, use_cuda, generator, models, tgt_dict, src_dict, align_dict
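A usage sketch for the returned tuple (the language codes are placeholders):

(args, task, max_positions, use_cuda, generator,
 models, tgt_dict, src_dict, align_dict) = setup('en', 'de')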
Example No. 13
def make_parser():
    """Note: as the names indicate, use s2x_args (e.g. ST, ASR) for models with
    speech input, x2s_args for models with speech output (e.g. TTS), and
    mt_args for translation models (e.g. MT, T2U).
    For direct S2ST models, use x2s_args to provide model details.
    """
    parser = options.get_speech_generation_parser()
    parser.add_argument("--target-is-code", action="store_true", default=False)
    parser.add_argument("--config", type=str)
    parser.add_argument(
        "--model-type",
        default="S2UT",
        choices=[
            "S2S", "TTS", "S2UT", "MT", "S2T", "2StageS2ST", "3StageS2ST"
        ],
        help="Choose one of the models. For model inference implementation, refer to core.py",
    )
    parser.add_argument(
        "--dataset-path",
        type=str,
        help="""File to load dataset from. Assumes dataset is a list of samples.
        Each sample is a dict of format {'net_input': {'src_tokens': torch.tensor(), 'src_lengths': torch.tensor()}}""",
    )
    parser.add_argument(
        "--dataset-type",
        type=str,
        default="npy",
        choices=["npy", "raw"],
        help="""Type of input dataset file""",
    )
    parser.add_argument(
        "--read-using-sf",
        action="store_true",
        default=False,
        help="""Whether soundfile should be used to read the raw dataset""",
    )
    parser.add_argument(
        "--dataset-size",
        default=None,
        type=int,
        help="Dataset size to use for benchmarking",
    )
    parser.add_argument(
        "--dump-speech-waveforms-dir",
        default=None,
        type=str,
        help="Directory to dump the speech waveforms computed on the dataset.",
    )
    parser.add_argument(
        "--dump-waveform-file-prefix",
        default="",
        type=str,
        help="File name prefix for the saved speech waveforms",
    )
    parser.add_argument("--feat-dim",
                        default=80,
                        type=int,
                        help="Input feature dimension")
    parser.add_argument(
        "--target-sr",
        default=16000,
        type=int,
        help="Target sample rate for dumping waveforms",
    )

    options.add_generation_args(parser)
    options.add_interactive_args(parser)
    return parser
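A minimal sketch of consuming this parser, following the same pattern as the entry points above:

parser = make_parser()
args = options.parse_args_and_arch(parser)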
Example No. 14
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument('--transformer-big-zhen', action='store_true')
    args = options.parse_args_and_arch(parser)
    distributed_utils.call_main(args, main)