def transformer(*args, **kwargs):
    """
    Transformer model from
    `"Attention Is All You Need" (Vaswani et al., 2017) <https://arxiv.org/abs/1706.03762>`_.
    """
    parser = options.get_interactive_generation_parser()
    model = TransformerModel.from_pretrained(parser, *args, **kwargs)
    return model
def fconv(*args, **kwargs):
    """
    A fully convolutional model, i.e. a convolutional encoder and a
    convolutional decoder, as described in
    `"Convolutional Sequence to Sequence Learning" (Gehring et al., 2017)
    <https://arxiv.org/abs/1705.03122>`_.
    """
    parser = options.get_interactive_generation_parser()
    model = FConvModel.from_pretrained(parser, *args, **kwargs)
    return model
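# Hedged usage sketch for the hub-style loaders above (`transformer`, `fconv`,
# and `fconv_self_att` further below): positional and keyword arguments are
# forwarded, together with a freshly built interactive-generation parser,
# straight to the model class's `from_pretrained`. The checkpoint directory
# and file name here are hypothetical placeholders, not paths these loaders
# are known to ship with.
def _demo_hub_loader():
    model = transformer('checkpoints/wmt14.en-de', checkpoint_file='model.pt')  # hypothetical paths
    model.eval()  # switch to inference mode before generating
    return model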
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument('--prompts', type=str, default=None, required=True)
    parser.add_argument('--output', type=str, default=None, required=True)
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--samples-per-prompt', type=int, default=1)
    args = options.parse_args_and_arch(parser)

    np.random.seed(args.seed)
    utils.set_torch_seed(args.seed)

    main(args)
def cli_main():
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    distributed_utils.call_main(convert_namespace_to_omegaconf(args), main)
def cli_main():
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    distributed_utils.call_main(args, main)
def __init__(self):
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    cfg = convert_namespace_to_omegaconf(args)

    utils.import_user_module(cfg.common)

    if cfg.interactive.buffer_size < 1:
        cfg.interactive.buffer_size = 1
    if cfg.dataset.max_tokens is None and cfg.dataset.batch_size is None:
        cfg.dataset.batch_size = 1

    assert (
        not cfg.generation.sampling or cfg.generation.nbest == cfg.generation.beam
    ), "--sampling requires --nbest to be equal to --beam"
    assert (
        not cfg.dataset.batch_size
        or cfg.dataset.batch_size <= cfg.interactive.buffer_size
    ), "--batch-size cannot be larger than --buffer-size"

    use_cuda = torch.cuda.is_available() and not cfg.common.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(cfg.task)

    # Load ensemble
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(cfg.common_eval.path),
        task=task,
        suffix=cfg.checkpoint.checkpoint_suffix,
        strict=(cfg.checkpoint.checkpoint_shard_count == 1),
        num_shards=cfg.checkpoint.checkpoint_shard_count,
    )

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        if model is None:
            continue
        if cfg.common.fp16:
            model.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            model.cuda()
        model.prepare_for_inference_(cfg)

    # Initialize generator
    generator = task.build_generator(models, cfg.generation)

    # Handle tokenization and BPE
    tokenizer = encoders.build_tokenizer(cfg.tokenizer)
    bpe = encoders.build_bpe(cfg.bpe)

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(cfg.generation.replace_unk)

    max_positions = utils.resolve_max_positions(
        task.max_positions(), *[model.max_positions() for model in models]
    )

    if cfg.interactive.buffer_size > 1:
        logger.info("Sentence buffer size: %s", cfg.interactive.buffer_size)

    self.context = {
        'bpe': bpe,
        'tokenizer': tokenizer,
        'cfg': cfg,
        'task': task,
        'max_positions': max_positions,
        'use_cuda': use_cuda,
        'generator': generator,
        'models': models,
        'src_dict': src_dict,
        'tgt_dict': tgt_dict,
        'align_dict': align_dict,
    }
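# Hedged sketch of consuming the `self.context` dict built above to translate
# one sentence, mirroring fairseq's interactive generation flow. The function
# name `_demo_generate` is hypothetical; batching, alignment, and error
# handling are omitted.
def _demo_generate(ctx, line):
    import torch

    # Apply the stored tokenizer and BPE, if configured
    if ctx['tokenizer'] is not None:
        line = ctx['tokenizer'].encode(line)
    if ctx['bpe'] is not None:
        line = ctx['bpe'].encode(line)

    # Binarize into a single-sentence batch
    tokens = ctx['src_dict'].encode_line(line, add_if_not_exist=False).long().unsqueeze(0)
    lengths = torch.tensor([tokens.size(1)])
    if ctx['use_cuda']:
        tokens, lengths = tokens.cuda(), lengths.cuda()
    sample = {'net_input': {'src_tokens': tokens, 'src_lengths': lengths}}

    # Run the ensemble through the generator built in __init__
    hypos = ctx['task'].inference_step(ctx['generator'], ctx['models'], sample)
    best = hypos[0][0]['tokens'].int().cpu()

    # Detokenize the best hypothesis
    out = ctx['tgt_dict'].string(best)
    if ctx['bpe'] is not None:
        out = ctx['bpe'].decode(out)
    if ctx['tokenizer'] is not None:
        out = ctx['tokenizer'].decode(out)
    return out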
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument(
        "--prefix-length",
        type=int,
        default=1,
        help="Prompt prefix length (including <s>)",
    )
    parser.add_argument(
        "--duration-scale",
        type=float,
        default=1,
        help="Multiply durations by the given scaler",
    )
    parser.add_argument("--debug", action="store_true", help="Process only the first batch")
    parser.add_argument("--n_hypotheses", type=int, default=1)
    parser.add_argument("--filter-names", type=str, default=None)
    parser.add_argument("--max-length", type=int, default=200, help="Maximal produced length")
    parser.add_argument("--teacher-force-tokens", action="store_true", default=False)
    parser.add_argument("--teacher-force-duration", action="store_true", default=False)
    parser.add_argument("--teacher-force-f0", action="store_true", default=False)
    parser.add_argument("--copy-target", action="store_true", default=False)
    parser.add_argument("--min-length", type=int, default=None)
    parser.add_argument("--f0-discretization-bounds", type=str, default=None)
    parser.add_argument("--dequantize-prosody", action="store_true")
    parser.add_argument("--batch-explosion-rate", type=int, default=1)
    parser.add_argument(
        "--metric",
        choices=["continuation", "teacher_force_everything", "correlation"],
        required=True,
    )
    parser.add_argument("--wandb", action="store_true")
    parser.add_argument("--wandb-project-name", type=str, default="eslm")
    parser.add_argument("--wandb-tags", type=str, default="")
    parser.add_argument("--wandb-run-name", type=str, default="")
    parser.add_argument("--T-token", type=float, default=1.0)
    parser.add_argument("--T-duration", type=float, default=1.0)
    parser.add_argument("--T-f0", type=float, default=1.0)
    parser.add_argument("--n-workers", type=int, default=1)
    parser.add_argument("--eval-subset", type=str, default="valid", choices=["valid", "test"])

    args = options.parse_args_and_arch(parser)

    assert args.prefix_length >= 1, "Prefix length includes bos token <s>, hence the minimum is 1."
    assert args.temperature >= 0.0, "T must be non-negative!"
    if args.dequantize_prosody:
        assert args.f0_discretization_bounds

    world_size = args.n_workers or torch.cuda.device_count()
    if world_size > 1:
        import random

        mp.set_start_method("spawn", force=True)
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = str(random.randint(10_000, 50_000))
        mp.spawn(
            main,
            nprocs=world_size,
            args=(world_size, args),
            join=True,
        )
    else:
        main(rank=0, world_size=world_size, args=args)
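# Note on the spawn pattern above (and in the similar cli_main further below):
# `mp.spawn` prepends the process rank to the `args=` tuple, so the `main`
# that both branches call is expected to have the shape sketched here. This
# is a stub for illustration; the real `main` lives elsewhere in the module.
def _main_signature_stub(rank, world_size, args):
    # rank: injected by mp.spawn; world_size and args: from the args= tuple
    pass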
def cli_main():
    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)
    main(args)
def from_checkpoint(self, checkpoint, roberta_cache_path=None, inspector=None):
    '''Initialize model from checkpoint'''

    # load fairseq task
    parser = options.get_interactive_generation_parser()
    options.add_optimization_args(parser)
    args = options.parse_args_and_arch(parser, input_args=['--data dummy'])

    # Read extra arguments
    model_folder = os.path.dirname(checkpoint.split(':')[0])
    # config with fairseq-preprocess and fairseq-train args
    config_json = f'{model_folder}/config.json'
    assert os.path.isfile(config_json), \
        "Model trained with v0.3.0 or above?"
    with open(config_json) as fid:
        extra_args = json.loads(fid.read())
    prepro_args = extra_args['fairseq_preprocess_args']
    train_args = extra_args['fairseq_train_args']

    # extra args by hand
    args.source_lang = 'en'
    args.target_lang = 'actions'
    args.path = checkpoint
    args.roberta_cache_path = roberta_cache_path
    dim = train_args['--pretrained-embed-dim'][0]
    args.model_overrides = \
        "{'pretrained_embed_dim':%s, 'task': 'translation'}" % dim
    assert bool(args.left_pad_source), "Only left pad supported"

    # dictionaries
    src_dict_path = f'{model_folder}/dict.{args.source_lang}.txt'
    tgt_dict_path = f'{model_folder}/dict.{args.target_lang}.txt'
    assert os.path.isfile(src_dict_path), \
        f"Missing {src_dict_path}.\nModel trained with v0.3.0 or above?" \
        "\ncheck scripts/stack-transformer/update_model_to_v0.3.0.sh"
    assert os.path.isfile(tgt_dict_path), \
        f"Missing {tgt_dict_path}.\nModel trained with v0.3.0 or above?" \
        "\ncheck scripts/stack-transformer/update_model_to_v0.3.0.sh"
    src_dict = Dictionary.load(src_dict_path)
    tgt_dict = Dictionary.load(tgt_dict_path)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Override task to ensure compatibility with old models
    # TODO: Task may not even be needed
    task = TranslationTask(args, src_dict, tgt_dict)
    model = load_models(args, task, use_cuda)

    # Load RoBERTa
    embeddings = PretrainedEmbeddings(
        name=prepro_args['--pretrained-embed'][0],
        bert_layers=[int(x) for x in prepro_args['--bert-layers']]
        if '--bert-layers' in prepro_args else None,
        model=load_roberta(
            name=prepro_args['--pretrained-embed'][0],
            roberta_cache_path=args.roberta_cache_path,
            roberta_use_gpu=use_cuda,
        ),
    )
    print("Finished loading models")

    # State machine variables
    machine_rules = f'{model_folder}/train.rules.json'
    assert os.path.isfile(machine_rules), f"Missing {machine_rules}"
    machine_type = prepro_args['--machine-type'][0]

    return self(model, machine_rules, machine_type, src_dict, tgt_dict,
                use_cuda, embeddings=embeddings, inspector=inspector)
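# Hedged usage sketch for `from_checkpoint` above. `AMRParser` is a
# hypothetical name for the enclosing class (the `return self(...)` at the
# end suggests this is an alternative constructor, presumably a classmethod),
# and the checkpoint path is a placeholder.
def _demo_from_checkpoint():
    parser = AMRParser.from_checkpoint('DATA/models/checkpoint_best.pt')  # hypothetical class/path
    return parser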
def fconv_self_att(*args, **kwargs):
    parser = options.get_interactive_generation_parser()
    model = FConvModelSelfAtt.from_pretrained(parser, *args, **kwargs)
    return model
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument(
        "--prefix-length",
        type=int,
        default=1,
        help="Prompt prefix length (including <s>)",
    )
    parser.add_argument("--output", type=str, default=None, required=True)
    parser.add_argument("--debug", action="store_true", help="Process only the first batch")
    parser.add_argument(
        "--ignore-durations",
        action="store_true",
        help="If set, the duration stream is ignored",
    )
    parser.add_argument("--max-length", type=int, default=200, help="Maximal produced length")
    parser.add_argument("--code-type", choices=["cpc_km100", "hubert"], default="cpc_km100")
    parser.add_argument("--max-samples", type=int, default=None)
    parser.add_argument("--prompt-duration-scaler", type=float, default=1.0)
    parser.add_argument("--teacher-force-tokens", action="store_true", default=False)
    parser.add_argument("--teacher-force-duration", action="store_true", default=False)
    parser.add_argument("--teacher-force-f0", action="store_true", default=False)
    parser.add_argument("--filter-names", type=str, default=None)
    parser.add_argument(
        "--match-duration",
        action="store_true",
        help="Do not produce sequences longer than the ground truth",
    )
    parser.add_argument(
        "--cut-prompt",
        action="store_true",
        help="Remove prompt from the produced audio",
    )
    parser.add_argument("--short-curcuit", action="store_true", help="Use 'target' as a sample")
    parser.add_argument("--f0-discretization-bounds", type=str, default=None)
    parser.add_argument("--batch-explosion-rate", type=int, default=1)
    parser.add_argument("--T-token", type=float, default=1.0)
    parser.add_argument("--T-duration", type=float, default=1.0)
    parser.add_argument("--T-f0", type=float, default=1.0)
    parser.add_argument("--subset", type=str, default="valid", choices=["test", "valid"])

    args = options.parse_args_and_arch(parser)

    assert args.prefix_length >= 1, "Prefix length includes bos token <s>, hence the minimum is 1."
    assert all(
        t >= 0 for t in [args.T_token, args.T_f0, args.T_duration]
    ), "T must be non-negative!"

    world_size = torch.cuda.device_count()
    if world_size > 1:
        import random

        mp.set_start_method("spawn", force=True)
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = str(random.randint(10_000, 50_000))
        print(f"Using {world_size} devices, master port {os.environ['MASTER_PORT']}")
        mp.spawn(
            main,
            nprocs=world_size,
            args=(world_size, args),
            join=True,
        )
    else:
        main(rank=0, world_size=world_size, args=args)
def setup(source_lang, target_lang):
    # Build a synthetic command line for the interactive-generation parser
    sys.argv = sys.argv[:1]
    sys.argv.append('--path')
    sys.argv.append('model/checkpoints_' + source_lang + '_' + target_lang + '.pt')
    sys.argv.append('model/')
    sys.argv.append('--beam')
    sys.argv.append('5')
    sys.argv.append('--source-lang')
    sys.argv.append(source_lang)
    sys.argv.append('--target-lang')
    sys.argv.append(target_lang)
    sys.argv.append('--tokenizer')
    sys.argv.append('space')
    sys.argv.append('--bpe')
    sys.argv.append('bert')
    sys.argv.append('--bpe-vocab-file')
    sys.argv.append('model/dict.' + source_lang + '.txt')
    # sys.argv.append('--no-repeat-ngram-size')
    # sys.argv.append('2')

    parser = options.get_interactive_generation_parser()
    args = options.parse_args_and_arch(parser)

    utils.import_user_module(args)

    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    # logger.info(args)  # prints a lot of info

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    logger.info('loading model(s) from {}'.format(args.path))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        args.path.split(os.pathsep),
        arg_overrides=eval(args.model_overrides),
        task=task,
        suffix=getattr(args, "checkpoint_suffix", ""),
    )

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()
        if use_cuda:
            model.cuda()

    # Initialize generator
    generator = task.build_generator(models, args)

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    max_positions = utils.resolve_max_positions(
        task.max_positions(), *[model.max_positions() for model in models]
    )

    if args.buffer_size > 1:
        logger.info('Sentence buffer size: %s', args.buffer_size)

    return args, task, max_positions, use_cuda, generator, models, tgt_dict, src_dict, align_dict
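# Hedged usage sketch for `setup` above: the returned tuple carries everything
# needed for generation, in the order defined by its return statement. The
# language pair is a placeholder.
def _demo_setup():
    (args, task, max_positions, use_cuda, generator,
     models, tgt_dict, src_dict, align_dict) = setup('en', 'de')
    # Encoded batches can then be fed to task.inference_step(generator, models, sample)
    return generator, models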
def make_parser():
    """Note: As the names indicate, use s2x_args (e.g., ST, ASR) for models
    with speech input, x2s_args for models with speech output (e.g., TTS),
    and mt_args for translation models (e.g., MT, T2U). For direct S2ST
    models, use x2s_args to provide model details.
    """
    parser = options.get_speech_generation_parser()
    parser.add_argument("--target-is-code", action="store_true", default=False)
    parser.add_argument("--config", type=str)
    parser.add_argument(
        "--model-type",
        default="S2U",
        choices=["S2S", "TTS", "S2UT", "MT", "S2T", "2StageS2ST", "3StageS2ST"],
        help="Choose one of the models. For model inference implementation, refer to core.py",
    )
    parser.add_argument(
        "--dataset-path",
        type=str,
        help="""File to load dataset from. Assumes dataset is a list of samples.
        Each sample is a dict of format
        {'net_input': {'src_tokens': torch.tensor(), 'src_lengths': torch.tensor()}}""",
    )
    parser.add_argument(
        "--dataset-type",
        type=str,
        default="npy",
        choices=["npy", "raw"],
        help="Type of input dataset file",
    )
    parser.add_argument(
        "--read-using-sf",
        type=str,
        default=False,
        help="If soundfile should be used to read the raw dataset",
    )
    parser.add_argument(
        "--dataset-size",
        default=None,
        type=int,
        help="Dataset size to use for benchmarking",
    )
    parser.add_argument(
        "--dump-speech-waveforms-dir",
        default=None,
        type=str,
        help="Directory to dump the speech waveforms computed on the dataset.",
    )
    parser.add_argument(
        "--dump-waveform-file-prefix",
        default="",
        type=str,
        help="File name prefix for the saved speech waveforms",
    )
    parser.add_argument("--feat-dim", default=80, type=int, help="Input feature dimension")
    parser.add_argument(
        "--target-sr",
        default=16000,
        type=int,
        help="Target sample rate for dumping waveforms",
    )
    options.add_generation_args(parser)
    options.get_interactive_generation_parser(parser)
    return parser
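# Hedged sketch of wiring `make_parser` into an entry point, following the
# cli_main pattern used elsewhere in this listing; `main` is assumed to be
# defined in the same module.
def _demo_benchmark_cli():
    parser = make_parser()
    args = options.parse_args_and_arch(parser)
    main(args)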
def cli_main():
    parser = options.get_interactive_generation_parser()
    parser.add_argument('--transformer-big-zhen', action='store_true')
    args = options.parse_args_and_arch(parser)
    distributed_utils.call_main(args, main)