def init_dllogger(log_fpath=None, dummy=False):
    if dummy:
        DLLogger.init(backends=[])
        return
    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, log_fpath),
        StdOutBackend(Verbosity.VERBOSE, step_format=stdout_step_format,
                      metric_format=stdout_metric_format)
        ]
    )
    DLLogger.metadata("train_loss", {"name": "loss", "format": ":>5.2f"})
    DLLogger.metadata("train_mel_loss", {"name": "mel loss", "format": ":>5.2f"})
    DLLogger.metadata("avg_train_loss", {"name": "avg train loss", "format": ":>5.2f"})
    DLLogger.metadata("avg_train_mel_loss", {"name": "avg train mel loss", "format": ":>5.2f"})
    DLLogger.metadata("val_loss", {"name": "  avg val loss", "format": ":>5.2f"})
    DLLogger.metadata("val_mel_loss", {"name": "  avg val mel loss", "format": ":>5.2f"})
    DLLogger.metadata(
        "val_ema_loss",
        {"name": "  EMA val loss", "format": ":>5.2f"})
    DLLogger.metadata(
        "val_ema_mel_loss",
        {"name": "  EMA val mel loss", "format": ":>5.2f"})
    DLLogger.metadata(
        "train_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata(
        "avg_train_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata(
        "val_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata(
        "val_ema_frames/s", {"name": None, "unit": "frames/s", "format": ":>10.2f"})
    DLLogger.metadata(
        "took", {"name": "took", "unit": "s", "format": ":>3.2f"})
    DLLogger.metadata("lrate_change", {"name": "lrate"})
Example #2
def setup_logger(args):
    aggregator_dict = OrderedDict([
        ('loss', 'average'),
        ('weighted_loss', 'average'),
        ('tokens', ('average', 'performance')),
        ('updates', 'performance'),
        ('gnorm', 'average')
    ])
    os.makedirs(args.save_dir, exist_ok=True)
    log_path = os.path.join(args.save_dir, args.stat_file)

    if os.path.exists(log_path):
        for i in itertools.count():
            s_fname = args.stat_file.split('.')
            fname = '.'.join(s_fname[:-1]) + f'_{i}.' + s_fname[-1] if len(s_fname) > 1 else args.stat_file + f'.{i}'
            log_path = os.path.join(args.save_dir, fname)
            if not os.path.exists(log_path):
                break

    if not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[JSONStreamBackend(verbosity=1, filename=log_path),
                                AggregatorBackend(verbosity=0, agg_dict=aggregator_dict),
                                TensorBoardBackend(verbosity=1, log_dir=args.save_dir)])
    else:
        dllogger.init(backends=[])
    for k, v in vars(args).items():
        dllogger.log(step='PARAMETER', data={k: v}, verbosity=0)

    container_setup_info = get_framework_env_vars()
    dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)

    dllogger.metadata('loss', {'unit': 'nat', 'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN'})
    dllogger.metadata('val_loss', {'unit': 'nat', 'GOAL': 'MINIMIZE', 'STAGE': 'VAL'})
    dllogger.metadata('speed', {'unit': 'tokens/s', 'format': ':.3f', 'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN'})
    dllogger.metadata('accuracy', {'unit': 'bleu', 'format': ':.2f', 'GOAL': 'MAXIMIZE', 'STAGE': 'VAL'})
Example #3
def __init__(self, log_file, global_batch_size, warmup_steps: int = 0, profile: bool = False):
    logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_file),
                          StdOutBackend(Verbosity.VERBOSE)])
    self.warmup_steps = warmup_steps
    self.global_batch_size = global_batch_size
    self.step = 0
    self.profile = profile
    self.timestamps = []
Example #4
def get_dllogger(results):
    return Logger(
        backends=[
            JSONStreamBackend(Verbosity.VERBOSE, os.path.join(results, "logs.json")),
            StdOutBackend(Verbosity.VERBOSE, step_format=lambda step: f"Epoch: {step} "),
        ]
    )
Example #5
def init(log_fpath, log_dir, enabled=True, tb_subsets=[], **tb_kw):

    if enabled:
        backends = [JSONStreamBackend(Verbosity.DEFAULT,
                                      unique_log_fpath(log_fpath)),
                    StdOutBackend(Verbosity.VERBOSE,
                                  step_format=stdout_step_format,
                                  metric_format=stdout_metric_format)]
    else:
        backends = []

    dllogger.init(backends=backends)
    dllogger.metadata("train_lrate", {"name": "lrate", "format": ":>3.2e"})

    for id_, pref in [('train', ''), ('train_avg', 'avg train '),
                      ('val', '  avg val '), ('val_ema', '  EMA val ')]:

        dllogger.metadata(f"{id_}_loss",
                          {"name": f"{pref}loss", "format": ":>5.2f"})
        dllogger.metadata(f"{id_}_mel_loss",
                          {"name": f"{pref}mel loss", "format": ":>5.2f"})

        dllogger.metadata(f"{id_}_frames/s",
                          {"name": None, "unit": "frames/s", "format": ":>10.2f"})
        dllogger.metadata(f"{id_}_took",
                          {"name": "took", "unit": "s", "format": ":>3.2f"})

    global tb_loggers
    tb_loggers = {s: TBLogger(enabled, log_dir, name=s, **tb_kw)
                  for s in tb_subsets}
Example #6
def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch FastPitch Inference Benchmark')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    log_file = args.log_file
    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'FastPitch_PyT'})

    model = load_and_setup_model('FastPitch',
                                 parser,
                                 None,
                                 args.amp_run,
                                 'cuda',
                                 unk_args=[],
                                 forward_is_infer=True,
                                 ema=False,
                                 jitable=True)

    # FIXME Temporarily disabled due to nn.LayerNorm fp16 casting bug in pytorch:20.02-py3 and 20.03
    # model = torch.jit.script(model)

    warmup_iters = 3
    iters = 1
    gen_measures = MeasureTime()
    all_frames = 0
    for i in range(-warmup_iters, iters):
        text_padded = torch.randint(low=0,
                                    high=148,
                                    size=(args.batch_size, 128),
                                    dtype=torch.long).to('cuda')
        input_lengths = torch.IntTensor([text_padded.size(1)] *
                                        args.batch_size).to('cuda')
        durs = torch.ones_like(text_padded).mul_(4).to('cuda')

        with torch.no_grad(), gen_measures:
            mels, *_ = model(text_padded, input_lengths, dur_tgt=durs)
        num_frames = mels.size(0) * mels.size(2)

        if i >= 0:
            all_frames += num_frames
            DLLogger.log(step=(i, ), data={"latency": gen_measures[-1]})
            DLLogger.log(step=(i, ),
                         data={"frames/s": num_frames / gen_measures[-1]})

    measures = gen_measures[warmup_iters:]
    DLLogger.log(step=(), data={'avg latency': np.mean(measures)})
    DLLogger.log(step=(), data={'avg frames/s': all_frames / np.sum(measures)})
    DLLogger.flush()
Example #7
def get_logger(params):
    backends = []
    if hvd.rank() == 0:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            backends += [JSONStreamBackend(Verbosity.VERBOSE, params.log_dir)]
    logger.init(backends=backends)
    return logger
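
# A hedged usage sketch for the Horovod variant above. `params` is a
# hypothetical object exposing log_dir (passed straight to JSONStreamBackend,
# so here it must be a file path); `logger` is assumed to be
# `import dllogger as logger`, as in the other examples.
import horovod.tensorflow as hvd  # assumption: the TensorFlow flavor
from types import SimpleNamespace

hvd.init()
params = SimpleNamespace(log_dir="/results/log.json")  # hypothetical
logger = get_logger(params)  # ranks other than 0 get an empty backend list
logger.log(step=(0,), data={"loss": 1.23})
logger.flush()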
Example #8
def _initialize_dllogger(self, log_dir, filename, append):
    backends = [
        JSONStreamBackend(Verbosity.VERBOSE,
                          os.path.join(log_dir, filename),
                          append=append),
        StdOutBackend(Verbosity.VERBOSE),
    ]
    logger.init(backends=backends)
Example #9
def __init__(self, log_dir, global_batch_size, mode, warmup, dim, profile):
    logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_dir),
                          StdOutBackend(Verbosity.VERBOSE)])
    self.warmup_steps = warmup
    self.global_batch_size = global_batch_size
    self.step = 0
    self.dim = dim
    self.mode = mode
    self.profile = profile
    self.timestamps = []
Example #10
def setup_logger(args):
    aggregator_dict = OrderedDict([('loss', 'average'),
                                   ('weighted_loss', 'average'),
                                   ('tokens', ('average', 'performance')),
                                   ('updates', 'performance'),
                                   ('gnorm', 'average')])
    os.makedirs(args.save_dir, exist_ok=True)
    log_path = os.path.join(args.save_dir, args.stat_file)
    if not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[
            JSONStreamBackend(verbosity=1, filename=log_path),
            AggregatorBackend(verbosity=0, agg_dict=aggregator_dict),
            TensorBoardBackend(verbosity=1, log_dir=args.save_dir)
        ])
    else:
        dllogger.init(backends=[])
    for k, v in vars(args).items():
        dllogger.log(step='PARAMETER', data={k: v}, verbosity=0)

    container_setup_info = {
        'NVIDIA_PYTORCH_VERSION': os.environ.get('NVIDIA_PYTORCH_VERSION'),
        'PYTORCH_VERSION': os.environ.get('PYTORCH_VERSION'),
        'CUBLAS_VERSION': os.environ.get('CUBLAS_VERSION'),
        'NCCL_VERSION': os.environ.get('NCCL_VERSION'),
        'CUDA_DRIVER_VERSION': os.environ.get('CUDA_DRIVER_VERSION'),
        'CUDNN_VERSION': os.environ.get('CUDNN_VERSION'),
        'CUDA_VERSION': os.environ.get('CUDA_VERSION'),
        'NVIDIA_PIPELINE_ID': os.environ.get('NVIDIA_PIPELINE_ID'),
        'NVIDIA_BUILD_ID': os.environ.get('NVIDIA_BUILD_ID'),
        'NVIDIA_TF32_OVERRIDE': os.environ.get('NVIDIA_TF32_OVERRIDE'),
    }
    dllogger.log(step='PARAMETER', data=container_setup_info, verbosity=0)

    dllogger.metadata('loss', {
        'unit': 'nat',
        'GOAL': 'MINIMIZE',
        'STAGE': 'TRAIN'
    })
    dllogger.metadata('val_loss', {
        'unit': 'nat',
        'GOAL': 'MINIMIZE',
        'STAGE': 'VAL'
    })
    dllogger.metadata('speed', {
        'unit': 'tokens/s',
        'format': ':.3f',
        'GOAL': 'MAXIMIZE',
        'STAGE': 'TRAIN'
    })
    dllogger.metadata('accuracy', {
        'unit': 'bleu',
        'format': ':.2f',
        'GOAL': 'MAXIMIZE',
        'STAGE': 'VAL'
    })
Example #11
def log(logname, dice, results="/results"):
    dllogger = Logger(backends=[
        JSONStreamBackend(Verbosity.VERBOSE, os.path.join(results, logname)),
        StdOutBackend(Verbosity.VERBOSE, step_format=lambda step: ""),
    ])
    metrics = {}
    metrics.update({"Mean dice": round(dice.mean().item(), 2)})
    metrics.update({f"L{j+1}": round(m.item(), 2) for j, m in enumerate(dice)})
    dllogger.log(step=(), data=metrics)
    dllogger.flush()
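
# A hedged call sketch for the helper above, assuming `dice` holds per-class
# scores as a 1-D torch.Tensor (values here are made up):
import torch

dice = torch.tensor([0.91, 0.87, 0.79])
log("eval_log.json", dice, results="/results")
# -> writes {"Mean dice": 0.86, "L1": 0.91, "L2": 0.87, "L3": 0.79}
#    to /results/eval_log.json and echoes it to stdout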
Example #12
def get_logger(params):
    backends = []
    if params.worker_id == 0 or params.log_all_workers:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            os.makedirs(params.log_dir, exist_ok=True)
            log_file = f"{params.log_dir}/log.json"
            backends += [JSONStreamBackend(Verbosity.VERBOSE, log_file)]
    logger.init(backends=backends)
    return logger
Example #13
def get_logger(params):
    backends = []
    worker_id = hvd_rank() if horovod_enabled() else 0
    if worker_id == 0:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            os.makedirs(params.log_dir, exist_ok=True)
            log_file = f"{params.log_dir}/log.json"
            backends += [JSONStreamBackend(Verbosity.VERBOSE, log_file)]
    logger.init(backends=backends)
    return logger
Example #14
def setup_logger(args):
    os.makedirs(args.results, exist_ok=True)
    log_path = os.path.join(args.results, args.log_file)

    if os.path.exists(log_path):
        for i in itertools.count():
            s_fname = args.log_file.split('.')
            fname = '.'.join(s_fname[:-1]) + f'_{i}.' + s_fname[-1] if len(s_fname) > 1 else args.log_file + f'.{i}'
            log_path = os.path.join(args.results, fname)
            if not os.path.exists(log_path):
                break

    def metric_format(metric, metadata, value):
        return "{}: {}".format(metric, f'{value:.5f}' if isinstance(value, float) else value)

    def step_format(step):
        if step == ():
            return "Finished |"
        elif isinstance(step, int):
            return "Step {0: <5} |".format(step)
        return "Step {} |".format(step)

    if not dist.is_initialized() or not args.distributed_world_size > 1 or args.distributed_rank == 0:
        dllogger.init(backends=[JSONStreamBackend(verbosity=1, filename=log_path),
                                TensorBoardBackend(verbosity=1, log_dir=args.results),
                                StdOutBackend(verbosity=2,
                                              step_format=step_format,
                                              # metric_format=metric_format,
                                              prefix_format=lambda x: "")])
    else:
        dllogger.init(backends=[])
    dllogger.log(step='PARAMETER', data=vars(args), verbosity=0)

    container_setup_info = {**get_framework_env_vars(), **get_system_info()}
    dllogger.log(step='ENVIRONMENT', data=container_setup_info, verbosity=0)

    dllogger.metadata('loss', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TRAIN', 'format': ':5f'})
    dllogger.metadata('items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'TRAIN', 'format': ':1f'})
    dllogger.metadata('val_loss', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'VAL', 'format': ':5f'})
    dllogger.metadata('val_items/s', {'GOAL': 'MAXIMIZE', 'STAGE': 'VAL', 'format': ':1f'})
    dllogger.metadata('test_P10', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('test_P50', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('test_P90', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('throughput', {'GOAL': 'MAXIMIZE', 'STAGE': 'TEST', 'format': ':1f'})
    dllogger.metadata('latency_p90', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('latency_p95', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
    dllogger.metadata('latency_p99', {'GOAL': 'MINIMIZE', 'STAGE': 'TEST', 'format': ':5f'})
Example #15
def init_log(args):

    enabled = not dist.is_initialized() or dist.get_rank() == 0
    if enabled:
        fpath = args.log_file or os.path.join(args.output_dir, 'nvlog.json')
        backends = [
            JSONStreamBackend(Verbosity.DEFAULT, unique_log_fpath(fpath)),
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=stdout_step_format,
                          metric_format=stdout_metric_format)
        ]
    else:
        backends = []

    dllogger.init(backends=backends)
    dllogger.metadata("train_lrate", {"name": "lrate", "format": ":>3.2e"})

    for id_, pref in [('train', ''), ('train_avg', 'avg train '),
                      ('dev_ema', '  dev ema ')]:

        dllogger.metadata(f"{id_}_loss", {
            "name": f"{pref}loss",
            "format": ":>7.2f"
        })

        dllogger.metadata(f"{id_}_wer", {
            "name": f"{pref}wer",
            "format": ":>6.2f"
        })

        dllogger.metadata(f"{id_}_pplx", {
            "name": f"{pref}pplx",
            "format": ":>6.2f"
        })

        dllogger.metadata(f"{id_}_throughput", {
            "name": f"{pref}utts/s",
            "format": ":>5.0f"
        })

        dllogger.metadata(f"{id_}_took", {
            "name": "took",
            "unit": "s",
            "format": ":>5.2f"
        })

    tb_subsets = ['train', 'dev_ema']
    global tb_loggers
    tb_loggers = {
        s: TBLogger(enabled, args.output_dir, name=s)
        for s in tb_subsets
    }

    log_parameters(vars(args), tb_subset='train')
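
# unique_log_fpath is referenced here and in Examples #5 and #29 but never
# shown. A minimal sketch under the assumption that it mirrors the
# suffix-counting loops of Examples #2 and #14 (never overwrite an old log):
import os

def unique_log_fpath(fpath):
    # Return fpath if it is unused, else append the first free numeric suffix.
    if not os.path.exists(fpath):
        return fpath
    i = 1
    while os.path.exists(f"{fpath}.{i}"):
        i += 1
    return f"{fpath}.{i}"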
Example #16
def get_dllogger(params):
    backends = []
    if is_main_process():
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            backends += [
                JSONStreamBackend(Verbosity.VERBOSE,
                                  os.path.join(params.log_dir, "log.json"))
            ]
    logger.init(backends=backends)
    return logger
Example #17
def setup_dllogger(rank, enabled=True, filename='log.json'):
    if enabled and rank == 0:
        backends = [
            StdOutBackend(Verbosity.DEFAULT),
            JSONStreamBackend(
                Verbosity.VERBOSE,
                filename,
            ),
        ]
        DLLogger.init(backends)
    else:
        DLLogger.init([])
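
# A hedged usage sketch for setup_dllogger under torch.distributed, where only
# rank 0 writes; DLLogger is assumed to be `import dllogger as DLLogger`.
import torch.distributed as dist
import dllogger as DLLogger

rank = dist.get_rank() if dist.is_initialized() else 0
setup_dllogger(rank, enabled=True, filename='train_log.json')
DLLogger.log(step=(0,), data={'loss': 0.5})  # silently dropped on other ranks
DLLogger.flush()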
Example #18
def get_logger(params):
    """ Get logger object

    :param params: Dict with additional parameters
    :return: logger
    """
    backends = []
    if hvd.rank() == 0:
        backends += [StdOutBackend(Verbosity.VERBOSE)]
        if params.log_dir:
            backends += [JSONStreamBackend(Verbosity.VERBOSE, params.log_dir)]
    logger.init(backends=backends)
    return logger
Example #19
def log(logname, dice, epoch=None, dice_tta=None):
    dllogger = Logger(backends=[
        JSONStreamBackend(Verbosity.VERBOSE, os.path.join(
            args.results, logname)),
        StdOutBackend(Verbosity.VERBOSE, step_format=lambda step: ""),
    ])
    metrics = {}
    if epoch is not None:
        metrics.update({"Epoch": epoch})
    metrics.update({"Mean dice": round(dice.mean().item(), 2)})
    if dice_tta is not None:
        metrics.update({"Mean TTA dice": round(dice_tta.mean().item(), 2)})
    metrics.update({f"L{j+1}": round(m.item(), 2) for j, m in enumerate(dice)})
    if dice_tta is not None:
        metrics.update({
            f"TTA_L{j+1}": round(m.item(), 2)
            for j, m in enumerate(dice_tta)
        })
    dllogger.log(step=(), data=metrics)
    dllogger.flush()
Example #20
def __init__(self, args):
    super(Model, self).__init__()
    self.save_hyperparameters()
    self.args = args
    self.f1_score = F1(args)
    self.model = UNetLoc(args) if args.type == "pre" else get_dmg_unet(args)
    self.loss = Loss(args)
    self.best_f1 = torch.tensor(0)
    self.best_epoch = 0
    self.tta_flips = [[2], [3], [2, 3]]
    self.lr = args.lr
    self.n_class = 2 if self.args.type == "pre" else 5
    self.softmax = nn.Softmax(dim=1)
    self.test_idx = 0
    self.dllogger = Logger(backends=[
        JSONStreamBackend(
            Verbosity.VERBOSE,
            os.path.join(args.results, f"{args.logname}.json")),
        StdOutBackend(Verbosity.VERBOSE,
                      step_format=lambda step: f"Epoch: {step} "),
    ])
Example #21
def setup_logger(config):
    log_path = config.get("log_path", os.getcwd())
    if is_main_process():
        backends = [
            TensorBoardBackend(verbosity=dllogger.Verbosity.VERBOSE,
                               log_dir=log_path),
            JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                              filename=os.path.join(log_path, "log.json")),
            AggregatorBackend(verbosity=dllogger.Verbosity.VERBOSE,
                              agg_dict={"loss": AverageMeter}),
            StdOutBackend(
                verbosity=dllogger.Verbosity.DEFAULT,
                step_format=empty_step_format,
                metric_format=no_string_metric_format,
                prefix_format=empty_prefix_format,
            ),
        ]

        logger = Logger(backends=backends)
    else:
        logger = Logger(backends=[])
    container_setup_info = get_framework_env_vars()
    logger.log(step="PARAMETER",
               data=container_setup_info,
               verbosity=dllogger.Verbosity.DEFAULT)

    logger.metadata("loss", {
        "unit": "nat",
        "GOAL": "MINIMIZE",
        "STAGE": "TRAIN"
    })
    logger.metadata("val_loss", {
        "unit": "nat",
        "GOAL": "MINIMIZE",
        "STAGE": "VAL"
    })
    return logger
Example #22
def __init__(self, args):
    super(NNUnet, self).__init__()
    self.args = args
    self.save_hyperparameters()
    self.build_nnunet()
    self.loss = Loss()
    self.dice = Dice(self.n_class)
    self.best_sum = 0
    self.eval_dice = 0
    self.best_sum_epoch = 0
    self.best_dice = self.n_class * [0]
    self.best_epoch = self.n_class * [0]
    self.best_sum_dice = self.n_class * [0]
    self.learning_rate = args.learning_rate
    if self.args.exec_mode in ["train", "evaluate"]:
        self.dllogger = Logger(backends=[
            JSONStreamBackend(Verbosity.VERBOSE,
                              os.path.join(args.results, "logs.json")),
            StdOutBackend(Verbosity.VERBOSE,
                          step_format=lambda step: f"Epoch: {step} "),
        ])

    self.tta_flips = ([[2], [3], [2, 3]] if self.args.dim == 2 else
                      [[2], [3], [4], [2, 3], [2, 4], [3, 4], [2, 3, 4]])
Example #23
def __init__(self, log_path="bert_dllog.json"):
    self.logger = Logger([
        StdOutBackend(Verbosity.DEFAULT, step_format=self.format_step),
        JSONStreamBackend(Verbosity.VERBOSE, log_path),
    ])
    self.logger.metadata("mlm_loss", {
        "format": ":.4f",
        "GOAL": "MINIMIZE",
        "STAGE": "TRAIN"
    })
    self.logger.metadata("nsp_loss", {
        "format": ":.4f",
        "GOAL": "MINIMIZE",
        "STAGE": "TRAIN"
    })
    self.logger.metadata("avg_loss_step", {
        "format": ":.4f",
        "GOAL": "MINIMIZE",
        "STAGE": "TRAIN"
    })
    self.logger.metadata("total_loss", {
        "format": ":.4f",
        "GOAL": "MINIMIZE",
        "STAGE": "TRAIN"
    })
    self.logger.metadata("loss", {
        "format": ":.4f",
        "GOAL": "MINIMIZE",
        "STAGE": "TRAIN"
    })
    self.logger.metadata("f1", {
        "format": ":.4f",
        "GOAL": "MAXIMIZE",
        "STAGE": "VAL"
    })
    self.logger.metadata("precision", {
        "format": ":.4f",
        "GOAL": "MAXIMIZE",
        "STAGE": "VAL"
    })
    self.logger.metadata("recall", {
        "format": ":.4f",
        "GOAL": "MAXIMIZE",
        "STAGE": "VAL"
    })
    self.logger.metadata("mcc", {
        "format": ":.4f",
        "GOAL": "MAXIMIZE",
        "STAGE": "VAL"
    })
    self.logger.metadata("exact_match", {
        "format": ":.4f",
        "GOAL": "MAXIMIZE",
        "STAGE": "VAL"
    })
    self.logger.metadata(
        "throughput_train",
        {
            "unit": "seq/s",
            "format": ":.3f",
            "GOAL": "MAXIMIZE",
            "STAGE": "TRAIN"
        },
    )
    self.logger.metadata(
        "throughput_inf",
        {
            "unit": "seq/s",
            "format": ":.3f",
            "GOAL": "MAXIMIZE",
            "STAGE": "VAL"
        },
    )
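
# self.format_step above is referenced but not defined in the snippet. A
# plausible sketch (an assumption, not the original code), accepting both
# string steps such as "PARAMETER" and epoch/iteration tuples:
@staticmethod
def format_step(step):
    # String steps pass through; tuple steps become a readable prefix.
    if isinstance(step, str):
        return step
    parts = []
    if len(step) > 0:
        parts.append(f"Epoch: {step[0]}")
    if len(step) > 1:
        parts.append(f"Iteration: {step[1]}")
    return " ".join(parts)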
Example #24
def main():

    parser = argparse.ArgumentParser(description='PyTorch Tacotron 2 Training')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    if 'LOCAL_RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        local_rank = int(os.environ['LOCAL_RANK'])
        world_size = int(os.environ['WORLD_SIZE'])
    else:
        local_rank = args.rank
        world_size = args.world_size

    distributed_run = world_size > 1

    if local_rank == 0:
        log_file = os.path.join(args.output, args.log_file)
        DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, log_file),
                                StdOutBackend(Verbosity.VERBOSE)])
    else:
        DLLogger.init(backends=[])

    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    model_name = args.model_name
    parser = models.model_parser(model_name, parser)
    args, _ = parser.parse_known_args()

    torch.backends.cudnn.enabled = args.cudnn_enabled
    torch.backends.cudnn.benchmark = args.cudnn_benchmark

    if distributed_run:
        init_distributed(args, world_size, local_rank, args.group_name)

    torch.cuda.synchronize()
    run_start_time = time.perf_counter()

    model_config = models.get_model_config(model_name, args)
    model = models.get_model(model_name, model_config,
                             cpu_run=False,
                             uniform_initialize_bn_weight=not args.disable_uniform_initialize_bn_weight)

    if distributed_run:
        model = DDP(model, device_ids=[local_rank], output_device=local_rank)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler(enabled=args.amp)

    try:
        sigma = args.sigma
    except AttributeError:
        sigma = None

    start_epoch = [0]

    if args.resume_from_last:
        args.checkpoint_path = get_last_checkpoint_filename(args.output, model_name)

    if args.checkpoint_path != "":
        load_checkpoint(model, optimizer, start_epoch, model_config,
                        args.amp, args.checkpoint_path, local_rank)

    start_epoch = start_epoch[0]

    criterion = loss_functions.get_loss_function(model_name, sigma)

    try:
        n_frames_per_step = args.n_frames_per_step
    except AttributeError:
        n_frames_per_step = None

    collate_fn = data_functions.get_collate_function(
        model_name, n_frames_per_step)
    trainset = data_functions.get_data_loader(
        model_name, args.dataset_path, args.training_files, args)
    if distributed_run:
        train_sampler = DistributedSampler(trainset)
        shuffle = False
    else:
        train_sampler = None
        shuffle = True

    train_loader = DataLoader(trainset, num_workers=1, shuffle=shuffle,
                              sampler=train_sampler,
                              batch_size=args.batch_size, pin_memory=False,
                              drop_last=True, collate_fn=collate_fn)

    valset = data_functions.get_data_loader(
        model_name, args.dataset_path, args.validation_files, args)

    batch_to_gpu = data_functions.get_batch_to_gpu(model_name)

    iteration = 0
    train_epoch_items_per_sec = 0.0
    val_loss = 0.0
    num_iters = 0

    model.train()

    for epoch in range(start_epoch, args.epochs):
        torch.cuda.synchronize()
        epoch_start_time = time.perf_counter()
        # used to calculate avg items/sec over epoch
        reduced_num_items_epoch = 0

        train_epoch_items_per_sec = 0.0

        num_iters = 0
        reduced_loss = 0

        # if overflow at the last iteration then do not save checkpoint
        overflow = False

        if distributed_run:
            train_loader.sampler.set_epoch(epoch)

        for i, batch in enumerate(train_loader):
            torch.cuda.synchronize()
            iter_start_time = time.perf_counter()
            DLLogger.log(step=(epoch, i),
                         data={'glob_iter/iters_per_epoch': f"{iteration}/{len(train_loader)}"})

            adjust_learning_rate(iteration, epoch, optimizer, args.learning_rate,
                                 args.anneal_steps, args.anneal_factor, local_rank)

            model.zero_grad()
            x, y, num_items = batch_to_gpu(batch)

            #AMP upstream autocast
            with torch.cuda.amp.autocast(enabled=args.amp):
                y_pred = model(x)
                loss = criterion(y_pred, y)
            
            if distributed_run:
                reduced_loss = reduce_tensor(loss.data, world_size).item()
                reduced_num_items = reduce_tensor(num_items.data, 1).item()
            else:
                reduced_loss = loss.item()
                reduced_num_items = num_items.item()
            if np.isnan(reduced_loss):
                raise Exception("loss is NaN")

            DLLogger.log(step=(epoch, i), data={'train_loss': reduced_loss})

            num_iters += 1

            # accumulate number of items processed in this epoch
            reduced_num_items_epoch += reduced_num_items

            if args.amp:
                scaler.scale(loss).backward()

                scaler.unscale_(optimizer)
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), args.grad_clip_thresh)
                
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad(set_to_none=True)  

            else:
                loss.backward()
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    model.parameters(), args.grad_clip_thresh)

                optimizer.step()

            torch.cuda.synchronize()
            iter_stop_time = time.perf_counter()
            iter_time = iter_stop_time - iter_start_time
            items_per_sec = reduced_num_items/iter_time
            train_epoch_items_per_sec += items_per_sec

            DLLogger.log(step=(epoch, i), data={'train_items_per_sec': items_per_sec})
            DLLogger.log(step=(epoch, i), data={'train_iter_time': iter_time})
            iteration += 1

        torch.cuda.synchronize()
        epoch_stop_time = time.perf_counter()
        epoch_time = epoch_stop_time - epoch_start_time

        DLLogger.log(step=(epoch,), data={'train_items_per_sec':
                                          (train_epoch_items_per_sec/num_iters if num_iters > 0 else 0.0)})
        DLLogger.log(step=(epoch,), data={'train_loss': reduced_loss})
        DLLogger.log(step=(epoch,), data={'train_epoch_time': epoch_time})

        val_loss, val_items_per_sec = validate(model, criterion, valset, epoch,
                                               iteration, args.batch_size,
                                               world_size, collate_fn,
                                               distributed_run, local_rank,
                                               batch_to_gpu)

        if (epoch % args.epochs_per_checkpoint == 0) and args.bench_class == "":
            save_checkpoint(model, optimizer, scaler, epoch, model_config,
                            args.amp, args.output, args.model_name,
                            local_rank, world_size)
        if local_rank == 0:
            DLLogger.flush()

    torch.cuda.synchronize()
    run_stop_time = time.perf_counter()
    run_time = run_stop_time - run_start_time
    DLLogger.log(step=tuple(), data={'run_time': run_time})
    DLLogger.log(step=tuple(), data={'val_loss': val_loss})
    DLLogger.log(step=tuple(), data={'train_items_per_sec':
                                     (train_epoch_items_per_sec/num_iters if num_iters > 0 else 0.0)})
    DLLogger.log(step=tuple(), data={'val_items_per_sec': val_items_per_sec})

    if local_rank == 0:
        DLLogger.flush()
Example #25
def main():
    parser = argparse.ArgumentParser(
        description='PyTorch TTS Data Pre-processing')
    parser = parse_args(parser)
    args, unk_args = parser.parse_known_args()
    if len(unk_args) > 0:
        raise ValueError(f'Invalid options {unk_args}')

    if args.extract_pitch_char:
        assert args.extract_durations, "Durations required for pitch extraction"

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})

    model = load_and_setup_model(
        'Tacotron2',
        parser,
        args.tacotron2_checkpoint,
        amp=False,
        device=torch.device('cuda' if args.cuda else 'cpu'),
        forward_is_infer=False,
        ema=False)

    if args.train_mode:
        model.train()

    # n_mel_channels arg has been consumed by model's arg parser
    args.n_mel_channels = model.n_mel_channels

    for datum in ('mels', 'mels_teacher', 'attentions', 'durations',
                  'pitch_mel', 'pitch_char', 'pitch_trichar'):
        if getattr(args, f'extract_{datum}'):
            Path(args.dataset_path, datum).mkdir(parents=False, exist_ok=True)

    filenames = [
        Path(l.split('|')[0]).stem for l in open(args.wav_text_filelist, 'r')
    ]
    # Compatibility with Tacotron2 Data loader
    args.n_speakers = 1
    dataset = FilenamedLoader(filenames,
                              args.dataset_path,
                              args.wav_text_filelist,
                              args,
                              load_mel_from_disk=False)
    # TextMelCollate supports only n_frames_per_step=1
    data_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             sampler=None,
                             num_workers=0,
                             collate_fn=TextMelCollate(1),
                             pin_memory=False,
                             drop_last=False)
    pitch_vecs = {'mel': {}, 'char': {}, 'trichar': {}}
    for i, batch in enumerate(data_loader):
        tik = time.time()
        fnames = batch[-1]
        x, _, _ = batch_to_gpu(batch[:-1])
        _, text_lens, mels_padded, _, mel_lens = x

        for j, mel in enumerate(mels_padded):
            fpath = Path(args.dataset_path, 'mels', fnames[j] + '.pt')
            torch.save(mel[:, :mel_lens[j]].cpu(), fpath)

        with torch.no_grad():
            out_mels, out_mels_postnet, _, alignments = model.forward(x)

        if args.extract_mels_teacher:
            for j, mel in enumerate(out_mels_postnet):
                fpath = Path(args.dataset_path, 'mels_teacher',
                             fnames[j] + '.pt')
                torch.save(mel[:, :mel_lens[j]].cpu(), fpath)
        if args.extract_attentions:
            for j, ali in enumerate(alignments):
                ali = ali[:mel_lens[j], :text_lens[j]]
                fpath = Path(args.dataset_path, 'attentions',
                             fnames[j] + '.pt')
                torch.save(ali.cpu(), fpath)
        durations = []
        if args.extract_durations:
            for j, ali in enumerate(alignments):
                text_len = text_lens[j]
                ali = ali[:mel_lens[j], :text_len]
                dur = torch.histc(torch.argmax(ali, dim=1),
                                  min=0,
                                  max=text_len - 1,
                                  bins=text_len)
                durations.append(dur)
                fpath = Path(args.dataset_path, 'durations', fnames[j] + '.pt')
                torch.save(dur.cpu().int(), fpath)
        if args.extract_pitch_mel or args.extract_pitch_char or args.extract_pitch_trichar:
            for j, dur in enumerate(durations):
                fpath = Path(args.dataset_path, 'pitch_char',
                             fnames[j] + '.pt')
                wav = Path(args.dataset_path, 'wavs', fnames[j] + '.wav')
                p_mel, p_char, p_trichar = calculate_pitch(
                    str(wav),
                    dur.cpu().numpy())
                pitch_vecs['mel'][fnames[j]] = p_mel
                pitch_vecs['char'][fnames[j]] = p_char
                pitch_vecs['trichar'][fnames[j]] = p_trichar

        nseconds = time.time() - tik
        DLLogger.log(step=f'{i+1}/{len(data_loader)} ({nseconds:.2f}s)',
                     data={})

    if args.extract_pitch_mel:
        normalize_pitch_vectors(pitch_vecs['mel'])
        for fname, pitch in pitch_vecs['mel'].items():
            fpath = Path(args.dataset_path, 'pitch_mel', fname + '.pt')
            torch.save(torch.from_numpy(pitch), fpath)

    if args.extract_pitch_char:
        mean, std = normalize_pitch_vectors(pitch_vecs['char'])
        for fname, pitch in pitch_vecs['char'].items():
            fpath = Path(args.dataset_path, 'pitch_char', fname + '.pt')
            torch.save(torch.from_numpy(pitch), fpath)
        save_stats(args.dataset_path, args.wav_text_filelist, 'pitch_char',
                   mean, std)

    if args.extract_pitch_trichar:
        normalize_pitch_vectors(pitch_vecs['trichar'])
        for fname, pitch in pitch_vecs['trichar'].items():
            fpath = Path(args.dataset_path, 'pitch_trichar', fname + '.pt')
            torch.save(torch.from_numpy(pitch), fpath)

    DLLogger.flush()
Example #26
def main():

    parser = argparse.ArgumentParser(
        description='TensorRT Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    # initialize CUDA state
    torch.cuda.init()

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    encoder = load_engine(args.encoder, TRT_LOGGER)
    decoder_iter = load_engine(args.decoder, TRT_LOGGER)
    postnet = load_engine(args.postnet, TRT_LOGGER)
    waveglow = load_engine(args.waveglow, TRT_LOGGER)

    if args.waveglow_ckpt != "":
        # setup denoiser using WaveGlow PyTorch checkpoint
        waveglow_ckpt = load_and_setup_model('WaveGlow', parser, args.waveglow_ckpt,
                                             True, forward_is_infer=True)
        denoiser = Denoiser(waveglow_ckpt).cuda()
        # after initialization, we don't need WaveGlow PyTorch checkpoint
        # anymore - deleting
        del waveglow_ckpt
        torch.cuda.empty_cache()

    # create TRT contexts for each engine
    encoder_context = encoder.create_execution_context()
    decoder_context = decoder_iter.create_execution_context()
    postnet_context = postnet.create_execution_context()
    waveglow_context = waveglow.create_execution_context()

    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT,
                                              args.output+'/'+args.log_file),
                            StdOutBackend(Verbosity.VERBOSE)])

    texts = []
    try:
        with open(args.input, 'r') as f:
            texts = f.readlines()
    except OSError:
        print("Could not read file")
        sys.exit(1)

    measurements = {}

    sequences, sequence_lengths = prepare_input_sequence(texts)
    sequences = sequences.to(torch.int32)
    sequence_lengths = sequence_lengths.to(torch.int32)
    with MeasureTime(measurements, "latency"):
        mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet,
                                               encoder_context, decoder_context, postnet_context,
                                               sequences, sequence_lengths, measurements, args.fp16)
        audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, args.fp16)

    with encoder_context, decoder_context, postnet_context, waveglow_context:
        pass

    audios = audios.float()
    if args.waveglow_ckpt != "":
        with MeasureTime(measurements, "denoiser"):
            audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

    for i, audio in enumerate(audios):
        audio = audio[:mel_lengths[i]*args.stft_hop_length]
        audio = audio/torch.max(torch.abs(audio))
        audio_path = args.output + "audio_"+str(i)+"_trt.wav"
        write(audio_path, args.sampling_rate, audio.cpu().numpy())


    DLLogger.log(step=0, data={"tacotron2_encoder_latency": measurements['tacotron2_encoder_time']})
    DLLogger.log(step=0, data={"tacotron2_decoder_latency": measurements['tacotron2_decoder_time']})
    DLLogger.log(step=0, data={"tacotron2_postnet_latency": measurements['tacotron2_postnet_time']})
    DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']})
    DLLogger.log(step=0, data={"latency": measurements['latency']})

    if args.waveglow_ckpt != "":
        DLLogger.log(step=0, data={"denoiser": measurements['denoiser']})
    DLLogger.flush()

    prec = "fp16" if args.fp16 else "fp32"
    latency = measurements['latency']
    throughput = audios.size(1)/latency
    log_data = "1,"+str(sequence_lengths[0].item())+","+prec+","+str(latency)+","+str(throughput)+","+str(mel_lengths[0].item())+"\n"
    with open("log_bs1_"+prec+".log", 'a') as f:
        f.write(log_data)
Example #27
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU or CPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, unknown_args = parser.parse_known_args()

    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
                            StdOutBackend(Verbosity.VERBOSE)])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    measurements_all = {"pre_processing": [],
                        "tacotron2_latency": [],
                        "waveglow_latency": [],
                        "latency": [],
                        "type_conversion": [],
                        "data_transfer": [],
                        "storage": [],
                        "tacotron2_items_per_sec": [],
                        "waveglow_items_per_sec": [],
                        "num_mels_per_audio": [],
                        "throughput": []}

    print("args:", args, unknown_args)

    tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, args.amp_run, args.cpu_run, forward_is_infer=True)
    waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow, args.amp_run, args.cpu_run)

    if args.cpu_run:
        denoiser = Denoiser(waveglow, args.cpu_run)
    else:
        denoiser = Denoiser(waveglow, args.cpu_run).cuda()

    jitted_tacotron2 = torch.jit.script(tacotron2)

    texts = ["The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."]
    texts = [texts[0][:args.input_length]]
    texts = texts*args.batch_size

    warmup_iters = 3

    for iter in range(args.num_iters):

        measurements = {}

        with MeasureTime(measurements, "pre_processing", args.cpu_run):
            sequences_padded, input_lengths = prepare_input_sequence(texts, args.cpu_run)

        with torch.no_grad():
            with MeasureTime(measurements, "latency", args.cpu_run):
                with MeasureTime(measurements, "tacotron2_latency", args.cpu_run):
                    mel, mel_lengths, _ = jitted_tacotron2(sequences_padded, input_lengths)

                with MeasureTime(measurements, "waveglow_latency", args.cpu_run):
                    audios = waveglow.infer(mel, sigma=args.sigma_infer)
                    audios = audios.float()
                    audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

        num_mels = mel.size(0)*mel.size(2)
        num_samples = audios.size(0)*audios.size(1)

        with MeasureTime(measurements, "type_conversion", args.cpu_run):
            audios = audios.float()

        with MeasureTime(measurements, "data_transfer", args.cpu_run):
            audios = audios.cpu()

        with MeasureTime(measurements, "storage", args.cpu_run):
            audios = audios.numpy()
            for i, audio in enumerate(audios):
                audio_path = "audio_"+str(i)+".wav"
                write(audio_path, args.sampling_rate,
                      audio[:mel_lengths[i]*args.stft_hop_length])

        measurements['tacotron2_items_per_sec'] = num_mels/measurements['tacotron2_latency']
        measurements['waveglow_items_per_sec'] = num_samples/measurements['waveglow_latency']
        measurements['num_mels_per_audio'] = mel.size(2)
        measurements['throughput'] = num_samples/measurements['latency']

        if iter >= warmup_iters:
            for k,v in measurements.items():
                measurements_all[k].append(v)
                DLLogger.log(step=(iter-warmup_iters), data={k: v})

    DLLogger.flush()

    print_stats(measurements_all)
Example #28
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.output + '/' +
                          args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    tacotron2 = load_and_setup_model('Tacotron2',
                                     parser,
                                     args.tacotron2,
                                     args.amp_run,
                                     forward_is_infer=True)
    waveglow = load_and_setup_model('WaveGlow',
                                    parser,
                                    args.waveglow,
                                    args.amp_run,
                                    forward_is_infer=True)
    denoiser = Denoiser(waveglow).cuda()

    jitted_tacotron2 = torch.jit.script(tacotron2)

    texts = []
    try:
        with open(args.input, 'r') as f:
            texts = f.readlines()
    except OSError:
        print("Could not read file")
        sys.exit(1)

    if args.include_warmup:
        sequence = torch.randint(low=0,
                                 high=148,
                                 size=(1, 50),
                                 dtype=torch.long).cuda()
        input_lengths = torch.IntTensor([sequence.size(1)]).cuda().long()
        for i in range(3):
            with torch.no_grad():
                mel, mel_lengths = jitted_tacotron2(sequence, input_lengths)
                _ = waveglow(mel)

    measurements = {}

    sequences_padded, input_lengths = prepare_input_sequence(texts)

    with torch.no_grad(), MeasureTime(measurements, "tacotron2_time"):
        mel, mel_lengths = jitted_tacotron2(sequences_padded, input_lengths)

    with torch.no_grad(), MeasureTime(measurements, "waveglow_time"):
        audios = waveglow(mel, sigma=args.sigma_infer)
        audios = audios.float()
        audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

    print("Stopping after", mel.size(2), "decoder steps")
    tacotron2_infer_perf = mel.size(0) * mel.size(
        2) / measurements['tacotron2_time']
    waveglow_infer_perf = audios.size(0) * audios.size(
        1) / measurements['waveglow_time']

    DLLogger.log(step=0,
                 data={"tacotron2_items_per_sec": tacotron2_infer_perf})
    DLLogger.log(step=0,
                 data={"tacotron2_latency": measurements['tacotron2_time']})
    DLLogger.log(step=0, data={"waveglow_items_per_sec": waveglow_infer_perf})
    DLLogger.log(step=0,
                 data={"waveglow_latency": measurements['waveglow_time']})
    DLLogger.log(step=0,
                 data={
                     "latency": (measurements['tacotron2_time'] +
                                 measurements['waveglow_time'])
                 })

    for i, audio in enumerate(audios):
        audio = audio[:mel_lengths[i] * args.stft_hop_length]
        audio = audio / torch.max(torch.abs(audio))
        audio_path = args.output + "audio_" + str(i) + ".wav"
        write(audio_path, args.sampling_rate, audio.cpu().numpy())

    DLLogger.flush()
Example #29
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(description='PyTorch FastPitch Inference',
                                     allow_abbrev=False)
    parser = parse_args(parser)
    args, unk_args = parser.parse_known_args()

    if args.p_arpabet > 0.0:
        cmudict.initialize(args.cmudict_path, keep_ambiguous=True)

    torch.backends.cudnn.benchmark = args.cudnn_benchmark

    if args.output is not None:
        Path(args.output).mkdir(parents=False, exist_ok=True)

    log_fpath = args.log_file or str(Path(args.output, 'nvlog_infer.json'))
    log_fpath = unique_log_fpath(log_fpath)
    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, log_fpath),
                            StdOutBackend(Verbosity.VERBOSE,
                                          metric_format=stdout_metric_format)])
    init_inference_metadata()
    [DLLogger.log("PARAMETER", {k: v}) for k, v in vars(args).items()]

    device = torch.device('cuda' if args.cuda else 'cpu')

    if args.fastpitch != 'SKIP':
        generator = load_and_setup_model(
            'FastPitch', parser, args.fastpitch, args.amp, device,
            unk_args=unk_args, forward_is_infer=True, ema=args.ema,
            jitable=args.torchscript)

        if args.torchscript:
            generator = torch.jit.script(generator)
    else:
        generator = None

    if args.waveglow != 'SKIP':
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            waveglow = load_and_setup_model(
                'WaveGlow', parser, args.waveglow, args.amp, device,
                unk_args=unk_args, forward_is_infer=True, ema=args.ema)
        denoiser = Denoiser(waveglow).to(device)
        waveglow = getattr(waveglow, 'infer', waveglow)
    else:
        waveglow = None

    if len(unk_args) > 0:
        raise ValueError(f'Invalid options {unk_args}')

    fields = load_fields(args.input)
    batches = prepare_input_sequence(
        fields, device, args.symbol_set, args.text_cleaners, args.batch_size,
        args.dataset_path, load_mels=(generator is None), p_arpabet=args.p_arpabet)

    # Use real data rather than synthetic - FastPitch predicts len
    for _ in tqdm(range(args.warmup_steps), 'Warmup'):
        with torch.no_grad():
            if generator is not None:
                b = batches[0]
                mel, *_ = generator(b['text'])
            if waveglow is not None:
                audios = waveglow(mel, sigma=args.sigma_infer).float()
                _ = denoiser(audios, strength=args.denoising_strength)

    gen_measures = MeasureTime(cuda=args.cuda)
    waveglow_measures = MeasureTime(cuda=args.cuda)

    gen_kw = {'pace': args.pace,
              'speaker': args.speaker,
              'pitch_tgt': None,
              'pitch_transform': build_pitch_transformation(args)}

    if args.torchscript:
        gen_kw.pop('pitch_transform')
        print('NOTE: Pitch transforms are disabled with TorchScript')

    all_utterances = 0
    all_samples = 0
    all_letters = 0
    all_frames = 0

    reps = args.repeats
    log_enabled = reps == 1
    log = lambda s, d: DLLogger.log(step=s, data=d) if log_enabled else None

    for rep in (tqdm(range(reps), 'Inference') if reps > 1 else range(reps)):
        for b in batches:
            if generator is None:
                log(rep, {'message': 'Synthesizing from ground truth mels'})
                mel, mel_lens = b['mel'], b['mel_lens']
            else:
                with torch.no_grad(), gen_measures:
                    mel, mel_lens, *_ = generator(b['text'], **gen_kw)

                gen_infer_perf = mel.size(0) * mel.size(2) / gen_measures[-1]
                all_letters += b['text_lens'].sum().item()
                all_frames += mel.size(0) * mel.size(2)
                log(rep, {"fastpitch_frames/s": gen_infer_perf})
                log(rep, {"fastpitch_latency": gen_measures[-1]})

                if args.save_mels:
                    for i, mel_ in enumerate(mel):
                        m = mel_[:, :mel_lens[i].item()].permute(1, 0)
                        fname = b['output'][i] if 'output' in b else f'mel_{i}.npy'
                        mel_path = Path(args.output, Path(fname).stem + '.npy')
                        np.save(mel_path, m.cpu().numpy())

            if waveglow is not None:
                with torch.no_grad(), waveglow_measures:
                    audios = waveglow(mel, sigma=args.sigma_infer)
                    audios = denoiser(audios.float(),
                                      strength=args.denoising_strength
                                      ).squeeze(1)

                all_utterances += len(audios)
                all_samples += sum(audio.size(0) for audio in audios)
                waveglow_infer_perf = (
                    audios.size(0) * audios.size(1) / waveglow_measures[-1])

                log(rep, {"waveglow_samples/s": waveglow_infer_perf})
                log(rep, {"waveglow_latency": waveglow_measures[-1]})

                if args.output is not None and reps == 1:
                    for i, audio in enumerate(audios):
                        audio = audio[:mel_lens[i].item() * args.stft_hop_length]

                        if args.fade_out:
                            fade_len = args.fade_out * args.stft_hop_length
                            fade_w = torch.linspace(1.0, 0.0, fade_len)
                            audio[-fade_len:] *= fade_w.to(audio.device)

                        audio = audio / torch.max(torch.abs(audio))
                        fname = b['output'][i] if 'output' in b else f'audio_{i}.wav'
                        audio_path = Path(args.output, fname)
                        write(audio_path, args.sampling_rate, audio.cpu().numpy())

            if generator is not None and waveglow is not None:
                log(rep, {"latency": (gen_measures[-1] + waveglow_measures[-1])})

    log_enabled = True
    if generator is not None:
        gm = np.sort(np.asarray(gen_measures))
        rtf = all_samples / (all_utterances * gm.mean() * args.sampling_rate)
        log((), {"avg_fastpitch_letters/s": all_letters / gm.sum()})
        log((), {"avg_fastpitch_frames/s": all_frames / gm.sum()})
        log((), {"avg_fastpitch_latency": gm.mean()})
        log((), {"avg_fastpitch_RTF": rtf})
        log((), {"90%_fastpitch_latency": gm.mean() + norm.ppf((1.0 + 0.90) / 2) * gm.std()})
        log((), {"95%_fastpitch_latency": gm.mean() + norm.ppf((1.0 + 0.95) / 2) * gm.std()})
        log((), {"99%_fastpitch_latency": gm.mean() + norm.ppf((1.0 + 0.99) / 2) * gm.std()})
    if waveglow is not None:
        wm = np.sort(np.asarray(waveglow_measures))
        rtf = all_samples / (all_utterances * wm.mean() * args.sampling_rate)
        log((), {"avg_waveglow_samples/s": all_samples / wm.sum()})
        log((), {"avg_waveglow_latency": wm.mean()})
        log((), {"avg_waveglow_RTF": rtf})
        log((), {"90%_waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.90) / 2) * wm.std()})
        log((), {"95%_waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.95) / 2) * wm.std()})
        log((), {"99%_waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.99) / 2) * wm.std()})
    if generator is not None and waveglow is not None:
        m = gm + wm
        rtf = all_samples / (all_utterances * m.mean() * args.sampling_rate)
        log((), {"avg_samples/s": all_samples / m.sum()})
        log((), {"avg_letters/s": all_letters / m.sum()})
        log((), {"avg_latency": m.mean()})
        log((), {"avg_RTF": rtf})
        log((), {"90%_latency": m.mean() + norm.ppf((1.0 + 0.90) / 2) * m.std()})
        log((), {"95%_latency": m.mean() + norm.ppf((1.0 + 0.95) / 2) * m.std()})
        log((), {"99%_latency": m.mean() + norm.ppf((1.0 + 0.99) / 2) * m.std()})
    DLLogger.flush()
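
# Note: the 90/95/99% latencies above are parametric estimates,
# mean + norm.ppf(q) * std, which assumes roughly normal latencies. A hedged
# alternative (not part of the original script) reads the empirical percentile
# from the collected measurements instead:
import numpy as np

def empirical_latency(measures, pct):
    # Empirical percentile of the recorded latencies, e.g. pct=90 or 99.
    return float(np.percentile(np.asarray(measures), pct))

# e.g. empirical_latency(gm, 90) in place of
#      gm.mean() + norm.ppf((1.0 + 0.90) / 2) * gm.std()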
Example #30
def main(_):

    # get e2e training time
    begin = time.time()
    logging.info("Training started at: {}".format(time.asctime()))

    hvd.init()

    # Parse and override hparams
    config = hparams_config.get_detection_config(FLAGS.model_name)
    config.override(FLAGS.hparams)
    if FLAGS.num_epochs:  # NOTE: remove this flag after updating all docs.
        config.num_epochs = FLAGS.num_epochs
    if FLAGS.lr:
        config.learning_rate = FLAGS.lr
    if FLAGS.warmup_value:
        config.lr_warmup_init = FLAGS.warmup_value
    if FLAGS.warmup_epochs:
        config.lr_warmup_epoch = FLAGS.warmup_epochs
    config.backbone_init = FLAGS.backbone_init
    config.mixed_precision = FLAGS.amp
    config.image_size = model_utils.parse_image_size(config.image_size)

    # get eval config
    eval_config = hparams_config.get_detection_config(FLAGS.model_name)
    eval_config.override(FLAGS.hparams)
    eval_config.val_json_file = FLAGS.val_json_file
    eval_config.val_file_pattern = FLAGS.val_file_pattern
    eval_config.nms_configs.max_nms_inputs = anchors.MAX_DETECTION_POINTS
    eval_config.drop_remainder = False  # eval all examples w/o drop.
    eval_config.image_size = model_utils.parse_image_size(
        eval_config['image_size'])

    # setup
    setup.set_flags(FLAGS, config, training=True)

    if FLAGS.debug:
        tf.config.experimental_run_functions_eagerly(True)
        tf.debugging.set_log_device_placement(True)
        tf.random.set_seed(111111)
        logging.set_verbosity(logging.DEBUG)

    # Check data path
    if (FLAGS.training_file_pattern is None or FLAGS.val_file_pattern is None
            or FLAGS.val_json_file is None):
        raise RuntimeError(
            'You must specify --training_file_pattern, --val_file_pattern '
            'and --val_json_file for training.')

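    # round up so a partial final batch still counts as a step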
    global_batch_size = FLAGS.batch_size * get_world_size()
    steps_per_epoch = ((FLAGS.num_examples_per_epoch + global_batch_size - 1)
                       // global_batch_size)
    if FLAGS.benchmark:
        # For CI perf training runs, run a fixed number of iterations per epoch.
        steps_per_epoch = FLAGS.benchmark_steps
    params = dict(config.as_dict(),
                  model_name=FLAGS.model_name,
                  model_dir=FLAGS.model_dir,
                  steps_per_epoch=steps_per_epoch,
                  checkpoint_period=FLAGS.checkpoint_period,
                  batch_size=FLAGS.batch_size,
                  num_shards=get_world_size(),
                  val_json_file=FLAGS.val_json_file,
                  testdev_dir=FLAGS.testdev_dir,
                  mode='train')
    logging.info('Training params: {}'.format(params))

    # make output dir if it does not exist
    tf.io.gfile.makedirs(FLAGS.model_dir)

    # dllogger setup
    backends = []
    if is_main_process():
        log_path = os.path.join(FLAGS.model_dir, FLAGS.log_filename)
        backends += [
            JSONStreamBackend(verbosity=Verbosity.VERBOSE, filename=log_path),
            StdOutBackend(verbosity=Verbosity.DEFAULT)
        ]

    DLLogger.init(backends=backends)

    def get_dataset(is_training, params):
        file_pattern = (FLAGS.training_file_pattern
                        if is_training else FLAGS.val_file_pattern)
        if not file_pattern:
            raise ValueError('No matching files.')

        return dataloader.InputReader(
            file_pattern,
            is_training=is_training,
            use_fake_data=FLAGS.use_fake_data,
            max_instances_per_image=config.max_instances_per_image,
            enable_map_parallelization=FLAGS.enable_map_parallelization)(
                params)

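    # split eval samples across ranks, then round up to whole batches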
    num_samples = (FLAGS.eval_samples + get_world_size() -
                   1) // get_world_size()
    num_samples = (num_samples + FLAGS.eval_batch_size -
                   1) // FLAGS.eval_batch_size
    eval_config.num_samples = num_samples

    def get_eval_dataset(eval_config):
        dataset = dataloader.InputReader(
            FLAGS.val_file_pattern,
            is_training=False,
            max_instances_per_image=eval_config.max_instances_per_image)(
                eval_config, batch_size=FLAGS.eval_batch_size)
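        # each rank evaluates only its own shard, truncated to num_samples batches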
        dataset = dataset.shard(get_world_size(), get_rank())
        dataset = dataset.take(num_samples)
        return dataset

    eval_dataset = get_eval_dataset(eval_config)

    # pick focal loss implementation
    focal_loss = train_lib.StableFocalLoss(
        params['alpha'],
        params['gamma'],
        label_smoothing=params['label_smoothing'],
        reduction=tf.keras.losses.Reduction.NONE)

    model = train_lib.EfficientDetNetTrain(params['model_name'], config)
    model.build((None, *config.image_size, 3))
    model.compile(
        optimizer=optimizer_builder.get_optimizer(params),
        loss={
            'box_loss':
            train_lib.BoxLoss(params['delta'],
                              reduction=tf.keras.losses.Reduction.NONE),
            'box_iou_loss':
            train_lib.BoxIouLoss(params['iou_loss_type'],
                                 params['min_level'],
                                 params['max_level'],
                                 params['num_scales'],
                                 params['aspect_ratios'],
                                 params['anchor_scale'],
                                 params['image_size'],
                                 reduction=tf.keras.losses.Reduction.NONE),
            'class_loss':
            focal_loss,
            'seg_loss':
            tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
        })
    train_from_epoch = util_keras.restore_ckpt(model,
                                               params['model_dir'],
                                               config.moving_average_decay,
                                               steps_per_epoch=steps_per_epoch)
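    # restore_ckpt returns the epoch to resume from; it seeds model.fit(initial_epoch=...)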

    print("training_mode: {}".format(FLAGS.training_mode))
    callbacks = callback_builder.get_callbacks(params, FLAGS.training_mode,
                                               eval_config, eval_dataset,
                                               DLLogger, FLAGS.time_history,
                                               FLAGS.log_steps, FLAGS.lr_tb,
                                               FLAGS.benchmark)

    history = model.fit(
        get_dataset(True, params=params),
        epochs=params['num_epochs'],
        steps_per_epoch=steps_per_epoch,
        initial_epoch=train_from_epoch,
        callbacks=callbacks,
        verbose=1 if is_main_process() else 0,
        validation_data=get_dataset(False, params=params)
        if FLAGS.validate else None,
        validation_steps=(FLAGS.eval_samples //
                          FLAGS.eval_batch_size) if FLAGS.validate else None)

    if is_main_process():
        model.save_weights(os.path.join(FLAGS.model_dir, 'ckpt-final'))

    # log final stats
    stats = {}
    for callback in callbacks:
        if isinstance(callback, callback_builder.TimeHistory):
            if callback.epoch_runtime_log:
                stats['avg_fps_training'] = callback.average_examples_per_second
                stats['avg_fps_training_per_GPU'] = (
                    callback.average_examples_per_second / get_world_size())
                stats['avg_latency_training'] = callback.average_time_per_iteration

    if history and history.history:
        train_hist = history.history
        # Final training loss, averaged across workers via Horovod allreduce.
        stats['training_loss'] = float(
            hvd.allreduce(tf.constant(train_hist['loss'][-1],
                                      dtype=tf.float32),
                          average=True))

    ema_dir = os.path.join(FLAGS.model_dir, 'ema_weights')
    if os.path.exists(ema_dir):
        latest_epoch = max(
            int(f.rsplit('.')[0].rsplit('-')[1])
            for f in os.listdir(ema_dir) if 'emackpt' in f)
        ckpt_epoch = '%02d' % latest_epoch
        ckpt = os.path.join(FLAGS.model_dir, 'ema_weights',
                            'emackpt-' + str(ckpt_epoch))
        util_keras.restore_ckpt(model,
                                ckpt,
                                eval_config.moving_average_decay,
                                steps_per_epoch=0,
                                skip_mismatch=False,
                                expect_partial=True)
        if is_main_process():
            model.save(os.path.join(FLAGS.model_dir, 'emackpt-final'))
    else:
        ckpt_epoch = 'final'
        ckpt = os.path.join(FLAGS.model_dir, 'ckpt-' + ckpt_epoch)
        if is_main_process():
            model.save(os.path.join(FLAGS.model_dir, 'ckpt-' + ckpt_epoch))

    # Start evaluation of final ema checkpoint
    logging.set_verbosity(logging.WARNING)

    @tf.function
    def model_fn(images, labels):
        cls_outputs, box_outputs = model(images, training=False)
        detections = postprocess.generate_detections(eval_config, cls_outputs,
                                                     box_outputs,
                                                     labels['image_scales'],
                                                     labels['source_ids'])

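        # tf.numpy_function steps out of the graph so the COCO evaluator can accumulate detections in Python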
        tf.numpy_function(evaluator.update_state, [
            labels['groundtruth_data'],
            postprocess.transform_detections(detections)
        ], [])

    if not FLAGS.benchmark and FLAGS.training_mode == 'train':

        # Evaluator for AP calculation.
        label_map = label_util.get_label_map(eval_config.label_map)
        evaluator = coco_metric.EvaluationMetric(
            filename=eval_config.val_json_file, label_map=label_map)

        evaluator.reset_states()

        # evaluate all images.
        pbar = tf.keras.utils.Progbar(num_samples)
        for i, (images, labels) in enumerate(eval_dataset):
            model_fn(images, labels)
            if is_main_process():
                pbar.update(i)

        # gather detections from all ranks
        evaluator.gather()

        if is_main_process():
            # compute the final eval results.
            metrics = evaluator.result()
            metric_dict = {}
            for i, name in enumerate(evaluator.metric_names):
                metric_dict[name] = metrics[i]

            if label_map:
                for i, cid in enumerate(sorted(label_map.keys())):
                    name = 'AP_/%s' % label_map[cid]
                    metric_dict[name] = metrics[i + len(evaluator.metric_names)]

            # csv format
            csv_metrics = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl']
            csv_format = ",".join(
                [str(ckpt_epoch)] +
                [str(round(metric_dict[key] * 100, 2)) for key in csv_metrics])
            print(FLAGS.model_name, metric_dict, "csv format:", csv_format)

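        # keep ranks in sync so no worker exits before evaluation completes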
        MPI.COMM_WORLD.Barrier()

    if is_main_process():
        stats['e2e_training_time'] = time.time() - begin
        DLLogger.log(step=(), data=stats)