Exemplo n.º 1
0
    def __init__(self):
        """Load the pretrained MelGAN and vanilla models and trace their parts.

        The constructor:

        1. builds both models from their config modules and merges the
           pretrained weights found under ``ROOT`` into them,
        2. traces/scripts the mel encoder, the top-VAE encoder and decoder,
           the MelGAN decoder and the log-loudness module on random inputs,
        3. copies a set of hyper-parameters from the vanilla config into the
           global ``config``,
        4. when ``PCA`` is truthy, loads (or computes and caches) the PCA
           statistics and registers them as the buffers ``mean``, ``std``
           and ``U``.

        Raises:
            Exception: if ``PCA`` is enabled, no usable ``pca.pth`` can be
                loaded and ``config.USE_CACHED_PADDING`` is set.
        """
        super().__init__()

        # BUILDING MELGAN #################################################
        hparams_melgan = importlib.import_module(config_melgan).config
        hparams_melgan.override(USE_CACHED_PADDING=config.USE_CACHED_PADDING)
        melgan = get_model(hparams_melgan)

        # Only element [0] of the stored checkpoint is used here.
        # NOTE(review): presumably the file holds (generator, discriminator)
        # states -- confirm against the training script.
        pretrained_state_dict = torch.load(path.join(ROOT, "melgan",
                                                     "melgan_state.pth"),
                                           map_location="cpu")[0]
        # Overlay the pretrained entries on the freshly built state dict so
        # keys absent from the checkpoint keep their initial values.
        state_dict = melgan.state_dict()
        state_dict.update(pretrained_state_dict)
        melgan.load_state_dict(state_dict)
        ###################################################################

        # BUILDING VANILLA ################################################
        hparams_vanilla = importlib.import_module(config_vanilla).config
        hparams_vanilla.override(USE_CACHED_PADDING=config.USE_CACHED_PADDING)
        vanilla = get_model(hparams_vanilla)

        pretrained_state_dict = torch.load(path.join(ROOT, "vanilla",
                                                     "vanilla_state.pth"),
                                           map_location="cpu")
        state_dict = vanilla.state_dict()
        state_dict.update(pretrained_state_dict)
        vanilla.load_state_dict(state_dict)
        ###################################################################

        vanilla.eval()
        melgan.eval()

        # PRETRACE MODELS #################################################
        self.latent_size = int(config.CHANNELS[-1] // 2)
        self.mel_size = int(config.CHANNELS[0])

        # The decoder input gets one extra channel when loudness is extracted.
        if hparams_vanilla.EXTRACT_LOUDNESS:
            z_channels = self.latent_size + 1
        else:
            z_channels = self.latent_size

        # Example inputs used for tracing: short ones in cached-padding mode,
        # longer fixed-size ones otherwise.
        if config.USE_CACHED_PADDING:
            test_wav = torch.randn(1, config.BUFFER_SIZE)
            test_mel = torch.randn(1, config.INPUT_SIZE, 2)
            test_z = torch.randn(1, z_channels, 1)
        else:
            test_wav = torch.randn(1, 8192)
            test_mel = torch.randn(1, config.INPUT_SIZE, 16)
            test_z = torch.randn(1, z_channels, 16)

        melencoder = TracedMelEncoder(
            vanilla.melencoder,
            BufferSTFT(config.BUFFER_SIZE, config.HOP_LENGTH),
            config.HOP_LENGTH, config.USE_CACHED_PADDING)

        logloudness = LogLoudness(
            int(hparams_vanilla.HOP_LENGTH * np.prod(hparams_vanilla.RATIOS)),
            1e-4)

        # LogLoudness is scripted; every other module is traced on the
        # example inputs above, with trace checking disabled.
        self.trace_logloudness = torch.jit.script(logloudness)
        self.trace_melencoder = torch.jit.trace(melencoder,
                                                test_wav,
                                                check_trace=False)
        self.trace_encoder = torch.jit.trace(vanilla.topvae.encoder,
                                             test_mel,
                                             check_trace=False)
        self.trace_decoder = torch.jit.trace(vanilla.topvae.decoder,
                                             test_z,
                                             check_trace=False)
        self.trace_melgan = torch.jit.trace(melgan.decoder,
                                            test_mel,
                                            check_trace=False)

        # Mirror the vanilla model's hyper-parameters into the global config.
        config.override(SAMPRATE=hparams_vanilla.SAMPRATE,
                        N_SIGNAL=hparams_vanilla.N_SIGNAL,
                        EXTRACT_LOUDNESS=hparams_vanilla.EXTRACT_LOUDNESS,
                        TYPE=hparams_vanilla.TYPE,
                        HOP_LENGTH=hparams_vanilla.HOP_LENGTH,
                        RATIOS=hparams_vanilla.RATIOS,
                        WAV_LOC=hparams_vanilla.WAV_LOC,
                        LMDB_LOC=hparams_vanilla.LMDB_LOC)

        self.pca = None

        if PCA:
            pca_path = path.join(ROOT, "pca.pth")
            try:
                self.pca = torch.load(pca_path)
            except Exception as err:
                # Any load failure is treated as "no cache available", but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                if config.USE_CACHED_PADDING:
                    raise Exception(
                        "PCA should be first computed in non cache mode"
                    ) from err
                print("No precomputed pca found. Computing.")
                self.pca = None
            else:
                print("Precomputed pca found")

            if self.pca is None:
                self.pca = compute_pca(self, 32)
                torch.save(self.pca, pca_path)

            self.register_buffer("mean", self.pca[0])
            self.register_buffer("std", self.pca[1])
            self.register_buffer("U", self.pca[2])

            # NOTE(review): this attribute is only set when PCA is enabled;
            # confirm that is intended and not an indentation slip.
            self.extract_loudness = config.EXTRACT_LOUDNESS
Exemplo n.º 2
0
def _load_model():
    """Fetch the model via ``get_model`` and publish it as the global ``model``.

    Reads the module-level ``model_bucket`` and ``model_filename`` and emits
    a debug log line once the model has been stored.
    """
    global model
    loaded = get_model(model_bucket, model_filename)
    model = loaded
    logger.debug('successfully loaded model')
Exemplo n.º 3
0
# Let the shared config object process its arguments before it is read below.
config.parse_args()

# DATA PIPELINE: shuffled batches, incomplete last batch dropped.
dataset = Loader(config.AUGMENT)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=config.BATCH,
    shuffle=True,
    drop_last=True,
)

# DEVICE SELECTION: first CUDA device when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# MELGAN TRAINING
if config.TYPE == "melgan":
    gen = get_model()
    dis = Discriminator()

    # Optionally resume: the checkpoint unpacks into the generator state
    # followed by the discriminator state, both loaded on CPU first.
    if config.CKPT is not None:
        ckptgen, ckptdis = torch.load(config.CKPT, map_location="cpu")
        gen.load_state_dict(ckptgen)
        dis.load_state_dict(ckptdis)

    gen, dis = gen.to(device), dis.to(device)

    # One Adam optimizer per network, same learning rate and betas.
    opt_gen = torch.optim.Adam(
        gen.parameters(),
        lr=config.LR,
        betas=[.5, .9],
    )
    opt_dis = torch.optim.Adam(
        dis.parameters(),
        lr=config.LR,
        betas=[.5, .9],
    )

    model = (gen, dis)
Exemplo n.º 4
0
def main(mode=None):
    r"""starts the model

    Loads the configuration, seeds every random number generator, selects
    the compute device, builds the model returned by ``get_model`` and then
    runs training, testing or evaluation depending on ``config.MODE``.

    Args:
        mode (int): 1: train, 2: test, 3: eval, reads from config file if not specified
    """
    # Rebound only in test mode (mode == 2), see below.
    global Config, get_model

    config = load_config(mode)

    # initialize random seed
    torch.manual_seed(config.SEED)
    torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(config.SEED)
    random.seed(config.SEED)

    # NOTE(review): these branches test the ``mode`` argument while the
    # dispatch at the end tests ``config.MODE``; when ``mode`` is None neither
    # branch runs even if the config selects train/test -- confirm intended.
    if mode == 1:
        # copy network files as a backup when training
        # NOTE(review): shell string built from config.PATH; a path with
        # spaces or shell metacharacters will break this command.
        os.system("cp -r ./src %s/" % config.PATH)

    elif mode == 2:
        # select source code when testing: use the copy of ``src`` that was
        # backed up next to the checkpoint instead of the working tree
        src = importlib.import_module("checkpoints.%s.src" %
                                      (os.path.basename(config.PATH)))
        Config = src.Config
        get_model = src.get_model

    # cuda visible devices
    # os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(e) for e in config.GPU)
    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible_devices:
        # The variable is a comma-separated list, so count entries rather
        # than characters ("10,11" is two devices, "-1" is none).
        entries = (entry.strip() for entry in visible_devices.split(","))
        n_visible = sum(1 for entry in entries if entry.isdigit())
        # Visible devices are re-indexed from 0 inside the process.
        config.GPU = list(range(n_visible))

    # init device
    if torch.cuda.is_available():
        config.DEVICE = torch.device("cuda")
        torch.backends.cudnn.benchmark = True  # cudnn auto-tuner
    else:
        config.DEVICE = torch.device("cpu")

    # disable OpenCV's own threading by passing 0 (prevents deadlocks with
    # the pytorch dataloader)
    cv2.setNumThreads(0)

    # import inpainting model
    InpaintingModel = get_model(model=config.MODEL)

    # build the model and initialize
    model = InpaintingModel(config)
    model.load()

    # model training
    if config.MODE == 1:
        config.print()
        model.print()
        print('\nstart training...\n')
        model.train()

    # model test
    elif config.MODE == 2:
        print('\nstart testing...\n')
        model.test()

    # eval mode
    else:
        print('\nstart eval...\n')
        model.eval()