Example #1
# core imports used below; MusicTransformer, CustomSchedule, MetricsSet,
# CategoricalAccuracy, SmoothCrossEntropyLoss, LogitsBucketting and config
# come from the project's own modules and are imported elsewhere in this file
import torch
from torch import optim

# define model
mt = MusicTransformer(embedding_dim=config.embedding_dim,
                      vocab_size=config.vocab_size,
                      num_layer=config.num_layers,
                      max_seq=config.max_seq,
                      dropout=config.dropout,
                      debug=config.debug,
                      loader_path=config.load_path)

# optionally resume from a pretrained checkpoint
if config.init_ckpt is not None:
    mt.load_state_dict(torch.load(config.init_ckpt))
    print("Weights from %s loaded" % config.init_ckpt)

mt.to(config.device)
opt = optim.Adam(mt.parameters(), lr=0, betas=(0.9, 0.98),
                 eps=1e-9)  # lr=0: the scheduler below sets the actual rate each step
scheduler = CustomSchedule(
    config.embedding_dim, optimizer=opt)  # custom learning-rate decay schedule

# multi-GPU setup: wrap in DataParallel but keep an unwrapped reference (single_mt)
if torch.cuda.device_count() > 1:
    single_mt = mt
    mt = torch.nn.DataParallel(mt, output_device=torch.cuda.device_count() - 1)
else:
    single_mt = mt

# init metric set
metric_set = MetricsSet({
    'accuracy': CategoricalAccuracy(),
    'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size,
                                   config.pad_token),
})
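
CustomSchedule is defined elsewhere in the project and is not shown in these excerpts. Since it is constructed from embedding_dim and paired with an Adam optimizer created at lr=0, it presumably implements a Transformer-style warmup/decay rule. The sketch below illustrates that rule under this assumption; WarmupSchedule and the warmup_steps default are illustrative names, not the project's API.

import torch
from torch import optim


class WarmupSchedule:
    """Sketch of a Transformer-style rate schedule (assumed behaviour of CustomSchedule):
    lr = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)."""

    def __init__(self, d_model, optimizer, warmup_steps=4000):
        self.d_model = d_model
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self._step = 0

    def step(self):
        # advance one training step and write the new rate into every param group
        self._step += 1
        lr = (self.d_model ** -0.5) * min(self._step ** -0.5,
                                          self._step * self.warmup_steps ** -1.5)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        return lr


# usage: the rate rises linearly for warmup_steps batches, then decays as step**-0.5
model = torch.nn.Linear(8, 8)  # stand-in for the transformer
opt = optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
sched = WarmupSchedule(d_model=256, optimizer=opt, warmup_steps=4000)
sched.step()  # call once per batch alongside opt.step()

This matches why the Adam optimizer above is created with lr=0: the rate is expected to be written into the optimizer by the schedule rather than fixed at construction time.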
Example #2
print(dataset)

# learning rate from config (the optimizer below is created with lr=0;
# the actual rate is driven by the scheduler)
learning_rate = config.l_r

# define model
mt = MusicTransformer(embedding_dim=config.embedding_dim,
                      vocab_size=config.vocab_size,
                      num_layer=config.num_layers,
                      max_seq=config.max_seq,
                      dropout=config.dropout,
                      debug=config.debug,
                      loader_path=config.load_path)
mt.to(config.device)
opt = optim.Adam(mt.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
scheduler = CustomSchedule(config.embedding_dim, optimizer=opt)

# multi-GPU setup: wrap in DataParallel but keep an unwrapped reference (single_mt)
if torch.cuda.device_count() > 1:
    single_mt = mt
    mt = torch.nn.DataParallel(mt, output_device=torch.cuda.device_count() - 1)
else:
    single_mt = mt

# init metric set
metric_set = MetricsSet({
    'accuracy': CategoricalAccuracy(),
    'loss': SmoothCrossEntropyLoss(config.label_smooth, config.vocab_size, config.pad_token),
    'bucket': LogitsBucketting(config.vocab_size)
})
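
SmoothCrossEntropyLoss, CategoricalAccuracy, LogitsBucketting and MetricsSet are the project's own classes and are not reproduced on this page. For orientation, here is a minimal, self-contained sketch of the kind of label-smoothed, pad-masked cross-entropy its constructor arguments (config.label_smooth, config.vocab_size, config.pad_token) suggest; the function name, tensor shapes and dummy sizes are illustrative assumptions, not the project's API.

import torch
import torch.nn.functional as F


def smooth_cross_entropy(logits, targets, smoothing, vocab_size, pad_token):
    # label-smoothed cross-entropy over (batch, seq, vocab) logits,
    # ignoring target positions equal to pad_token (sketch only)
    logits = logits.view(-1, vocab_size)      # (batch*seq, vocab)
    targets = targets.view(-1)                # (batch*seq,)
    log_probs = F.log_softmax(logits, dim=-1)

    # smoothed target distribution: 1 - smoothing on the true class,
    # smoothing spread uniformly over the remaining classes
    true_dist = torch.full_like(log_probs, smoothing / (vocab_size - 1))
    true_dist.scatter_(1, targets.unsqueeze(1), 1.0 - smoothing)

    loss = -(true_dist * log_probs).sum(dim=-1)   # per-token loss
    mask = targets.ne(pad_token).float()          # drop padded positions
    return (loss * mask).sum() / mask.sum().clamp(min=1.0)


# usage with dummy shapes: 2 sequences of length 16 over a 400-token vocabulary
logits = torch.randn(2, 16, 400)
targets = torch.randint(1, 400, (2, 16))
loss = smooth_cross_entropy(logits, targets, smoothing=0.1,
                            vocab_size=400, pad_token=0)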