import os
import logging

import torch

# assumption: RNNModel and the data module come from the accompanying
# word_language_model-style training script (model.py / data.py on the path)
from model import RNNModel
import data

logger = logging.getLogger(__name__)


def model_fn(model_dir):
    logger.info('Loading the model.')
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f)
    print('model_info: {}'.format(model_info))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info('Current device: {}'.format(device))
    model = RNNModel(rnn_type=model_info['rnn_type'], ntoken=model_info['ntoken'],
                     ninp=model_info['ninp'], nhid=model_info['nhid'],
                     nlayers=model_info['nlayers'], dropout=model_info['dropout'],
                     tie_weights=model_info['tie_weights'])
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f))
    # After loading, the RNN parameters are no longer a contiguous chunk of memory;
    # flatten_parameters() compacts them, which speeds up the forward pass.
    model.rnn.flatten_parameters()
    model.to(device).eval()
    logger.info('Loading the data.')
    corpus = data.Corpus(model_dir)
    logger.info('Done loading model and corpus. Corpus dictionary size: {}'.format(
        len(corpus.dictionary)))
    return {'model': model, 'corpus': corpus}
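
# For reference, model_fn is the entry point the SageMaker PyTorch serving
# container calls to load the model; the dict it returns is handed to predict_fn
# as its model argument. The sketch below shows one way such a predict_fn could
# generate text, modeled on the PyTorch word_language_model generate script. The
# random seed token, the 100-token length, and the temperature of 1.0 are
# illustrative assumptions, not part of the original code.
def predict_fn(input_data, model_dict):
    model, corpus = model_dict['model'], model_dict['corpus']
    device = next(model.parameters()).device
    ntokens = len(corpus.dictionary)

    hidden = model.init_hidden(1)  # batch size 1
    # start from a random token (real input_data handling is omitted here)
    inp = torch.randint(ntokens, (1, 1), dtype=torch.long, device=device)

    words = []
    with torch.no_grad():
        for _ in range(100):  # generate 100 tokens (arbitrary length)
            output, hidden = model(inp, hidden)
            word_weights = output.squeeze().div(1.0).exp()  # temperature = 1.0
            word_idx = torch.multinomial(word_weights, 1)[0]
            inp.fill_(word_idx)
            words.append(corpus.dictionary.idx2word[word_idx])
    return ' '.join(words)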
# dropout settings: 0.25, 0.1, 0.2, 0.02, 0.15
# net = CharLSTM(sequence_len=sequence_len, vocab_size=vocab_size, hidden_dim=hs,
#                batch_size=batch_size, n_fac=n_fac, device="cuda:0")
net = RNNModel(rnn_type="LSTM", ntoken=vocab_size, ninp=hs, nhid=hs, nlayers=3,
               dropout=0.25, dropouth=0.1, dropouti=0.2, dropoute=0.02, wdrop=0,
               tie_weights=False, device="cuda:0")
net.to(net.device)

# optimizer = optim.Adam(net.parameters(), lr=30, weight_decay=0.0001)
optimizer = torch.optim.SGD(net.parameters(), lr=1e3, momentum=0.90,
                            weight_decay=1.2e-6, nesterov=False)

# Hold out the last 10% of the encoded corpus for validation.
val_idx = int(len(encoded) * (1 - 0.1))
data, val_data = encoded[:val_idx], encoded[val_idx:]

# empty list to collect the validation losses
val_losses = list()
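
# The 90/10 split above leaves data and val_data as flat arrays of token ids;
# a training loop still needs to slice them into (input, target) mini-batches.
# Below is a minimal sketch of one common batching scheme for char/word RNNs,
# assuming encoded is a 1-D NumPy array of token ids. The helper name
# get_batches and the exact shapes are illustrative assumptions, not part of
# the original script.
import numpy as np

def get_batches(arr, batch_size, seq_len):
    """Yield (x, y) mini-batches where y is x shifted one token to the left."""
    n_tokens = batch_size * seq_len
    n_batches = len(arr) // n_tokens
    arr = arr[:n_batches * n_tokens].reshape((batch_size, -1))  # drop the ragged tail
    for i in range(0, arr.shape[1], seq_len):
        x = arr[:, i:i + seq_len]
        y = np.zeros_like(x)
        # targets are inputs shifted by one; wrap around at the end of the array
        y[:, :-1], y[:, -1] = x[:, 1:], arr[:, (i + seq_len) % arr.shape[1]]
        yield x, y

# Each pair can then be moved to the model's device, e.g.
#   for x, y in get_batches(data, batch_size, sequence_len):
#       inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
# with the loss computed on val_data appended to val_losses after each epoch.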