# NOTE(review): this try/except is the tail of a definition whose header lies
# before this chunk (presumably `init_weights`, which is applied to the model
# below -- confirm, and re-indent under that header if needed). Its tokens are
# unchanged; only the layout was restored. The bare `except` silently skips
# any module `m` for which either init call fails.
try:
    torch.nn.init.kaiming_uniform_(m.weight.data)
    m.bias.data.zero_()
except:
    pass

# Use the GPU when one is available; everything below is placed on `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Experiments whose name contains "residual" also pass `num_residual` to Model.
if "residual" in experiment:
    model = Model(num_features=num_features, num_residual=num_residual).to(device)
else:
    model = Model(num_features=num_features).to(device)

# Targets equal to 8 are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=8)

# Fresh initialisation; replaced below if a saved model is found on disk.
model.apply(init_weights)
print(type(model))

if os.path.exists(latest_model_path):
    print("Model exists. Loading from {0}".format(latest_model_path))
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only
    # host (and vice versa) instead of failing during deserialisation.
    # NOTE: torch.load unpickles a whole model object here -- only load
    # checkpoint files from a trusted source.
    model = torch.load(latest_model_path, map_location=device)

# Make sure the parameters live on `device` *before* the optimizer is built
# and its state is restored, so the optimizer state ends up on the same device.
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=lr)

if os.path.exists(optim_path):
    print("Optimizer state dict exists. Loading from {0}".format(optim_path))
    # Load into a separate name: rebinding `optim` would shadow the
    # torch.optim module used above and break any later `optim.*` access.
    optimizer_checkpoint = torch.load(optim_path, map_location=device)
    optimizer.load_state_dict(optimizer_checkpoint['optimizer'])

print("Model is using GPU: {0}".format(next(model.parameters()).is_cuda))