# --- Model construction and checkpoint restore ---------------------------
model = MainModel(Config)
model_dict = model.state_dict()
pretrained_dict = torch.load(args.resume)
# Keep only checkpoint entries whose name (after dropping the first 7
# characters) exists in this model.  NOTE(review): k[7:] presumably strips
# the 7-char 'module.' prefix that nn.DataParallel prepends to every key
# when a wrapped model was saved — confirm against how the checkpoint was
# written.
pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if k[7:] in model_dict}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
# add tensorboard graph of structure
if args.log_dir:
    # NOTE(review): 'add_stureture_graph' is misspelled but must match the
    # flag name declared by the argument parser elsewhere — do not "fix"
    # one side without the other.
    if args.add_stureture_graph:
        # Dummy batch of one 3-channel image at the crop resolution, used
        # only to trace the graph for TensorBoard.
        dummy_input = (torch.zeros(1, 3, args.crop_resolution, args.crop_resolution))
        outputs = model(dummy_input)
        sw.add_graph(model, dummy_input)
# Get the weight matrix of the feature layer (3202*2048).  Translated from
# the original comment: for DCL this corresponds to the -4th layer's
# weights; for ResNet50 it corresponds to... (original comment is truncated).
# NOTE(review): params[-3] is assumed to be that weight tensor — verify
# against the model definition; done on CPU (before .cuda()) so .numpy()
# works without an explicit .cpu() call.
params = list(model.parameters())
weight_softmax = np.squeeze(params[-3].data.numpy())
model.cuda()
model = nn.DataParallel(model)
# Switch to evaluation mode (disables dropout/batch-norm updates).
model.train(False)
if args.feature:
    result = []
    # feature = pd.DataFrame(columns=range(len(data_set)))
    # Inference-only pass: no autograd bookkeeping.  The accumulators below
    # are presumably filled by the loop that follows this visible chunk.
    with torch.no_grad():
        result_1 = []
        confidence_1 = []
        all_result = []
        feature = []
# --- XLA/TPU training setup ----------------------------------------------
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Set cache dir', flush=True)
# NOTE(review): this shadows the stdlib 'time' module if it is imported at
# the top of the file; renamed usage downstream is not visible here, so the
# name is kept as-is.
time = datetime.datetime.now()
num_cores = 8
devices = (xm.get_xla_supported_devices(
    max_devices=num_cores) if num_cores != 0 else [])
# Scale learning rate to num cores
base_lr = args.base_lr * max(len(devices), 1)
# Pass [] as device_ids to run using the PyTorch/CPU engine.
model_parallel = dp.DataParallel(model, device_ids=devices)
# Optimizer prep: collect the ids of parameters belonging to the freshly
# added heads so the remaining (pretrained backbone) parameters can be
# given a different learning rate by the optimizer built elsewhere.
ignored_params1 = list(map(id, model.classifier.parameters()))
ignored_params2 = list(map(id, model.classifier_swap.parameters()))
ignored_params3 = list(map(id, model.Convmask.parameters()))
ignored_params = ignored_params1 + ignored_params2 + ignored_params3
print('the num of new layers:', len(ignored_params), flush=True)
# Membership test against a set is O(1); the original tested against the
# list, which made the filter O(len(params) * len(ignored_params)).
# ignored_params itself stays a list in case later code relies on it.
ignored_id_set = set(ignored_params)
base_params = filter(lambda p: id(p) not in ignored_id_set,
                     model.parameters())
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=args.decay_step, gamma=0.1)
# exp_lr_scheduler.step(epoch)
# Train entry: run the project-defined `train` loop once per epoch across
# all XLA devices.
for epoch in range(1, args.epoch + 1):
    model_parallel(train, dataloader_train)