def main(args): args = parser.parse_args() os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3" class_num = {'cub': 200, 'cars': 196, 'fgvc': 100} if args.seed is None: args.seed = random.randint(1, 10000) random.seed(args.seed) torch.manual_seed(args.seed) device = 'cuda' if torch.cuda.is_available() else 'cpu' if device == 'cuda': torch.cuda.manual_seed_all(args.seed) model = get_model(args.model, class_num[args.dataset]) model = torch.nn.DataParallel(model).cuda() if device == 'cuda': model = model.cuda() #model = torch.nn.DataParallel(model) cudnn.benchmark = True criterion = nn.CrossEntropyLoss() new_param_ids = set(map(id, model.module.fc.parameters())) base_params = [p for p in model.parameters() if id(p) not in new_param_ids] param_groups_base = [{'params': base_params, 'lr_mult': 0.1}] param_groups_new = [{ 'params': model.module.fc.parameters(), 'lr_mult': 1.0 }] if args.alg == 'sgd': optimizer_base = optim.SGD(param_groups_base, args.lr, momentum=0.9) optimizer_new = optim.SGD(param_groups_new, args.lr, momentum=0.9) elif args.alg == 'rmsprop': optimizer_base = optim.RMSprop(param_groups_base, args.lr) optimizer_new = optim.RMSprop(param_groups_new, args.lr) elif args.alg == 'adam': optimizer_base = optim.Adam(param_groups_base, args.lr) optimizer_new = optim.Adam(param_groups_new, args.lr) elif args.alg == 'adamw': optimizer_base = optim.AdamW(param_groups_base, args.lr) optimizer_new = optim.AdamW(param_groups_new, args.lr) elif args.alg == 'diffgrad': optimizer_base = diffgrad(param_groups_base, args.lr) optimizer_new = diffgrad(param_groups_new, args.lr) elif args.alg == 'cosangulargrad': optimizer_base = cosangulargrad(param_groups_base, args.lr) optimizer_new = cosangulargrad(param_groups_new, args.lr) elif args.alg == 'tanangulargrad': optimizer_base = tanangulargrad(param_groups_base, args.lr) optimizer_new = tanangulargrad(param_groups_new, args.lr) else: print('==> Optimizer not found...') exit() exp_lr_scheduler_new = lr_scheduler.MultiStepLR(optimizer_new, milestones=[30, 50], gamma=0.1) exp_lr_scheduler_base = lr_scheduler.MultiStepLR(optimizer_base, milestones=[30, 50], gamma=0.1) train_loader, val_loader = get_loaders(args) best_acc = -1 datass = np.ones((4, args.epochs)) * -1000.0 for epoch in range(args.start_epoch, args.epochs): train_acc, train_loss = train(train_loader, model, criterion, optimizer_base, optimizer_new, epoch, args) exp_lr_scheduler_new.step() exp_lr_scheduler_base.step() val_acc, val_loss = validate(val_loader, model, criterion, args) if val_acc > best_acc: print('Saving..') state = { 'model': model.state_dict(), 'acc': val_acc, 'epoch': epoch, 'best_acc': best_acc, } if not os.path.isdir('checkpoint'): os.mkdir('checkpoint') torch.save(state, './checkpoint/ckpt.t7') best_acc = val_acc
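# Note: torch.optim optimizers keep unknown keys such as 'lr_mult' in optimizer.param_groups
# but never apply them, so in the function above the backbone and the new fc head end up
# training with the same learning rate. A minimal sketch of one way to honour the multiplier
# (an assumption about the intent, not code from the original) -- call it right after each
# optimizer is built, before the MultiStepLR schedulers capture their base learning rates:
def apply_lr_mult(optimizer, base_lr):
    """Scale every param group's learning rate by its optional 'lr_mult' factor."""
    for group in optimizer.param_groups:
        group['lr'] = base_lr * group.get('lr_mult', 1.0)

# apply_lr_mult(optimizer_base, args.lr)   # base layers train at 0.1 * lr
# apply_lr_mult(optimizer_new, args.lr)    # the new fc layer trains at the full lr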
def main(args):
    # 1. prepare data & models
    train_transforms = transforms.Compose([
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),
        TransformByKeys(transforms.ToPILImage(), ("image",)),
        TransformByKeys(transforms.ToTensor(), ("image",)),
        TransformByKeys(
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ("image",),
        ),
    ])

    print("Reading data...")
    train_dataset = ThousandLandmarksDataset(os.path.join(args.data, "train"), train_transforms,
                                             split="train", debug=args.debug)
    train_dataloader = data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=4,
                                       pin_memory=True, shuffle=True, drop_last=True)
    val_dataset = ThousandLandmarksDataset(os.path.join(args.data, "train"), train_transforms,
                                           split="val", debug=args.debug)
    val_dataloader = data.DataLoader(val_dataset, batch_size=args.batch_size, num_workers=4,
                                     pin_memory=True, shuffle=False, drop_last=False)

    print("Creating model...")
    device = torch.device("cuda:0") if args.gpu else torch.device("cpu")
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
    model.to(device)

    # Stage 1: freeze the pretrained backbone and train only the new head.
    for name, child in model.named_children():
        requires_grad = name in ["fc"]
        for param in child.parameters():
            param.requires_grad = requires_grad

    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=args.learning_rate, momentum=0.9, weight_decay=1e-04)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1,
                                              steps_per_epoch=len(train_dataloader),
                                              epochs=args.epochs)
    loss = L.WingLoss(width=10, curvature=2, reduction="mean")

    # 2. train & validate
    print("Ready for training...")
    for epoch in range(args.epochs):
        train_loss = train(model, train_dataloader, loss, optimizer, device=device, scheduler=scheduler)
        val_loss = validate(model, val_dataloader, loss, device=device)
        print("Epoch #{:2}:\ttrain loss: {:6.3}\tval loss: {:6.3}".format(epoch, train_loss, val_loss))

    # 2.1. unfreeze everything and continue training with per-layer learning rates
    for p in model.parameters():
        p.requires_grad = True
    optimizer = optim.AdamW(
        [
            {"params": model.conv1.parameters(), "lr": 1e-6},
            {"params": model.bn1.parameters(), "lr": 1e-6},
            {"params": model.relu.parameters(), "lr": 1e-5},     # no learnable params; group stays empty
            {"params": model.maxpool.parameters(), "lr": 1e-5},  # no learnable params; group stays empty
            {"params": model.layer1.parameters(), "lr": 1e-4},
            {"params": model.layer2.parameters(), "lr": 1e-4},
            {"params": model.layer3.parameters(), "lr": 1e-3},
            {"params": model.layer4.parameters(), "lr": 1e-3},
            {"params": model.avgpool.parameters(), "lr": 1e-2},  # no learnable params; group stays empty
            {"params": model.fc.parameters(), "lr": 1e-2},
        ],
        lr=args.learning_rate,
        weight_decay=1e-06,
        amsgrad=True,
    )
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    print("Ready for training again...")
    best_val_loss = np.inf
    for epoch in range(args.epochs):
        train_loss = train(model, train_dataloader, loss, optimizer, device=device, scheduler=scheduler)
        val_loss = validate(model, val_dataloader, loss, device=device)
        print("Epoch #{:2}:\ttrain loss: {:6.3}\tval loss: {:6.3}".format(epoch, train_loss, val_loss))
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            with open(f"{args.name}_best.pth", "wb") as fp:
                torch.save(model.state_dict(), fp)

    # 3. predict
    if not args.debug:
        test_dataset = ThousandLandmarksDataset(os.path.join(args.data, "test"), train_transforms,
                                                split="test", debug=args.debug)
        test_dataloader = data.DataLoader(test_dataset, batch_size=args.batch_size, num_workers=4,
                                          pin_memory=True, shuffle=False, drop_last=False)
        with open(f"{args.name}_best.pth", "rb") as fp:  # same file the best weights were saved to above
            best_state_dict = torch.load(fp, map_location="cpu")
        model.load_state_dict(best_state_dict)

        test_predictions = predict(model, test_dataloader, device)
        with open(f"submit/{args.name}_test_predictions.pkl", "wb") as fp:
            pickle.dump({"image_names": test_dataset.image_names, "landmarks": test_predictions}, fp)
        create_submission(args.data, test_predictions, f"submit/{args.name}_submit.csv")
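# The train/validate helpers used above are not included in this snippet. A minimal,
# hypothetical sketch is given below under two assumptions: batches are dicts carrying
# "image" and "landmarks" tensors, and the scheduler is stepped once per batch (which is
# what OneCycleLR built with steps_per_epoch=len(train_dataloader) expects).
def train(model, loader, loss_fn, optimizer, device, scheduler=None):
    model.train()
    total = 0.0
    for batch in loader:
        images = batch["image"].to(device)
        landmarks = batch["landmarks"].to(device)
        optimizer.zero_grad()
        pred = model(images)                                         # (B, 2 * NUM_PTS)
        loss = loss_fn(pred, landmarks.view(landmarks.size(0), -1))  # flatten targets to match
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        total += loss.item()
    return total / len(loader)


def validate(model, loader, loss_fn, device):
    model.eval()
    total = 0.0
    with torch.no_grad():
        for batch in loader:
            images = batch["image"].to(device)
            landmarks = batch["landmarks"].to(device)
            pred = model(images)
            total += loss_fn(pred, landmarks.view(landmarks.size(0), -1)).item()
    return total / len(loader)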
def main_worker(gpu, ngpus_per_node, args): args.gpu = gpu if args.gpu is not None: print("Use GPU: {} for training".format(args.gpu)) if args.distributed: if args.dist_url == "env://" and args.rank == -1: # args.rank = int(os.environ["RANK"]) args.rank = 1 if args.multiprocessing_distributed: # For multiprocessing distributed training, rank needs to be the # global rank among all the processes args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) # Training dataset train_dataset = [] if (args.dataset == 'VOC'): train_dataset = VOCDetection(root=args.dataset_root, transform=transforms.Compose([ Normalizer(), Augmenter(), Resizer() ])) valid_dataset = VOCDetection(root=args.dataset_root, image_sets=[('2007', 'test')], transform=transforms.Compose( [Normalizer(), Resizer()])) args.num_class = train_dataset.num_classes() elif (args.dataset == 'COCO'): train_dataset = CocoDataset(root_dir=args.dataset_root, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) valid_dataset = CocoDataset(root_dir=args.dataset_root, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) args.num_class = train_dataset.num_classes() elif (args.dataset == 'BreastCancer'): train_dataset = BreastDataset(root="None", set_type="train", transforms=transforms.Compose([ Normalizer(), Augmenter(), Resizer() ])) valid_dataset = BreastDataset(root="None", set_type='val', transforms=transforms.Compose( [Normalizer(), Resizer()])) args.num_class = train_dataset.num_classes() train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.workers, shuffle=True, collate_fn=collater, pin_memory=True) valid_loader = DataLoader(valid_dataset, batch_size=1, num_workers=args.workers, shuffle=False, collate_fn=collater, pin_memory=True) checkpoint = [] if (args.resume is not None): if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) if args.gpu is None: checkpoint = torch.load(args.resume) else: # Map model to be loaded to specified single gpu. loc = 'cuda:{}'.format(args.gpu) checkpoint = torch.load(args.resume, map_location=loc) params = checkpoint['parser'] args.num_class = params.num_class args.network = params.network args.start_epoch = checkpoint['epoch'] + 1 del params model = EfficientDet(num_classes=args.num_class, network=args.network, W_bifpn=EFFICIENTDET[args.network]['W_bifpn'], D_bifpn=EFFICIENTDET[args.network]['D_bifpn'], D_class=EFFICIENTDET[args.network]['D_class']) if (args.resume is not None): model.load_state_dict(checkpoint['state_dict']) del checkpoint if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, # DistributedDataParallel will use all available devices. 
if args.gpu is not None: torch.cuda.set_device(args.gpu) model.cuda(args.gpu) # When using a single GPU per process and per # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs we have args.batch_size = int(args.batch_size / ngpus_per_node) args.workers = int( (args.workers + ngpus_per_node - 1) / ngpus_per_node) model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[args.gpu], find_unused_parameters=True) print('Run with DistributedDataParallel with divice_ids....') else: model.cuda() # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set model = torch.nn.parallel.DistributedDataParallel(model) print('Run with DistributedDataParallel without device_ids....') elif args.gpu is not None: torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) else: model = model.cuda() print('Run with DataParallel ....') model = torch.nn.DataParallel(model).cuda() # define loss function (criterion) , optimizer, scheduler optimizer = optim.AdamW(model.parameters(), lr=args.lr) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) cudnn.benchmark = True for epoch in range(args.start_epoch, args.num_epoch): train(train_loader, model, scheduler, optimizer, epoch, args) if (epoch + 1) % 5 == 0: test(valid_dataset, model, epoch, args) state = { 'epoch': epoch, 'parser': args, 'state_dict': get_state_dict(model) } torch.save( state, os.path.join(args.save_folder, args.dataset, args.network, "checkpoint_{}.pth".format(epoch)))
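# get_state_dict is not defined in this snippet. A plausible minimal version (an assumption,
# not the project's actual helper) that unwraps DataParallel / DistributedDataParallel so the
# saved weights can later be loaded into a bare EfficientDet:
def get_state_dict(model):
    if isinstance(model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)):
        model = model.module
    return model.state_dict()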
def exp(subject_id):
    cuda = torch.cuda.is_available()  # check if a GPU is available; if True, use it
    device = 'cuda:0' if cuda else 'cpu'
    if cuda:
        torch.backends.cudnn.benchmark = True
    seed = 10  # random seed to make results reproducible
    # Set random seed to be able to reproduce results
    set_random_seeds(seed=seed, cuda=cuda)

    test_subj = np.r_[subject_id]
    print('test subj:' + str(test_subj))
    train_subj = np.setdiff1d(np.r_[1:10], test_subj)

    tr = []
    val = []
    # split off 10% of each training subject's data to build the validation set
    for ids in train_subj:
        train_size = int(0.9 * len(splitted[ids]))
        val_size = len(splitted[ids]) - train_size
        tr_i, val_i = torch.utils.data.random_split(splitted[ids], [train_size, val_size])
        tr.append(tr_i)
        val.append(val_i)

    train_set = torch.utils.data.ConcatDataset(tr)
    valid_set = torch.utils.data.ConcatDataset(val)
    test_set = BaseConcatDataset([splitted[ids] for ids in test_subj])

    # model = Deep4Net(
    #     n_chans,
    #     n_classes,
    #     input_window_samples=input_window_samples,
    #     final_conv_length="auto",
    # )
    crop_size = 1125
    embedding_net = EEGNet_v2_old(n_classes, n_chans, crop_size)
    model = FcClfNet(embedding_net)
    print(model)

    batch_size = 64
    epochs = 100

    from skorch.callbacks import LRScheduler
    from skorch.helper import predefined_split
    from braindecode import EEGClassifier

    # These values we found good for shallow network:
    # lr = 0.0625 * 0.01
    # weight_decay = 0
    # For deep4 they should be:
    lr = 1 * 0.01
    weight_decay = 0.5 * 0.001
    batch_size = 64
    n_epochs = 200

    # clf = EEGClassifier(
    #     model,
    #     criterion=torch.nn.NLLLoss,
    #     optimizer=torch.optim.AdamW,
    #     train_split=predefined_split(test_set),  # using valid_set for validation
    #     optimizer__lr=lr,
    #     optimizer__weight_decay=weight_decay,
    #     batch_size=batch_size,
    #     callbacks=[
    #         "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    #     ],
    #     device=device,
    # )
    # Model training for a specified number of epochs. `y` is None as it is already supplied
    # in the dataset.
# clf.fit(train_set, y=None, epochs=n_epochs) # # # # # train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True) valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False) test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False) # Send model to GPU if cuda: model.cuda(device) from torch.optim import lr_scheduler import torch.optim as optim import argparse parser = argparse.ArgumentParser( description='cross subject domain adaptation') parser.add_argument('--batch-size', type=int, default=50, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('--test-batch-size', type=int, default=50, metavar='N', help='input batch size for testing (default: 1000)') parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train (default: 10)') parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate (default: 0.01)') parser.add_argument('--momentum', type=float, default=0.5, metavar='M', help='SGD momentum (default: 0.5)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--save-model', action='store_true', default=True, help='For Saving the current Model') args = parser.parse_args() args.gpuidx = 1 args.seed = 0 args.use_tensorboard = False args.save_model = False optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.5 * 0.001) # scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=200) scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50) # # #test lr # lr = [] # for i in range(200): # scheduler.step() # lr.append(scheduler.get_lr()) # # import matplotlib.pyplot as plt # plt.plot(lr) import pandas as pd results_columns = [ 'val_loss', 'test_loss', 'val_accuracy', 'test_accuracy' ] df = pd.DataFrame(columns=results_columns) for epochidx in range(1, epochs): print(epochidx) train(10, model, device, train_loader, optimizer, scheduler, cuda, device) val_loss, val_score = eval(model, device, valid_loader) test_loss, test_score = eval(model, device, test_loader) results = { 'val_loss': val_loss, 'test_loss': test_loss, 'val_accuracy': val_score, 'test_accuracy': test_score } df = df.append(results, ignore_index=True) print(results) return df
def trainEpochs(trainloader, validateloader, j):
    lastFile = ""
    bestLoss = float("inf")
    bestEpoch = 0
    net = torchvision.models.video.r2plus1d_18(num_classes=1)
    net.to(device)
    summary(net, input_size=(3, CONFIG["seqLen"], HEIGHT, WIDTH))
    optimizer = optim.AdamW(net.parameters(), lr=5e-4, weight_decay=WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.93, verbose=False)

    epoch = 0
    while epoch < MAX_EPOCHS and epoch - bestEpoch < PATIENCE:
        epoch += 1
        running_loss = 0.0
        count = 0
        net.train()
        pb.reset(len(trainloader))
        pb.set_description(f"Train epoch {epoch}")
        for data in trainloader:
            # get the inputs; data is a list of [inputs, labels]
            inputs = data[0].to(device)
            labels = data[1].to(device).view(-1, 1)
            #print(inputs[0], labels[0])

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            count += 1
            pb.update()
        l = running_loss / count
        pb.write(f"train loss of epoch {epoch}: {l:.4f}")
        scheduler.step()

        pb.reset(len(validateloader))
        pb.set_description(f"Validating")
        total = 0
        net.eval()
        outputSave = []
        with torch.no_grad():
            for data in validateloader:
                inputs = data[0].to(device)
                labels = data[1].to(device).view(-1, 1)
                outputs = torch.nn.functional.relu(net(inputs), inplace=True)
                outputSave.append(outputs)
                total += criterionValidate(outputs, labels).item()
                pb.update()
        loss = total / len(validateloader)  # mean validation loss per batch
        pb.write(f"Validate loss: {loss:.4f}")
        net.train()

        if loss < bestLoss:
            bestLoss = loss
            bestEpoch = epoch
            timeS = time.time()
            fileName = f'results/{loss}-{timeS}-{j}-{SPLITS}'
            torch.save(net, fileName + '.pt')
            with open(fileName + '.txt', 'w') as f:
                for item in outputSave:
                    for x in item:
                        f.write(f"{x.item()}\n")
            if lastFile != "":
                os.remove(lastFile + '.txt')
                os.remove(lastFile + '.pt')
            lastFile = fileName
    return bestLoss
def main(): seed = 42 seed_everything(seed) num_epochs = 3 batch_size = 32 skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed) train_df = pd.read_csv('data/train.csv') train_df['text'] = train_df['text'].astype(str) train_df['selected_text'] = train_df['selected_text'].astype(str) for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df.sentiment), start=1): print(f'Fold: {fold}') model = TweetModel() optimizer = optim.AdamW(model.parameters(), lr=3e-5, betas=(0.9, 0.999)) criterion = loss_fn dataloaders_dict = get_train_val_loaders(train_df, train_idx, val_idx, batch_size) train_model(model, dataloaders_dict, criterion, optimizer, num_epochs, f'roberta_fold{fold}.pth') # inference test_df = pd.read_csv('data/test.csv') test_df['text'] = test_df['text'].astype(str) test_loader = get_test_loader(test_df) predictions = [] models = [] for fold in range(skf.n_splits): model = TweetModel() model.cuda() model.load_state_dict(torch.load(f'roberta_fold{fold+1}.pth')) model.eval() models.append(model) for data in test_loader: ids = data['ids'].cuda() masks = data['masks'].cuda() tweet = data['tweet'] offsets = data['offsets'].numpy() start_logits = [] end_logits = [] for model in models: with torch.no_grad(): output = model(ids, masks) start_logits.append( torch.softmax(output[0], dim=1).cpu().detach().numpy()) end_logits.append( torch.softmax(output[1], dim=1).cpu().detach().numpy()) start_logits = np.mean(start_logits, axis=0) end_logits = np.mean(end_logits, axis=0) for i in range(len(ids)): start_pred = np.argmax(start_logits[i]) end_pred = np.argmax(end_logits[i]) if start_pred > end_pred: pred = tweet[i] else: pred = get_selected_text(tweet[i], start_pred, end_pred, offsets[i]) predictions.append(pred) #submission sub_df = pd.read_csv('data/sample_submission.csv') sub_df['selected_text'] = predictions sub_df['selected_text'] = sub_df['selected_text'].apply( lambda x: x.replace('!!!!', '!') if len(x.split()) == 1 else x) sub_df['selected_text'] = sub_df['selected_text'].apply( lambda x: x.replace('..', '.') if len(x.split()) == 1 else x) sub_df['selected_text'] = sub_df['selected_text'].apply( lambda x: x.replace('...', '.') if len(x.split()) == 1 else x) sub_df.to_csv('submission.csv', index=False) sub_df.head()
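# seed_everything is not defined in this snippet. A common minimal implementation (an
# assumption, not necessarily the author's) that pins the RNGs this script relies on:
import os
import random

import numpy as np
import torch


def seed_everything(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True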
def train(self, train, dev, bert_model, model, scheduler=None, epochs=50, lr=0.000001, ctx_max_len=400): """ data keys: 'id' 'text' 'label_answerable' 'label_answer' 'attention_mask' 'token_type_ids' """ bert_opt = optim.AdamW(bert_model.parameters(), lr=lr) min_loss = 100000000 model_opt = optim.AdamW(model.parameters(), lr=lr) min_class_loss = 1000000 min_start_loss = 1000000 min_end_loss = 1000000 train_len = len(train) dev_len = len(dev) #weights = torch.tensor([1]*ctx_max_len).float() #weights[0] = 0.00001 #print(weights) criterion = nn.CrossEntropyLoss(ignore_index=-1).to(self.device) """pos_weight=torch.tensor([0.4])""" bce_criterion = nn.BCEWithLogitsLoss( pos_weight=torch.tensor([0.4])).to(self.device) # no truncate bce loss #bce_criterion = nn.BCEWithLogitsLoss().to(self.device) for ep in range(epochs): #train total_loss = 0 class_total_loss = 0 start_total_loss = 0 end_total_loss = 0 model.train() bert_model.train() for i, data in enumerate(train): bert_opt.zero_grad() model_opt.zero_grad() # (batch size , seq_len) input_text = data['text'].to(self.device) #input_text = input_text.permute(1,0) #print(self.tokenizer.decode(data['text'][0])) #(batch size, seq_len) input_attmask = data['attention_mask'].to(self.device) #print(input_attmask[0]) #input_attmask = input_attmask.permute(1,0) #print(pad_index) #(batch size, seq_len) input_token_ids = data['token_type_ids'].to(self.device) linear_mask = 1 - data['token_type_ids'].to(self.device) #print(data['token_type_ids'][0]) linear_mask = linear_mask * data['attention_mask'].to( self.device) #print(linear_mask) #input_token_ids = input_token_ids.permute(1,0) #print(self.tokenizer.decode(data['text'][0][data['label_answer'][0][0].item():data['label_answer'][0][1].item()+1])) #print(data['label_answerable'][0]) total_answerable = torch.sum(data['label_answerable']) #(batch size) label_answerable = data['label_answerable'].to(self.device) #(batch size, output size) label_answer = data['label_answer'].to(self.device) #print(label_answer.size()) #label_answer = label_answer.permute(1,0) bert_output = bert_model(input_ids=input_text, attention_mask=input_attmask, token_type_ids=input_token_ids.long()) #print(bert_output[0].size()) #print(bert_output[1].size()) pad_index = (1 - linear_mask[:, :ctx_max_len]) * 0 total_answer = len(data['text']) for k in range(total_answer): #SEP pad_index[k][data['SEP'][k]:] = 1e9 pad_index = pad_index.to(self.device) pred_answerable, pred_start, pred_end = model(bert_output) #pred_start,pred_end = bert_output #pred_start,pred_end = pred_start[:ctx_max_len].permute(1,0),pred_end[:ctx_max_len].permute(1,0) loss = 0 """if total_answerable != 0 and total_answerable != total_answer: bce_criterion = nn.BCEWithLogitsLoss(reduction='sum', pos_weight=torch.tensor([(total_answer-total_answerable)/total_answerable])).to(self.device)""" """else: bce_criterion = nn.BCEWithLogitsLoss().to(self.device)""" class_loss = bce_criterion(pred_answerable[:, 0], label_answerable.float()) #print(class_loss) #print(pred_start-pad_index) start_end_loss = [] #pred = [pred_start,pred_end] pred_start -= pad_index pred_end -= pad_index #print(torch.softmax(pred_start,dim=1)[0]) #print(label_answer[:,0]) #print(label_answer[:,1]) start_loss = criterion(pred_start[:, 1:], label_answer[:, 0]) end_loss = criterion(pred_end[:, 1:], label_answer[:, 1]) """for t in range(len(pred)): bert_loss = 0 #print(i) berts = pred[t] for j in range(len(berts)): if label_answerable[j]: bert_loss += criterion(berts[j:j+1],(label_answer[j:j+1,t]-1)) 
start_end_loss +=[bert_loss/total_answerable]""" class_total_loss += class_loss.item() start_total_loss += start_loss.item() end_total_loss += end_loss.item() loss = start_loss + end_loss + class_loss loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 5) torch.nn.utils.clip_grad_norm_(bert_model.parameters(), 5) model_opt.step() bert_opt.step() total_loss += loss.item() if i == 0 or (i + 1) % 10 == 0: #print(pred_answerable) #print(label_answerable) print( f'P,G [0] ={torch.sigmoid(pred_answerable[0]).item()}, {label_answerable[0].item() }, Train : Epoch : {ep}, step : {i+1}, Class Loss: {class_total_loss/(i+1):.2f}, Start Loss: {start_total_loss/(i+1):.2f}, End Loss: {end_total_loss/(i+1):.2f}, Total Loss : {total_loss/(i+1):.2f}', end='\r') if (i + 1) == train_len: #print(pred_answerable) print( f'Train : Epoch : {ep}, step : {i+1}, Class Loss: {class_total_loss/(i+1)}, Start Loss: {start_total_loss/(i+1)}, End Loss: {end_total_loss/(i+1)}, Total Loss : {total_loss/(i+1)}', end='\n') #valid model.eval() bert_model.eval() val_loss = 0 class_total_loss = 0 start_total_loss = 0 end_total_loss = 0 with torch.no_grad(): for i, data in enumerate(dev): # (batch size , seq_len) #print(data['label_answer'][0][0].item()) #print(self.tokenizer.decode(data['text'][0][data['label_answer'][0][0].item():data['label_answer'][0][1].item()+1])) #print(data['id'][0]) input_text = data['text'].to(self.device) #(batch size, seq_len) input_attmask = data['attention_mask'].to(self.device) total_answer = len(data['text']) #(batch size, seq_len) #(batch size, seq_len) input_token_ids = data['token_type_ids'].to(self.device) total_answerable = torch.sum(data['label_answerable']) #(batch size) label_answerable = data['label_answerable'].to(self.device) #(batch size, output size) label_answer = data['label_answer'].to(self.device) #print(label_answer.size()) #label_answer = label_answer.permute(1,0) bert_output = bert_model( input_ids=input_text, attention_mask=input_attmask, token_type_ids=input_token_ids.long()) #pred_start,pred_end = model(bert_output) linear_mask = 1 - data['token_type_ids'].to(self.device) #print(data['token_type_ids'][0]) linear_mask = linear_mask * data['attention_mask'].to( self.device) #print(linear_mask) pad_index = (1 - linear_mask[:, :ctx_max_len]) * 0 total_answer = len(data['text']) for k in range(total_answer): #SEP pad_index[k][data['SEP'][k]:] = 1e9 pad_index = pad_index.to(self.device) pred_answerable, pred_start, pred_end = model(bert_output) #pred_start,pred_end = bert_output #pred_start,pred_end = pred_start[:ctx_max_len].permute(1,0),pred_end[:ctx_max_len].permute(1,0) loss = 0 class_loss = bce_criterion(pred_answerable[:, 0], label_answerable.float()) loss = 0 """if total_answerable != total_answer: bce_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([(total_answer-total_answerable)/total_answerable])).to(self.device) else: bce_criterion = nn.BCEWithLogitsLoss().to(self.device) class_loss = bce_criterion(pred_answerable.squeeze(),label_answerable.float())""" #print(class_loss) pred_start -= pad_index pred_end -= pad_index start_loss = criterion(pred_start[:, 1:], label_answer[:, 0]) end_loss = criterion(pred_end[:, 1:], label_answer[:, 1]) """for t in range(len(pred)): bert_loss = 0 #print(i) berts = pred[t] for j in range(len(berts)): if label_answerable[j]: bert_loss += criterion(berts[j:j+1],(label_answer[j:j+1,t]-1)) start_end_loss +=[bert_loss/total_answerable]""" class_total_loss += class_loss.item() start_total_loss += start_loss.item() 
end_total_loss += end_loss.item() loss = start_loss + end_loss + class_loss val_loss += loss.item() if i == 0 or (i + 1) % 10 == 0: print( f'P,G [0] ={torch.sigmoid(pred_answerable[0]).item()}, {label_answerable[0].item() }, Valid : Epoch : {ep}, step : {i+1}, Class Loss: {class_total_loss/(i+1):.2f}, Start Loss: {start_total_loss/(i+1):.2f}, End Loss: {end_total_loss/(i+1):.2f}, Total Loss : {val_loss/(i+1):.2f}', end='\r') if (i + 1) == dev_len: #print(pred_answerable) print( f'Valid : Epoch : {ep}, step : {i+1}, Class Loss: {class_total_loss/(i+1)}, Start Loss: {start_total_loss/(i+1)}, End Loss: {end_total_loss/(i+1)}, Total Loss : {val_loss/(i+1)}', end='\n') val_loss /= (i + 1) if min_class_loss > class_total_loss / dev_len: print("Save Class model............") min_class_loss = class_total_loss / dev_len torch.save(model.linear_answerable.state_dict(), "ckpt/best_class_notru.ckpt") if min_start_loss > (start_total_loss) / dev_len: print("Save Start model............") min_start_loss = (start_total_loss) / dev_len torch.save(model.linear_start.state_dict(), "ckpt/best_linear_start_notru.ckpt") if min_end_loss > (end_total_loss) / dev_len: print("Save End model............") min_end_loss = (end_total_loss) / dev_len torch.save(model.linear_end.state_dict(), "ckpt/best_linear_end_notru.ckpt") if min_loss > val_loss: print("Save Bert model............") min_loss = val_loss torch.save(bert_model.state_dict(), "ckpt/best_bert_notru.ckpt")
def run_app(): # dataset_textbox = st.sidebar.text_input('dataset path', value='C:\\Users\\Admin\\Downloads\\i\\n01514859\\') DATASET_PATH = st.text_input('DATASET PATH', value='C:\\Users\\Admin\\Downloads\\i\\n01514859\\') epoch_loc = st.empty() prog_bar = st.empty() loss_loc = st.empty() global_loss_loc = st.empty() loss_chart = st.empty() glob_loss_chart = st.empty() row0 = st.empty() row1 = st.empty() row2 = st.empty() row3 = st.empty() row4 = st.empty() row5 = st.empty() # st.stop() PATH = "upscaler.pt" net = Net() # too lazy to detect if the file exits. try: net.load_state_dict(torch.load(PATH)) st.write('MODEL LOADED!') except Exception: pass cuda = torch.device('cuda') net.to(cuda) # criterion = nn.CrossEntropyLoss() # criterion = nn.MSELoss() criterion = kornia.losses.PSNRLoss(1.0) LEARNING_RATE = 0.01 optimizer = optim.AdamW(net.parameters(), lr=LEARNING_RATE) # st.title('image upscaler') img = load_img('image.png') losses = deque(maxlen=100) global_losses = deque(maxlen=100) EPOCHS = 500 BATCH_SIZE = 1 dataset = ImageDataset(path=DATASET_PATH) def collate_wrapper(samples): return samples train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_wrapper) for epoch in range(EPOCHS): i = 1 epoch_loc.write(f"EPOCH:\t{epoch}/{EPOCHS - 1}") global_loss = torch.tensor([0.0], device=cuda) optimizer.zero_grad() # TODO: confirm that shuffle works # -------------------- for batch in train_loader: optimizer.zero_grad() loss = torch.tensor([0.0], device=cuda) for sample in batch: x, y = sample x = torch.tensor(x) x = torch.unsqueeze(x, 0) try: image = x.permute(0, 3, 1, 2) image = F.interpolate(image, size=(128, 128)) image = image.permute(0, 2, 3, 1) row1.image(image.numpy(), width=250, caption='original image') except Exception: break x = x.permute(0, 3, 1, 2) y = F.interpolate(x, size=(64, 64)) x = F.interpolate(x, size=(32, 32)) x = F.interpolate(x, size=(64, 64)) row2.image(x.permute(0, 2, 3, 1).detach().numpy(), width=250, caption='Downsampled') prog_bar.progress(i / len(dataset)) i += 1 out = net(x.detach().cuda().float()) diff = torch.abs(out.detach().cpu() - y.detach().cpu()) diff_image = diff.permute(0, 2, 3, 1).numpy() row5.image(diff_image, width=250, caption='absolute difference') row3.image(out.permute(0, 2, 3, 1).detach().cpu().numpy(), width=250, caption='Reconstructed') loss = 1 / criterion(out, y.detach().cuda().float()) # loss = criterion(out, y.detach().cuda().float()) row4.write(f'LOSS: {loss.detach().cpu()}') # loss.backward() # optimizer.step() # st.stop() losses.append(loss.detach().cpu().numpy()) loss_chart.line_chart( pd.DataFrame(losses, columns=['loss',]) ) global_loss += loss loss_loc.write(f"LOSS:\t{loss.detach().cpu()}") loss.backward() optimizer.step() global_loss_loc.write(f"GLOBAL LOSS:\t{global_loss.detach().cpu()} \nGLOB AVERAGE LOSS:\t{global_loss.detach().cpu()/len(dataset)}") global_losses.append(global_loss.detach().cpu().numpy()) glob_loss_chart.line_chart( pd.DataFrame(global_losses, columns=['global_loss', ]) ) try: torch.save(net.state_dict(), PATH) st.write('MODEL SAVED!') except Exception: pass
def main(args): train_loader, test_loader, DATASET_CONFIG = get_loader(args) n_data = len(train_loader.dataset) logger.info(f"length of training dataset: {n_data}") n_data = len(test_loader.dataset) logger.info(f"length of testing dataset: {n_data}") model, criterion = get_model(args, DATASET_CONFIG) if dist.get_rank() == 0: logger.info(str(model)) # optimizer if args.optimizer == 'adamW': param_dicts = [ { "params": [ p for n, p in model.named_parameters() if "decoder" not in n and p.requires_grad ] }, { "params": [ p for n, p in model.named_parameters() if "decoder" in n and p.requires_grad ], "lr": args.decoder_learning_rate, }, ] optimizer = optim.AdamW(param_dicts, lr=args.learning_rate, weight_decay=args.weight_decay) else: raise NotImplementedError scheduler = get_scheduler(optimizer, len(train_loader), args) model = model.cuda() model = DistributedDataParallel(model, device_ids=[args.local_rank], broadcast_buffers=False) if args.checkpoint_path: assert os.path.isfile(args.checkpoint_path) load_checkpoint(args, model, optimizer, scheduler) # Used for AP calculation CONFIG_DICT = { 'remove_empty_box': False, 'use_3d_nms': True, 'nms_iou': 0.25, 'use_old_type_nms': False, 'cls_nms': True, 'per_class_proposal': True, 'conf_thresh': 0.0, 'dataset_config': DATASET_CONFIG } for epoch in range(args.start_epoch, args.max_epoch + 1): train_loader.sampler.set_epoch(epoch) tic = time.time() train_one_epoch(epoch, train_loader, DATASET_CONFIG, model, criterion, optimizer, scheduler, args) logger.info('epoch {}, total time {:.2f}, ' 'lr_base {:.5f}, lr_decoder {:.5f}'.format( epoch, (time.time() - tic), optimizer.param_groups[0]['lr'], optimizer.param_groups[1]['lr'])) if epoch % args.val_freq == 0: evaluate_one_epoch(test_loader, DATASET_CONFIG, CONFIG_DICT, args.ap_iou_thresholds, model, criterion, args) if dist.get_rank() == 0: # save model save_checkpoint(args, epoch, model, optimizer, scheduler) evaluate_one_epoch(test_loader, DATASET_CONFIG, CONFIG_DICT, args.ap_iou_thresholds, model, criterion, args) save_checkpoint(args, 'last', model, optimizer, scheduler, save_cur=True) logger.info("Saved in {}".format( os.path.join(args.log_dir, f'ckpt_epoch_last.pth'))) return os.path.join(args.log_dir, f'ckpt_epoch_last.pth')
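# load_checkpoint / save_checkpoint come from elsewhere in the project. A minimal sketch of
# what load_checkpoint is assumed to do here (restore weights, optimizer and scheduler state,
# and the starting epoch); the checkpoint keys are assumptions, not the original format:
def load_checkpoint(args, model, optimizer, scheduler):
    ckpt = torch.load(args.checkpoint_path, map_location='cpu')
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['optimizer'])
    scheduler.load_state_dict(ckpt['scheduler'])
    args.start_epoch = ckpt['epoch'] + 1
    logger.info("=> loaded checkpoint '{}' (epoch {})".format(args.checkpoint_path, ckpt['epoch']))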
ranks = self.tokenizer.convert_ids_to_tokens(best_words[:3000]) for i in ranks: if i in all_lemma: return i return wn_simple_lesk_predictor(context) if __name__ == "__main__": #W2VMODEL_FILENAME = 'GoogleNews-vectors-negative300.bin.gz' #predictor = Word2VecSubst(W2VMODEL_FILENAME) train_set = lexDataset(filename=sys.argv[1]) train_loader = DataLoader(train_set, batch_size=20, num_workers=0) net = MLM(freeze_bert=True).to('cuda') criterion = nn.CrossEntropyLoss() opti = optim.AdamW(net.parameters(), lr=1e-5) train(net, criterion, opti, train_loader) print('net trained!') net.eval() model = myPredictor(net) #nltk.download('stopwords') #bert= BertPredictor() #nltk.download('wordnet') for context in read_lexsub_xml(sys.argv[1]): #print(context) # useful for debugging prediction = model.predict(context) print("{}.{} {} :: {}".format(context.lemma, context.pos, context.cid, prediction))
def _build_trainer(config, model, vocab, train_data, valid_data): optimizer = optim.AdamW(model.parameters(), lr=config.trainer.lr) scheduler = None is_bert_based = any( model.name.endswith('bert') for model in config.embedder.models) is_trainable_elmo_based = any( model.name == 'elmo' and model.params['requires_grad'] for model in config.embedder.models) if is_bert_based or is_trainable_elmo_based: def _is_pretrained_param(name): return 'transformer_model' in name or '_elmo_lstm' in name pretrained_params, non_pretrained_params = [], [] for name, param in model.named_parameters(): if _is_pretrained_param(name): logger.info('Pretrained param: %s', name) pretrained_params.append(param) else: logger.info('Non-pretrained param: %s', name) non_pretrained_params.append(param) optimizer = optim.AdamW([{ 'params': pretrained_params, 'lr': config.trainer.bert_lr }, { 'params': non_pretrained_params, 'lr': config.trainer.lr }, { 'params': [] }]) scheduler = SlantedTriangular( optimizer=optimizer, num_epochs=config.trainer.num_epochs, num_steps_per_epoch=len(train_data) / config.trainer.batch_size, cut_frac=config.trainer.cut_frac, gradual_unfreezing=config.trainer.gradual_unfreezing, discriminative_fine_tuning=config.trainer. discriminative_fine_tuning) logger.info('Trainable params:') for name, param in model.named_parameters(): if param.requires_grad: logger.info('\t' + name) iterator = BucketIterator(batch_size=config.trainer.batch_size) iterator.index_with(vocab) if torch.cuda.is_available(): cuda_device = 0 model = model.cuda(cuda_device) logger.info('Using cuda') else: cuda_device = -1 logger.info('Using cpu') logger.info('Example batch:') _log_batch(next(iterator(train_data))) if is_bert_based: train_data = _filter_data(train_data, vocab) valid_data = _filter_data(valid_data, vocab) return Trainer(model=model, optimizer=optimizer, iterator=iterator, train_dataset=train_data, validation_dataset=valid_data, validation_metric='+MeanAcc', patience=config.trainer.patience, num_epochs=config.trainer.num_epochs, cuda_device=cuda_device, grad_clipping=5., learning_rate_scheduler=scheduler, serialization_dir=os.path.join(config.data.models_dir, config.model_name), should_log_parameter_statistics=False, should_log_learning_rate=False, num_gradient_accumulation_steps=config.trainer. num_gradient_accumulation_steps)
def train(args): save_dir = os.path.join( args.save_dir, args.model_pre + args.backbone + '_' + datetime.now().strftime('%Y%m%d_%H%M%S')) os.makedirs(save_dir) writer = SummaryWriter(save_dir) multi_gpus = False if len(args.gpus.split(',')) > 1: print('{} GPUs for use'.format(len(args.gpus.split(',')))) multi_gpus = True os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') with open(args.label_dict_path, 'rb') as f: label_dict = pickle.load(f) dataset = TrainData(args.root_path, label_dict, input_size=224) dataloader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) # model = torchvision.models.resnet50() # model.fc = nn.Sequential( # nn.Linear(2048, 512, bias=False), # nn.BatchNorm1d(512), # nn.ReLU(inplace=True), # nn.Linear(512, args.feature_dim, bias=False)) model = torchvision.models.alexnet() model.classifier[6] = torch.nn.Linear(in_features=4096, out_features=args.feature_dim, bias=False) # model = my_alexnet(sobel=False, bn=True, out=args.feature_dim) args.lr_decay_epochs = [ int(step) for step in args.lr_decay_epochs.split(',') ] args.start_epoch = 1 total_iters = 0 # if args.resume and os.path.isfile(args.resume): # checkpoint = torch.load(args.resume, map_location='cpu') # args.start_epoch = checkpoint['epoch'] + 1 # model.load_state_dict(checkpoint['model']) # total_iters = checkpoint['iters'] # print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) # del checkpoint # torch.cuda.empty_cache() if multi_gpus: model = torch.nn.DataParallel(model).to(device) else: model = model.to(device) criterion = torch.nn.BCEWithLogitsLoss() if args.optimizer == 'SGD': optimizer = optim.SGD(model.parameters(), lr=args.init_lr, momentum=0.9, nesterov=True, weight_decay=1e-5) else: optimizer = optim.AdamW(model.parameters(), lr=args.init_lr, weight_decay=1e-4) model.train() if args.resume and os.path.isfile(args.resume): checkpoint = torch.load(args.resume, map_location='cpu') optimizer.load_state_dict(checkpoint['optimizer']) del checkpoint torch.cuda.empty_cache() len_dataloader = len(dataloader) for epoch in range(args.start_epoch, args.total_epoch + 1): adjust_learning_rate(epoch, args, optimizer) print('Train Epoch: {}/{} ...'.format(epoch, args.total_epoch)) # total_loss = [] s = time.time() for step, (imgs, label) in enumerate(dataloader): imgs = imgs.to(device) labels = label.to(device) target = torch.sigmoid(labels) optimizer.zero_grad() output = model(imgs) loss = criterion(output, target) loss.backward() optimizer.step() # total_loss.append(loss.item()) total_iters += 1 if (step + 1) % args.log_step == 0: duration = (time.time() - s) / args.log_step examples_per_sec = args.total_epoch / float(duration) print( 'Epoch: [%d/%d], Step: [%d/%d], loss = %.4f, %.2f examples/sec, %.2f sec/batch' % (epoch, args.total_epoch, step + 1, len_dataloader, loss.item(), examples_per_sec, duration)) s = time.time() writer.add_scalar('loss', loss.item(), total_iters) writer.add_scalar('sup_lr', optimizer.param_groups[0]['lr'], total_iters) writer.add_scalar('epoch', epoch, total_iters) # print('Speed: %.2f for one epoch %d/%d, Mean Loss = %.4f' % # (time.time() - start_time, epoch, EPOCH, sum(total_loss) / len(total_loss))) if epoch % args.save_freq == 0: if multi_gpus: model_state_dict = model.module.state_dict() else: model_state_dict = model.state_dict() state = { 'model': model.state_dict(), # 'optimizer': optimizer.state_dict(), # 'epoch': epoch, # 'iters': 
            }
            torch.save(state, os.path.join(save_dir, 'Epoch_%02d_Iter_%06d_model.pth' % (epoch, total_iters)))
            del state
    print('Finishing training!')
    writer.close()
# Load the data
train_dataloader = get_dataloader('../hw3_CNN/data', 'training', batch_size, cuda)
valid_dataloader = get_dataloader('../hw3_CNN/data', 'validation', batch_size, cuda)
print('Data Loaded')

# Load the network
old_net = StudentNet()
if cuda:
    old_net = old_net.cuda()
old_net.load_state_dict(torch.load('./weights/student_model.bin'))

# Start pruning and fine-tuning: prune independently prune_count times; the pruning ratio
# grows by prune_rate at every round, and each pruned network is fine-tuned for
# finetune_epochs epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(old_net.parameters(), lr=1e-3)
now_width_mult = 1
for i in range(prune_count):
    now_width_mult *= prune_rate  # increase the pruning rate
    new_net = StudentNet(width_mult=now_width_mult)
    if cuda:
        new_net = new_net.cuda()
    new_net = network_slimming(old_net, new_net)
    now_best_acc = 0
    for epoch in range(finetune_epochs):
        new_net.train()
        train_loss, train_acc = run_epoch(train_dataloader)
        new_net.eval()
        valid_loss, valid_acc = run_epoch(valid_dataloader)
        # save the best model at each pruning round
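        # A minimal sketch of that bookkeeping (hypothetical: it assumes run_epoch evaluates
        # the globally visible new_net, and the checkpoint file name is illustrative only):
        if valid_acc > now_best_acc:
            now_best_acc = valid_acc
            torch.save(new_net.state_dict(), f'./weights/student_model_pruned_{now_width_mult:.3f}.bin')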
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True) def get_model(n_classes: int): m = models.resnet18(pretrained=True) in_features = m.fc.in_features m.fc = nn.Linear(in_features, n_classes) return m n_classes = next(iter(train_dl))[-1].shape[-1] m = get_model(n_classes) optimizer = optim.AdamW(m.parameters(), lr=LR) loss_func = nn.functional.cross_entropy @train_callback(Stage.start_epoch, auto=True) def print_epochs(ctx: TrainingContext): """ Prints current epoch number at epoch start """ print(f"Epoch {ctx.epoch + 1}/{ctx.n_epochs}") @train_callback(Stage.end_batch, auto=True) def print_iters(ctx: TrainingContext): """ Print iterations after batch end """ print( f"Number of batches processed: {ctx.iters} | Loss: {ctx.loss.item():.3f}" )
def main(args): use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(1) device = torch.device('cuda' if use_cuda else 'cpu') train_dataset = WakeWordData(data_json=args.train_data_json, sample_rate=args.sample_rate, valid=False) test_dataset = WakeWordData(data_json=args.test_data_json, sample_rate=args.sample_rate, valid=True) kwargs = { 'num_workers': args.num_workers, 'pin_memory': True } if use_cuda else {} train_loader = data.DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn, **kwargs) test_loader = data.DataLoader(dataset=test_dataset, batch_size=args.eval_batch_size, shuffle=True, collate_fn=collate_fn, **kwargs) model_params = { "num_classes": 1, "feature_size": 40, "hidden_size": args.hidden_size, "num_layers": 1, "dropout": 0.1, "bidirectional": False } model = LSTMWakeWord(**model_params, device=device) model = model.to(device) optimizer = optim.AdamW(model.parameters(), lr=args.lr) loss_fn = nn.BCEWithLogitsLoss() scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2) best_train_acc, best_train_report = 0, None best_test_acc, best_test_report = 0, None best_epoch = 0 for epoch in range(args.epochs): print("\nstarting training with learning rate", optimizer.param_groups[0]['lr']) train_acc, train_report = train(train_loader, model, optimizer, loss_fn, device, epoch) test_acc, test_report = test(test_loader, model, device, epoch) # record best train and test if train_acc > best_train_acc: best_train_acc = train_acc if test_acc > best_test_acc: best_test_acc = test_acc # saves checkpoint if metrics are better than last if args.save_checkpoint_path and test_acc >= best_test_acc: checkpoint_path = os.path.join(args.save_checkpoint_path, args.model_name + ".pt") print("found best checkpoint. saving model as", checkpoint_path) save_checkpoint( checkpoint_path, model, optimizer, scheduler, model_params, notes="train_acc: {}, test_acc: {}, epoch: {}".format( best_train_acc, best_test_acc, epoch), ) best_train_report = train_report best_test_report = test_report best_epoch = epoch table = [["Train ACC", train_acc], ["Test ACC", test_acc], ["Best Train ACC", best_train_acc], ["Best Test ACC", best_test_acc], ["Best Epoch", best_epoch]] # print("\ntrain acc:", train_acc, "test acc:", test_acc, "\n", # "best train acc", best_train_acc, "best test acc", best_test_acc) print(tabulate(table)) scheduler.step(train_acc) print("Done Training...") print("Best Model Saved to", checkpoint_path) print("Best Epoch", best_epoch) print("\nTrain Report \n") print(best_train_report) print("\nTest Report\n") print(best_test_report)
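# save_checkpoint is not shown in this snippet. A minimal sketch of a compatible helper
# (the dictionary keys are assumptions, not the original implementation):
def save_checkpoint(checkpoint_path, model, optimizer, scheduler, model_params, notes=""):
    torch.save({
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict(),
        "model_params": model_params,  # needed to rebuild LSTMWakeWord before loading weights
        "notes": notes,
    }, checkpoint_path)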
def run_app(): # GUI epoch_loc = st.empty() prog_bar = st.empty() loss_loc = st.empty() global_loss_loc = st.empty() col1, col2, col3 = st.beta_columns(3) img_loc = col1.empty() kernel_loc = col2.empty() stats_loc = col3.empty() image_meta_loc = st.empty() right_chart = st.empty() loss_chart = st.empty() glob_loss_chart = st.empty() # right_chart = st.empty() test_progress_bar = st.empty() testing_chart = st.empty() test_stats = st.empty() PATH = 'model_TYPE2.pt' cuda = torch.device('cuda') cpu = torch.device('cpu') net = Net(kernel_loc) try: net.load_state_dict(torch.load(PATH)) except Exception: pass net.to(cuda) criterion = nn.CrossEntropyLoss() optimizer = optim.AdamW(net.parameters(), lr=0.00001) dataset = EegDataset() losses = deque(maxlen=100) global_losses = deque(maxlen=100) right_list = deque(maxlen=100) wrong_list = deque(maxlen=100) EPOCHS = 10 for epoch in range(EPOCHS): i = 1 epoch_loc.write(f"EPOCH:\t{epoch}/{EPOCHS-1}") global_loss = torch.tensor([0.0], device=cuda) optimizer.zero_grad() right = 0 wrong = 0 def collate_wrapper(samples): return samples #TODO: confirm that shuffle works train_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_wrapper) for batch in train_loader: optimizer.zero_grad() loss = torch.tensor([0.0], device=cuda) for sample in batch: x, y = sample prog_bar.progress(i / len(dataset)) i += 1 img_loc.image(x.numpy(), width=200) # image_meta_loc.write(f"ID:\t{image.id} \nCategory:\t{image.category}") out = net(x.cuda().float()).unsqueeze(0) out_id = torch.argmax(out.detach().cpu(), 1) target_id = y.cuda() # target = torch.tensor([dataset.categories.index(image.category)]).cuda() # stats_loc.write(f"OUTPUT:\t{torch.argmax(out.detach().cpu(), 1)} \nTARGET:\t{target.detach().cpu()}") stats_loc.write(f"OUTPUT:\t{out_id} \nTARGET:\t{target_id}") loss += criterion(out, target_id) if out_id == target_id.detach().cpu(): right += 1 # use len(dataset.categories) ; i want to divide by the number of categories. 
loss = loss * (1 / len(dataset)) else: wrong += 1 loss = loss * 1 losses.append(loss.detach().cpu().numpy()) loss_chart.line_chart(pd.DataFrame(losses, columns=[ 'loss', ])) global_loss += loss loss_loc.write( f"LOSS:\t{loss.detach().cpu()} \nRIGHT:\t{right}/{len(dataset)} \nWRONG:\t{wrong}/{len(dataset)}" ) loss.backward() optimizer.step() right_list.append(right) wrong_list.append(wrong) rc_data = pd.DataFrame(np.array( [[r, w] for r, w in zip(right_list, wrong_list)]), columns=['right', 'wrong']) right_chart.line_chart(rc_data) # wc_data = pd.DataFrame(np.array(wrong_list), columns=['wrong',]) global_loss_loc.write( f"GLOBAL LOSS:\t{global_loss.detach().cpu()} \nGLOB AVERAGE LOSS:\t{global_loss.detach().cpu()/len(dataset)}" ) global_losses.append(global_loss.detach().cpu().numpy()) glob_loss_chart.line_chart( pd.DataFrame(global_losses, columns=[ 'global_loss', ])) # global_loss.backward() # optimizer.step() # # TESTING PHASE: dataset = EegDataset(testing=True) def collate_wrapper(samples): return samples test_loader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=collate_wrapper) right = 0 wrong = 0 st.write('TESTING!!!!!!!!!!!!!!!!!!!/EVALUATING????') i = 1 with torch.no_grad(): for batch in test_loader: for sample in batch: x, y = sample test_progress_bar.progress(i / len(dataset)) i += 1 out = net(x.cuda().float()) out_id = torch.argmax(out.detach().cpu(), 0) target_id = y if out_id == target_id: right += 1 else: wrong += 1 test_stats.write( f'RIGHT: {right}/{len(dataset)} \nWRONG: {wrong}/{len(dataset)}' ) torch.save(net.state_dict(), PATH)
def main_worker(gpu, ngpus_per_node, args): args.gpu = gpu if args.gpu is not None: print("Use GPU: {} for training".format(args.gpu)) if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) # ---------------------------------------------------------------------------------------- # Create model(s) and send to device(s) # ---------------------------------------------------------------------------------------- net = model.ResUNet(3, args.batch_norm).float() if args.distributed: if args.gpu is not None: torch.cuda.set_device(args.gpu) args.batch_size = int(args.batch_size / ngpus_per_node) args.workers = int( (args.workers + ngpus_per_node - 1) / ngpus_per_node) net.cuda(args.gpu) net = torch.nn.parallel.DistributedDataParallel( net, device_ids=[args.gpu]) else: net.cuda(args.gpu) net = torch.nn.parallel.DistributedDataParallel(net) elif args.gpu is not None: torch.cuda.set_device(args.gpu) net.cuda(args.gpu) else: net.cuda(args.gpu) net = torch.nn.parallel.DistributedDataParallel(net) # ---------------------------------------------------------------------------------------- # Define dataset path and data splits # ---------------------------------------------------------------------------------------- #Input_Data = #scipy.io.loadmat("\Path\To\Inputs.mat") #Output_Data = #scipy.io.loadmat("\Path\To\Outputs.mat") Input = utilities.load_obj( f'{args.path_to_data}/inputs') #Input_Data['Inputs'] Output = utilities.load_obj( f'{args.path_to_data}/outputs') # Output_Data['Outputs'] spectra_num = len(Input) train_split = round(0.9 * spectra_num) val_split = round(0.1 * spectra_num) input_train = Input[:train_split] input_val = Input[train_split:train_split + val_split] output_train = Output[:train_split] output_val = Output[train_split:train_split + val_split] # ---------------------------------------------------------------------------------------- # Create datasets (with augmentation) and dataloaders # ---------------------------------------------------------------------------------------- Raman_Dataset_Train = dataset.RamanDataset(input_train, output_train, batch_size=args.batch_size, spectrum_len=args.spectrum_len, spectrum_shift=0.1, spectrum_window=False, horizontal_flip=False, mixup=True) Raman_Dataset_Val = dataset.RamanDataset(input_val, output_val, batch_size=args.batch_size, spectrum_len=args.spectrum_len) train_loader = DataLoader(Raman_Dataset_Train, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True) val_loader = DataLoader(Raman_Dataset_Val, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True) # ---------------------------------------------------------------------------------------- # Define criterion(s), optimizer(s), and scheduler(s) # ---------------------------------------------------------------------------------------- criterion = nn.L1Loss().cuda(args.gpu) criterion_MSE = nn.MSELoss().cuda(args.gpu) if args.optimizer == "sgd": optimizer = optim.SGD(net.parameters(), lr=args.lr) elif args.optimizer == "adamW": optimizer = optim.AdamW(net.parameters(), lr=args.lr) else: # Adam optimizer = optim.Adam(net.parameters(), lr=args.lr) if args.scheduler == "decay-lr": scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.2) elif args.scheduler == "multiplicative-lr": lmbda = 
lambda epoch: 0.985 scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda) elif args.scheduler == "cyclic-lr": scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=args.base_lr, max_lr=args.lr, mode='triangular2', cycle_momentum=False) elif args.scheduler == "one-cycle-lr": scheduler = optim.lr_scheduler.OneCycleLR( optimizer, max_lr=args.lr, steps_per_epoch=len(train_loader), epochs=args.epochs, cycle_momentum=False) else: # constant-lr scheduler = None print('Started Training') print('Training Details:') #print('Network: {}'.format(args.network)) print('Epochs: {}'.format(args.epochs)) print('Batch Size: {}'.format(args.batch_size)) print('Optimizer: {}'.format(args.optimizer)) print('Scheduler: {}'.format(args.scheduler)) print('Learning Rate: {}'.format(args.lr)) print('Spectrum Length: {}'.format(args.spectrum_len)) DATE = datetime.datetime.now().strftime("%Y_%m_%d") log_dir = "runs/{}_{}_{}".format(DATE, args.optimizer, args.scheduler) #, args.network) models_dir = "{}_{}_{}.pt".format(DATE, args.optimizer, args.scheduler) #, args.network) writer = SummaryWriter(log_dir=log_dir) for epoch in range(args.epochs): train_loss = train(train_loader, net, optimizer, scheduler, criterion, criterion_MSE, epoch, args) val_loss = validate(val_loader, net, criterion_MSE, args) if args.scheduler == "decay-lr" or args.scheduler == "multiplicative-lr": scheduler.step() writer.add_scalar('Loss/train', train_loss, epoch) writer.add_scalar('Loss/val', val_loss, epoch) torch.save(net.state_dict(), models_dir) print('Finished Training')
def Train(myVoc, table, training_batches, training_item_batches, candidate_items, candidate_users, training_batch_labels, directory, TrainEpoch=100, latentK=32, hidden_size=300, intra_method='dualFC', inter_method='dualFC', learning_rate=0.00001, dropout=0, isStoreModel=False, isStoreCheckPts=False, WriteTrainLoss=False, store_every=2, use_pretrain_item=False, isCatItemVec=True, randomSetup=-1, pretrain_wordVec=None): # Get asin and reviewerID from file asin, reviewerID = pre_work.Read_Asin_Reviewer(table) # Initialize textual embeddings if (pretrain_wordVec != None): embedding = pretrain_wordVec else: embedding = nn.Embedding(myVoc.num_words, hidden_size) # Initialize asin/reviewer embeddings if (use_pretrain_item): asin_embedding = torch.load( R'PretrainingEmb/item_embedding_fromGRU.pth') else: asin_embedding = nn.Embedding(len(asin), hidden_size) reviewerID_embedding = nn.Embedding(len(reviewerID), hidden_size) # Initialize IntraGRU models and optimizers IntraGRU = list() IntraGRU_optimizer = list() # Initialize IntraGRU optimizers groups intra_scheduler = list() # Append GRU model asc for idx in range(opt.num_of_reviews): IntraGRU.append( IntraReviewGRU(hidden_size, embedding, asin_embedding, reviewerID_embedding, latentK=latentK, method=intra_method)) # Use appropriate device IntraGRU[idx] = IntraGRU[idx].to(device) IntraGRU[idx].train() # Initialize optimizers IntraGRU_optimizer.append( optim.AdamW(IntraGRU[idx].parameters(), lr=learning_rate, weight_decay=0.001)) # Assuming optimizer has two groups. intra_scheduler.append( optim.lr_scheduler.StepLR(IntraGRU_optimizer[idx], step_size=20, gamma=0.3)) # Initialize InterGRU models InterGRU = HANN(hidden_size, embedding, asin_embedding, reviewerID_embedding, n_layers=1, dropout=dropout, latentK=latentK, isCatItemVec=isCatItemVec, method=inter_method) # Use appropriate device InterGRU = InterGRU.to(device) InterGRU.train() # Initialize IntraGRU optimizers InterGRU_optimizer = optim.AdamW(InterGRU.parameters(), lr=learning_rate, weight_decay=0.001) # Assuming optimizer has two groups. 
inter_scheduler = optim.lr_scheduler.StepLR(InterGRU_optimizer, step_size=10, gamma=0.3) print('Models built and ready to go!') for Epoch in range(TrainEpoch): # Run a training iteration with batch group_loss = trainIteration(IntraGRU, InterGRU, IntraGRU_optimizer, InterGRU_optimizer, training_batches, training_item_batches, candidate_items, candidate_users, training_batch_labels, isCatItemVec=isCatItemVec, randomSetup=randomSetup) inter_scheduler.step() for idx in range(opt.num_of_reviews): intra_scheduler[idx].step() num_of_iter = len(training_batches[0]) * len(training_batch_labels) current_loss_average = group_loss / num_of_iter print('Epoch:{}\tSE:{}\t'.format(Epoch, current_loss_average)) if (Epoch % store_every == 0 and isStoreModel): torch.save( InterGRU, R'{}/Model/InterGRU_epoch{}'.format(opt.save_dir, Epoch)) for idx__, IntraGRU__ in enumerate(IntraGRU): torch.save( IntraGRU__, R'{}/Model/IntraGRU_idx{}_epoch{}'.format( opt.save_dir, idx__, Epoch)) if WriteTrainLoss: with open(R'{}/Loss/TrainingLoss.txt'.format(opt.save_dir), 'a') as file: file.write('Epoch:{}\tSE:{}\n'.format(Epoch, current_loss_average)) # Save checkpoint if (Epoch % store_every == 0 and isStoreCheckPts): # Store intra-GRU model for idx__, IntraGRU__ in enumerate(IntraGRU): state = { 'epoch': Epoch, 'num_of_review': idx__, 'intra{}'.format(idx__): IntraGRU__.state_dict(), 'intra{}_opt'.format(idx__): IntraGRU_optimizer[idx__].state_dict(), 'train_loss': current_loss_average, 'voc_dict': myVoc.__dict__, 'embedding': embedding.state_dict() } torch.save( state, R'{}/checkpts/IntraGRU_idx{}_epoch{}'.format( opt.save_dir, idx__, Epoch)) # Store inter-GRU model state = { 'epoch': Epoch, 'inter': InterGRU.state_dict(), 'inter_opt': InterGRU_optimizer.state_dict(), 'train_loss': current_loss_average, 'voc_dict': myVoc.__dict__, 'embedding': embedding.state_dict() } torch.save( state, R'{}/checkpts/InterGRU_epoch{}'.format(opt.save_dir, Epoch))
model = get_efficientnet(model_name=model_name) if model_path is not None: # model = torch.load(model_path) model.load_state_dict(torch.load(model_path, map_location='cpu')) print('Model found in {}'.format(model_path)) else: print('No model found, initializing random model.') model = model.cuda(device_id) train_logger = Logger(model_name=writeFile, header=['epoch', 'loss', 'acc', 'lr']) # criterion = nn.CrossEntropyLoss() criterion = LabelSmoothing(smoothing=0.05).cuda(device_id) # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9) # optimizer = optim.Adam(model.parameters(), lr=lr) optimizer = optim.AdamW(model.parameters(), lr=lr) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9) is_train = True if is_train: xdl = DeeperForensicsDatasetNew( real_npys=train_real_paths_npy, fake_npys=train_fake_paths_npy, is_one_hot=True, transforms=get_train_transforms(size=300)) train_loader = DataLoader(xdl, batch_size=batch_size, shuffle=False, num_workers=4, sampler=BalanceClassSampler( labels=xdl.get_labels(),
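# Hedged sketch: the LabelSmoothing criterion above is project-specific; a minimal stand-in that applies
# uniform label smoothing is shown below. The class name and the class-index target convention are
# assumptions (the snippet itself builds one-hot targets), not the verified implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class LabelSmoothingCE(nn.Module):
    def __init__(self, smoothing=0.05):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, target):
        # logits: (N, C); target: class indices of shape (N,)
        log_probs = F.log_softmax(logits, dim=-1)
        n_classes = logits.size(-1)
        with torch.no_grad():
            # smoothing/(C-1) mass on the wrong classes, 1-smoothing on the true class
            true_dist = torch.full_like(log_probs, self.smoothing / (n_classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))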
adj, features, labels, idx_train, idx_val, idx_test = load_data( adj=sp_adj, features=features_merged_sparse, labels=df_merged_labels, index=image_index) # features = sparse.csr_matrix(features) # adj, features, labels, idx_train, idx_val, idx_test = load_data(adj=sp_adj,features=features_merged_sparse,labels=df_merged_labels) # Model and optimizer model = GCN(nfeat=features.shape[1], nhid=args.hidden, nclass=labels.max().item() + 1, dropout=args.dropout) optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) # c = list(zip(not_missing,features_array, adj_array)) # r,a, b = zip(*c) from torch.utils.tensorboard import SummaryWriter # default `log_dir` is "runs" - we'll be more specific here writer = SummaryWriter('runs/gcn_cord_batch') # optimizer = optim.SparseAdam(model.parameters(),lr=args.lr) if args.cuda: model.cuda() features = features.cuda()
if args.mul: net._modules["0"]._modules.get("conv").register_forward_hook( hook_feature) best_loss = checkpoint["loss"] if vall else 100 alpha = checkpoint["alpha"] if vall else args.lr if args.opt == "Adam": optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=alpha, weight_decay=args.wd) #optimizer = optim.Adam(net.parameters(), lr=alpha, weight_decay=args.wd) elif args.opt == "AdamW": optimizer = optim.AdamW(net.parameters(), lr=alpha, weight_decay=args.wd) elif args.opt == "SGD": optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=alpha, weight_decay=args.wd, momentum=args.m) #optimizer = optim.SGD(net.parameters(), lr=alpha, weight_decay=args.wd, momentum=args.m) else: optimizer = optim.RMSprop(net.parameters(), lr=alpha, weight_decay=args.wd, momentum=args.m) if vall: optimizer.load_state_dict(checkpoint["optimizer"])
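# Note (an observation, not from the original script): the Adam and SGD branches above filter out frozen
# parameters via requires_grad, while the AdamW and RMSprop branches pass net.parameters() unfiltered.
# If the intent is to optimize only trainable parameters in every case, a uniform setup could look like:
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=alpha, weight_decay=args.wd)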
def create_optimizer(args, model, filter_bias_and_bn=True): opt_lower = args.opt.lower() weight_decay = args.weight_decay if weight_decay and filter_bias_and_bn: skip = {} if hasattr(model, 'no_weight_decay'): skip = model.no_weight_decay parameters = add_weight_decay(model, weight_decay, skip) weight_decay = 0. else: parameters = model.parameters() if 'fused' in opt_lower: assert has_apex and torch.cuda.is_available( ), 'APEX and CUDA required for fused optimizers' opt_args = dict(lr=args.lr, weight_decay=weight_decay) if hasattr(args, 'opt_eps') and args.opt_eps is not None: opt_args['eps'] = args.opt_eps if hasattr(args, 'opt_betas') and args.opt_betas is not None: opt_args['betas'] = args.opt_betas opt_split = opt_lower.split('_') opt_lower = opt_split[-1] if opt_lower == 'sgd' or opt_lower == 'nesterov': del opt_args['eps'] optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=True, **opt_args) elif opt_lower == 'momentum': del opt_args['eps'] optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=False, **opt_args) elif opt_lower == 'adam': optimizer = optim.Adam(parameters, **opt_args) elif opt_lower == 'adamw': optimizer = optim.AdamW(parameters, **opt_args) elif opt_lower == 'nadam': optimizer = Nadam(parameters, **opt_args) elif opt_lower == 'radam': optimizer = RAdam(parameters, **opt_args) elif opt_lower == 'adamp': optimizer = AdamP(parameters, wd_ratio=0.01, nesterov=True, **opt_args) elif opt_lower == 'sgdp': optimizer = SGDP(parameters, momentum=args.momentum, nesterov=True, **opt_args) elif opt_lower == 'adadelta': optimizer = optim.Adadelta(parameters, **opt_args) elif opt_lower == 'adafactor': if not args.lr: opt_args['lr'] = None optimizer = Adafactor(parameters, **opt_args) elif opt_lower == 'adahessian': optimizer = Adahessian(parameters, **opt_args) elif opt_lower == 'rmsprop': optimizer = optim.RMSprop(parameters, alpha=0.9, momentum=args.momentum, **opt_args) elif opt_lower == 'rmsproptf': optimizer = RMSpropTF(parameters, alpha=0.9, momentum=args.momentum, **opt_args) elif opt_lower == 'novograd': optimizer = NovoGrad(parameters, **opt_args) elif opt_lower == 'nvnovograd': optimizer = NvNovoGrad(parameters, **opt_args) elif opt_lower == 'fusedsgd': del opt_args['eps'] optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=True, **opt_args) elif opt_lower == 'fusedmomentum': del opt_args['eps'] optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=False, **opt_args) elif opt_lower == 'fusedadam': optimizer = FusedAdam(parameters, adam_w_mode=False, **opt_args) elif opt_lower == 'fusedadamw': optimizer = FusedAdam(parameters, adam_w_mode=True, **opt_args) elif opt_lower == 'fusedlamb': optimizer = FusedLAMB(parameters, **opt_args) elif opt_lower == 'fusednovograd': opt_args.setdefault('betas', (0.95, 0.98)) optimizer = FusedNovoGrad(parameters, **opt_args) else: assert False and "Invalid optimizer" raise ValueError if len(opt_split) > 1: if opt_split[0] == 'lookahead': optimizer = Lookahead(optimizer) return optimizer
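# Hedged usage sketch for create_optimizer above; the argparse-style fields mirror the attributes the
# function reads, and the model is a placeholder module.
from types import SimpleNamespace
import torch.nn as nn

args = SimpleNamespace(opt='adamw', lr=1e-3, weight_decay=0.05,
                       momentum=0.9, opt_eps=None, opt_betas=None)
model = nn.Linear(128, 10)
optimizer = create_optimizer(args, model)
# A prefixed name such as 'lookahead_adamw' builds the same AdamW and wraps it in Lookahead.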
# load model
# hin2vec.load_state_dict(torch.load('hin2vec.pt'))

# set training parameters
n_epoch = 1
batch_size = 64
log_interval = 200

if torch.cuda.is_available():
    print('Use ', device)
    hin2vec = hin2vec.to(device)
else:
    print('Use CPU')

data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
optimizer = optim.AdamW(hin2vec.parameters())  # the original author used SGD? AdamW is used here
loss_function = nn.BCELoss()

for epoch in range(n_epoch):
    train(neighbors, log_interval, hin2vec, device, data_loader, optimizer, loss_function, epoch)

torch.save(hin2vec, 'hin2vec.pt')

# set output parameters [the output file is a bit different from the original code.]
node_vec_fname = 'node_vec_merge_1_' + \
    str(window) + '_' + \
    str(walk) + '_' + \
    str(walk_length) + '_' + \
    str(embed_size) + '_' + \
    str(neg) + '_' + \
def train(hyp_set, train_index): # create folder to save results results_folder = os.path.join(results_path, 'test_') results_folder += str(train_index) if not os.path.exists(results_folder): os.makedirs(results_folder) # save training configuration results_data = {'hyperparameters': hyp_set} with open(results_folder + '/results_data.pth', 'wb') as f: pickle.dump(results_data, f) # ============================================================================= ''' Defining objects of the model ''' # ============================================================================= # feature dimension obs_dim = data_ROM_t.shape[2] # latent dimension latent_dim = obs_dim - hyp_set['latent_dim'] # hidden units per layer in encoder units_enc = hyp_set['units_enc'] # hidden layers encoder layers_enc = hyp_set['layers_enc'] # layers in NODE block layers_node = [latent_dim] + list(hyp_set['layers_node']) + [latent_dim] # normalized vectors for ODE integration ts_ode = np.linspace(0, 1, data_ROM.shape[0]) ts_ode = torch.from_numpy(ts_ode).float().to(device) ts_ode_t = ts_ode[:twindow] ts_ode_v = ts_ode[:vwindow] # hidden units per layer in decoder units_dec = hyp_set['units_dec'] # hidden layers decoder layers_dec = hyp_set['layers_dec'] # objects for VAE enc = Encoder(latent_dim, obs_dim, units_enc, layers_enc).to(device) node = LatentOdeF(layers_node).to(device) dec = Decoder(latent_dim, obs_dim, units_dec, layers_dec).to(device) # ============================================================================= ''' Training configurations ''' # ============================================================================= # Network's parameters params = (list(enc.parameters()) + list(node.parameters()) + list(dec.parameters())) optimizer = optim.AdamW(params, lr= hyp_set['lr']) # training loss metric using average loss_meter_t = RunningAverageMeter() # training loss metric without KL meter_train = RunningAverageMeter() # validation loss metric without KL meter_valid = RunningAverageMeter() # Scheduler for learning rate decay factor = 0.99 min_lr = 1e-7 scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=factor, patience=5, verbose=False, threshold=1e-5, threshold_mode='rel', cooldown=0, min_lr=min_lr, eps=1e-08) criterion = torch.nn.MSELoss() # list to track training losses lossTrain = [] # list to track validation losses lossVal = [] # number of iterations for the training iters = args.niters for itr in range(1, iters + 1): optimizer.zero_grad() # scheduler for param_group in optimizer.param_groups: current_lr = param_group['lr'] if args.sched: scheduler.step(metrics=loss_meter_t.avg) out_enc = enc.forward(obs_t) # definition of mean and log var for codings qz0_mean, qz0_logvar = out_enc[:, :latent_dim], out_enc[:, latent_dim:] # noise epsilon = torch.randn(qz0_mean.size()).to(device) # sampling codings z0 = epsilon * torch.exp(.5 * qz0_logvar) + qz0_mean # latent space evolution using node zt = odeint(node, z0, ts_ode_t, method=args.method).permute(1, 0, 2) output_vae_t = dec(zt) # compute KL loss pz0_mean = pz0_logvar = torch.zeros(z0.size()).to(device) analytic_kl = normal_kl(qz0_mean, qz0_logvar, pz0_mean, pz0_logvar).sum(-1) kl_loss = torch.mean(analytic_kl, dim=0) # VAE loss: MSE + KL loss = criterion(output_vae_t, data_ROM_t) + kl_loss # backpropagation loss.backward() # optimization step optimizer.step() # update training metric loss_meter_t.update(loss.item()) # update training loss without KL meter_train.update(loss.item() - kl_loss.item()) 
lossTrain.append(meter_train.avg) # validation step with torch.no_grad(): enc.eval() node.eval() dec.eval() zv = odeint(node, z0, ts_ode_v, method=args.method).permute(1, 0, 2) output_vae_v = dec(zv) loss_v = criterion(output_vae_v[:, twindow:], data_ROM_v[:, twindow:]) meter_valid.update(loss_v.item()) lossVal.append(meter_valid.avg) enc.train() node.train() dec.train() if itr % 100 == 0: print('Iter: {}, Learning rate is: {:.4f}'.format(itr, current_lr)) print('Iter: {}, Train Loss: {:.4f}'.format(itr, lossTrain[itr - 1])) print('Iter: {}, Valid Loss: {:.4f}'.format(itr, lossVal[itr - 1])) # scale output output_vae = (output_vae_v.cpu().detach().numpy()) * std_data + mean_data plotROM(output_vae, data_ROM[:vwindow, :], lossTrain, lossVal, itr, twindow, results_folder) if np.isnan(lossTrain[itr - 1]): break torch.save(enc.state_dict(), results_folder + '/enc.pth') torch.save(node.state_dict(), results_folder + '/node.pth') torch.save(dec.state_dict(), results_folder + '/dec.pth') # test results with torch.no_grad(): enc.eval() node.eval() dec.eval() ze = odeint(node, z0, ts_ode, method=args.method).permute(1, 0, 2) output_vae_e = dec(ze) enc.train() node.train() dec.train() data_NODE = (output_vae_e.cpu().detach().numpy()) * std_data + mean_data with open('./data_node8.pth', 'wb') as f: pickle.dump(data_NODE, f)
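# Hedged sketch of the normal_kl helper used in the VAE loss above: the element-wise KL divergence
# between two diagonal Gaussians parameterised by mean and log-variance. This is consistent with how it
# is called (summed over the last dimension afterwards), but it is an assumption about the imported
# helper, not its verified source.
import torch

def normal_kl(mu1, logvar1, mu2, logvar2):
    v1 = torch.exp(logvar1)
    v2 = torch.exp(logvar2)
    # KL( N(mu1, v1) || N(mu2, v2) ) per element
    return 0.5 * (logvar2 - logvar1 + (v1 + (mu1 - mu2) ** 2) / v2 - 1.0)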
                     std=std)
data_testB = DataLoader(testB, batch_size=batch_size, shuffle=False)

# set device (either cpu or gpu)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

use = 'segnet'
if use == 'segnet':
    model = SegNet().to(device)
    summary(model, (3, 256, 256))
else:
    model = UNet().to(device)
    summary(model, (3, 256, 256))

F1_scores = train(model, optim.AdamW(model.parameters(), lr=0.0001), 100,
                  data_tr, data_val, data_testA, data_testB, device)

plt.rcParams['figure.figsize'] = [18, 12]
fig, (ax0, ax1, ax2, ax3) = plt.subplots(nrows=4, sharex=True)
ax0.errorbar(range(len(F1_scores['train_mean'])), F1_scores['train_mean'],
             yerr=F1_scores['train_std'], fmt='-o')
ax0.set_title('F1-Score train')
ax1.errorbar(range(len(F1_scores['val_mean'])), F1_scores['val_mean'],
             yerr=F1_scores['val_std'], fmt='-o')
print('Using model', type(net).__name__, 'with', net.num_classes, 'output neurons') # Losses losses_fn = get_losses_fn(cfg) print('Using losses', [(type(loss).__name__, 'weight:' + str(loss.weight)) for loss in losses_fn]) # Metrics metrics_fn = get_metrics(cfg) print('Using metrics', [type(metric).__name__ for metric in metrics_fn]) # Create Checkpoint dir cfg.checkpoint_dir = os.path.join(cfg.experiment.output_dir, cfg.experiment.name) os.makedirs(cfg.checkpoint_dir, exist_ok=True) # Optimizer optimizer = optim.AdamW(net.parameters(), lr=cfg.hyperparameter.lr_base) print('Using optimizer', optimizer) scheduler = None if cfg.scheduler.use_scheduler: scheduler = eval(cfg.scheduler.name)(optimizer, **cfg.scheduler_params) print('Using scheduler', scheduler) net = nn.DataParallel(net) print("Let's use", torch.cuda.device_count(), "GPUs!") if args.ckpt is None: # Run training eval_accuracies = train(net, losses_fn, metrics_fn,
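# Hedged alternative (a suggestion, not from the original script) to eval(cfg.scheduler.name): resolving
# the scheduler class through an explicit lookup table avoids evaluating arbitrary config strings. The
# registered names below are illustrative assumptions.
from torch import optim

SCHEDULER_REGISTRY = {
    'StepLR': optim.lr_scheduler.StepLR,
    'CosineAnnealingLR': optim.lr_scheduler.CosineAnnealingLR,
    'ReduceLROnPlateau': optim.lr_scheduler.ReduceLROnPlateau,
}

def build_scheduler(name, optimizer, **scheduler_params):
    return SCHEDULER_REGISTRY[name](optimizer, **scheduler_params)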
def run_app(): # GUI epoch_loc = st.empty() prog_bar = st.empty() loss_loc = st.empty() global_loss_loc = st.empty() col1, col2 = st.beta_columns(2) img_loc = col1.empty() stats_loc = col2.empty() image_meta_loc = st.empty() loss_chart = st.empty() glob_loss_chart = st.empty() test_progress_bar = st.empty() testing_chart = st.empty() test_stats = st.empty() cuda = torch.device('cuda') cpu = torch.device('cpu') net = Net() net.to(cuda) # criterion = nn.CrossEntropyLoss() criterion = nn.MSELoss() optimizer = optim.AdamW(net.parameters(), lr=0.001) dataset = EegDataset() EPOCHS = 200 losses = deque(maxlen=100) global_losses = deque(maxlen=100) for epoch in range(EPOCHS): i = 1 epoch_loc.write(f"EPOCH:\t{epoch}/{EPOCHS-1}") global_loss = torch.tensor([0.0], device=cuda) optimizer.zero_grad() for image in dataset: prog_bar.progress(i / len(dataset)) i += 1 optimizer.zero_grad() x = image.data img_loc.image(image.data.numpy(), width=200) image_meta_loc.write( f"ID:\t{image.id} \nCategory:\t{image.category}") out = net(x.cuda().float()) #.unsqueeze(0) target_id = dataset.categories.index(image.category) target = torch.zeros(len(dataset.categories)) target[target_id] = 1 target = target.cuda().float() # target = torch.tensor([dataset.categories.index(image.category)]).cuda() # stats_loc.write(f"OUTPUT:\t{torch.argmax(out.detach().cpu(), 1)} \nTARGET:\t{target.detach().cpu()}") stats_loc.write( f"OUTPUT:\t{torch.argmax(out.detach().cpu(), 0)} \nTARGET:\t{target_id}" ) # print(target.shape) loss = criterion(out, target) losses.append(loss.detach().cpu().numpy()) loss_chart.line_chart(pd.DataFrame(losses, columns=[ 'loss', ])) global_loss += loss loss.backward() optimizer.step() loss_loc.write(f"LOSS:\t{loss.detach().cpu()}") # print(loss) global_loss_loc.write(f"GLOBAL LOSS:\t{global_loss.detach().cpu()}") global_losses.append(global_loss.detach().cpu().numpy()) glob_loss_chart.line_chart( pd.DataFrame(global_losses, columns=[ 'global_loss', ])) # global_loss.backward() # optimizer.step() # TESTING PHASE: dataset = EegDataset(testing=True) right = 0 wrong = 0 st.write('TESTING!!!!!!!!!!!!!!!!!!!/EVALUATING????') i = 1 with torch.no_grad(): for image in dataset: test_progress_bar.progress(i / len(dataset)) i += 1 x = image.data out = net(x.cuda().float()) out_id = torch.argmax(out.detach().cpu(), 0) target_id = dataset.categories.index(image.category) if out_id == target_id: right += 1 else: wrong += 1 # chart_data = pd.DataFrame(np.array([[right, wrong]]), columns=['right', wrong]) # testing_chart.bar_chart(chart_data) test_stats.write( f'RIGHT: {right}/{len(dataset)} \nWRONG: {wrong}/{len(dataset)}' )
# assemble simulator = PipelineAgent(None, None, policy_usr, None, 'user') evaluator = MultiWozEvaluator() env = Environment(None, simulator, None, dst_sys, evaluator) # ICM pre-training if cfg_file == 'ic_das' or cfg_file == 'ic_utt': steps = 0 i = 0 #episode # optimizer: outside any 'update' class method to allow icm_optim = optim.AdamW(icm.parameters(), lr= icm_lr) # ---------- pre-training loop ---------------- while True: # get episode sampled_episode, user_das, sys_das = sample(env, policy_sys, batchsz) # unpack _, _, _, _, mask = sampled_episode batchsz_real = len(mask) # update ICM for j in range(update_round_icm): # optim zero grad
args.loader_num_aux_vocabs = 2 args.loader_num_items = data_provider.num_item logging.info(data_provider.report_info()) dict_to_logger(vars(args), exclude_list=["name", "seed", "gpu"]) model = create_model_and_load_parameter() model.cuda() train_loader = data_provider.training_loader() val_loader = data_provider.validation_loader() pack = { "params": model.parameters(), "lr": args.train_lr, "weight_decay": args.train_wd } # optimizer = optim.Adam(**pack) optimizer = optim.AdamW(**pack) # optimizer = Lamb(**pack) # optimizer = optim.SGD(**pack, momentum=0.9) lr_scheduler = optim.lr_scheduler.StepLR( optimizer, step_size=args.train_lr_decay_step, gamma=args.train_lr_decay_gamma) log_model_info() # exit(0) start()
if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True #optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) optimizer = optim.AdamW(net.parameters(), lr=initial_lr) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() def train(): net.train() epoch = 0 + args.resume_epoch print('Loading Dataset...') dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean))