def main(args):
    set_seed(args.seed)

    # Load checkpoint if one exists
    start_epoch = 0
    if args.load:
        try:
            print("Loading models: {}".format(args.model_path))
            checkpoint = torch.load(args.model_path)
            start_epoch = checkpoint['epoch'] + 1
            model = checkpoint['model']
            model_optimizer = checkpoint['model_optimizer']
            print("Model Loaded")
        except Exception:
            print("Model couldn't be loaded. Aborting")
            exit(1)
    else:
        model = model()  # instantiate the model class
        model_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        print("Model Created")

    # Deal with CUDA
    if args.cuda:
        device = args.cuda_device
        cudnn.benchmark = True
        if torch.cuda.device_count() > 1:
            print("There are", torch.cuda.device_count(), "GPUs!")
            model = torch.nn.DataParallel(model)
    else:
        device = 'cpu'

    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    if args.do_eval:
        print("Loading Validation Dataset")
        val_dataset = CustomDataset(args.data_dir, 'validation')
        val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=args.num_workers,
                                                 pin_memory=True)
        print("There are {} validation data examples".format(len(val_dataset)))
        print("Evaluating Model")
        validation_score = validate(args)
        print("Model performance is {}".format(validation_score))

    if args.do_train:
        # In case we aren't evaluating
        validation_score, best_validation_score, is_best = 0, 0, False
        print("Loading Training Dataset")
        train_dataset = CustomDataset(args.data_dir, 'training')
        print("There are {} training data examples".format(len(train_dataset)))
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                                   shuffle=True, num_workers=args.num_workers,
                                                   pin_memory=True)
        print("Training model")
        for epoch in range(start_epoch, args.epochs):
            print("Starting epoch {}".format(epoch))
            train(args)
            if args.do_eval:
                validation_score = validate(args)
                is_best = False
                if validation_score > best_validation_score:
                    is_best = True
                    best_validation_score = validation_score
            save_checkpoint(args, epoch, validation_score, is_best)
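# A hedged argparse sketch of the flags that main() above reads from `args`. The flag names are
# taken from the attribute accesses in main(); the types and defaults are assumptions, not the
# project's actual CLI.
import argparse

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--load', action='store_true', help='resume from a saved checkpoint')
    parser.add_argument('--model_path', type=str, default='checkpoint.pth')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--cuda_device', type=str, default='cuda:0')
    parser.add_argument('--data_dir', type=str, default='./data')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--do_eval', action='store_true')
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--epochs', type=int, default=10)
    return parser.parse_args()

if __name__ == '__main__':
    main(get_args())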
def main(args):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = load_model(base_model_path=args.base_model_path,
                       model_checkpoint=args.model_checkpoint,
                       device=device)
    dataset = CustomDataset(root=args.input_image_folder,
                            transform=get_transforms(),
                            return_paths=True)
    dataloader = DataLoader(dataset=dataset, shuffle=False, batch_size=32,
                            pin_memory=True, num_workers=8)

    output_folder = './classification_output_files'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    label_file = open(os.path.join(output_folder,
                                   'correctly_classified_images_' + args.output_filename + '.txt'), 'w')
    results_file_path = os.path.join(output_folder,
                                     'classified_images_' + args.output_filename + '.csv')

    tot = 0
    correct_ = 0
    results = pd.DataFrame()
    progress_bar = tqdm(dataloader, total=len(dataloader),
                        desc='Classifying: {}'.format(args.input_image_folder.split('/')[-1]))
    for x, l, p in progress_bar:
        tot += x.shape[0]
        x = x.to(device)
        y_hat = model(x, True).max(-1)[1].cpu()
        correct = torch.where(y_hat == l)[0]
        correct_ += y_hat.eq(l).sum().item()
        progress_bar.set_postfix(accuracy=correct_ / tot)

        # Record the paths of correctly classified images
        for idx in correct:
            label_file.write(p[idx] + '\n')

        for i in range(len(l)):
            result = pd.DataFrame(dict(label=l[i].item(), predicted=y_hat[i].item()), index=[0])
            results = results.append(result, ignore_index=True)

    results.to_csv(results_file_path, index=False)
    label_file.close()
    print("Accuracy: {:.2f}%".format(correct_ / tot * 100))
def test(): print("=== Test ===") args = get_args() print(args) data_dir = f"./../../asset/{args.dataset}/" if args.train : test_labels, test_texts = read_train_data(data_dir) else : test_labels, test_texts = read_test_data(data_dir) # test_texts = list(test_texts)[:100] # test_labels = list(test_labels)[:100] test_texts = list(test_texts) test_labels = list(test_labels) model_name = args.model tokenizer = AutoTokenizer.from_pretrained(model_name) test_encodings = tokenizer( test_texts, truncation=True, padding=True, max_length=512) test_dataset = CustomDataset(test_encodings, test_labels) checkpoint_dir = f"./models/{args.task}/{args.model}/" best_checkpoint = find_best_checkpoint(checkpoint_dir) model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint) test_trainer = Trainer(model) test_loader = DataLoader( test_dataset, batch_size=args.batch_size, shuffle=False) raw_pred, _, _ = test_trainer.prediction_loop( test_loader, description="prediction") # Preprocess raw predictions y_pred = np.argmax(raw_pred, axis=1) metrics = compute_metrics(y_pred, test_labels) print(metrics) if args.train : fpath = os.path.join(data_dir, f"train-predictions/{args.model}.pkl") else : fpath = os.path.join(data_dir, f"predictions/{args.model}.pkl") parent_dir = "/".join(str(fpath).split('/')[:-1]) if not os.path.exists(parent_dir): os.makedirs(parent_dir) with open(fpath, 'wb') as f: pickle.dump(y_pred, f)
def bertPreprocessing(dataset_name, data_df, MAX_LEN=128, save_all=True):
    """
    Dataset preparation for the BERT model. The dataset is split (0.8 train, 0.1 valid, 0.1 test)
    and the resulting sets are returned. Every set is a CustomDataset instance (see utils.py)
    that returns data in BERT format.

    :param dataset_name: string of dataset name.
    :param data_df: dataset in pandas DataFrame format.
    :param MAX_LEN: maximum number of tokens kept in the BERT encoding (remaining tokens are ignored).
    :param save_all: boolean specifying whether to save all data before training or evaluating
                     the network, to save time later.
    :return: training_set, validation_set, test_set in CustomDataset format.
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    train_size = 0.8
    # Note: sample first and drop by the sampled index *before* resetting it,
    # otherwise the drop removes the wrong rows and the splits overlap.
    train_dataset = data_df.sample(frac=train_size, random_state=200)
    tmp_dataset = data_df.drop(train_dataset.index).reset_index(drop=True)
    train_dataset = train_dataset.reset_index(drop=True)
    test_dataset = tmp_dataset.sample(frac=0.5, random_state=200)
    val_dataset = tmp_dataset.drop(test_dataset.index).reset_index(drop=True)
    test_dataset = test_dataset.reset_index(drop=True)

    print("FULL Dataset: {}".format(data_df.shape))
    print("TRAIN Dataset: {}".format(train_dataset.shape))
    print("TEST Dataset: {}".format(test_dataset.shape))
    print("VALID Dataset: {}".format(val_dataset.shape))

    training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
    validation_set = CustomDataset(val_dataset, tokenizer, MAX_LEN)
    test_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)

    if save_all is True:
        os.makedirs(os.path.dirname('datasets/' + dataset_name + '_bert_cleaned.txt'), exist_ok=True)
        with open('datasets/' + dataset_name + '_bert_cleaned.txt', 'wb') as f:
            pickle.dump([training_set, validation_set, test_set, MAX_LEN], f)

    return training_set, validation_set, test_set
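# A hedged usage sketch (not part of the original file): wrap the returned splits in DataLoaders
# for training. The toy DataFrame, its 'text'/'label' column names, and the batch sizes are
# assumptions for illustration only.
import pandas as pd
from torch.utils.data import DataLoader

df_data = pd.DataFrame({'text': ['a great movie', 'a dull movie', 'an average movie', 'a superb movie'],
                        'label': [1, 0, 0, 1]})
training_set, validation_set, test_set = bertPreprocessing('toy_dataset', df_data, MAX_LEN=128, save_all=False)
train_loader = DataLoader(training_set, batch_size=2, shuffle=True)
valid_loader = DataLoader(validation_set, batch_size=2, shuffle=False)
test_loader = DataLoader(test_set, batch_size=2, shuffle=False)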
def bertPredict(dataset_name, n_classes, model_path, text, label):
    """
    This function predicts the label of an instance of a BERT-preprocessed dataset,
    using the CPU, and prints the results.

    :param dataset_name: string of dataset name.
    :param n_classes: int, number of dataset classes.
    :param model_path: path of the saved fine-tuned BERT PyTorch model.
    :param text: string to classify.
    :param label: true label (int) associated with the input text.
    :return: None
    """
    device = torch.device('cpu')
    MAX_LEN = 128
    TEST_BATCH_SIZE = 8

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    pred_data = pd.DataFrame(data={'text': [text], 'label': [label]})
    predict = CustomDataset(pred_data, tokenizer, MAX_LEN)

    test_params = {'batch_size': TEST_BATCH_SIZE,
                   'shuffle': True,
                   'num_workers': 0}
    testing_loader = DataLoader(predict, **test_params)

    model = BertModel(n_classes=n_classes, dropout=0.3)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    for batch in testing_loader:
        ids = batch['ids']
        mask = batch['mask']
        token_type_ids = batch['token_type_ids']
        output = model(ids, mask, token_type_ids)
        output = torch.softmax(output, dim=1).detach().numpy()
        output = np.array(output)
        print(output)
        print(text)
        print("True Label: {:}".format(label))
        print("Predicted Label: {:}".format(output.argmax(axis=1) + 1))
from Multimodal import AskAttendAnswer
from Image_Features import GridFeatures
from Text_Features import TextModel
from utils import CustomDataset
import os
import torch

if __name__ == '__main__':
    data_path = "/home/alex/Desktop/4-2/Text_VQA/Data/"
    ID_path = os.path.join(data_path, "train/train_ids.txt")
    json_path = os.path.join(data_path, "train/cleaned.json")
    dataset = CustomDataset(data_path, ID_path, json_path, (448, 448), set_="train")

    # Image extractor
    resnet = GridFeatures("resnet101")
    # Text extractor
    bert = TextModel()

    # print("Check resnet : {}".format(resnet(torch.randn((1, 3, 448, 448))).size()))
    print("Check bert : {}".format(bert(torch.tensor([1, 2, 4, 0, 0])).size()))

    # embed_dim, img_ft_dims, batch_size, max_seq_len, num_outs
    mixer = AskAttendAnswer(300, (2048, 14, 14), 10, 64, 15000)
    text_fts = torch.nn.init.normal_(torch.empty(10, 64, 300))
    img_fts = torch.nn.init.normal_(torch.empty(10, 3, 448, 448))
    mixer.combine(text_fts, img_fts)
                             save_weights_only=False, mode='auto', period=1)

# model = resnet.ResnetBuilder.build_resnet_18((18, 32, 32), nb_classes)
network = model.create_model('resnet50', input_shape=(18, 32, 32), num_outputs=nb_classes)
network.compile(loss='sparse_categorical_crossentropy',
                optimizer=optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True),
                metrics=['accuracy'])

mean = unpickle("mean_channal.pkl")
trfs = T.Compose([T.Normalize(mean), T.RandomHorizontalFlip(0.5)])

training_data = CustomDataset('/mnt/img1/yangqh/Germany_cloud/training.h5', transform=None)
validation_data = CustomDataset('/mnt/img1/yangqh/Germany_cloud/training.h5', transform=None)
############## Change this to the training set directory

network.fit_generator(
    training_data.load_data(batch_size=batch_size),
    steps_per_epoch=len(training_data) // batch_size,
    validation_data=validation_data.load_data(batch_size=batch_size),
    validation_steps=len(validation_data) // batch_size,
    epochs=nb_epoch,
    verbose=1,
    max_q_size=100,
    callbacks=[checkpoint, lr_reducer, early_stopper, csv_logger])
network.save('final.h5')
from torch.utils.data import DataLoader

from utils import CustomDataset

dataset = CustomDataset()
dataloader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
)
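# A minimal iteration sketch (added, not from the original file), assuming this CustomDataset
# yields (input, target) tensor pairs; the actual return format depends on utils.CustomDataset.
for batch_idx, (inputs, targets) in enumerate(dataloader):
    print(batch_idx, inputs.shape, targets.shape)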
def main(args):
    # Settings
    warnings.simplefilter("ignore", UserWarning)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Args parser
    hj_method = args.hj_method
    kr_method = args.kr_method
    batch_size = args.batch_size
    beam_size = args.beam_size
    hidden_size = args.hidden_size
    embed_size = args.embed_size
    vocab_size = args.vocab_size
    max_len = args.max_len
    padding_index = args.pad_id
    n_layers = args.n_layers
    stop_ix = args.stop_ix

    # Load saved model & Word2Vec
    save_path = 'save_{}_{}_{}_maxlen_{}'.format(vocab_size, hj_method, kr_method, max_len)
    save_list = sorted(glob.glob(f'./save/{save_path}/*.*'))
    save_pt = save_list[-1]
    print('Will load {} pt file...'.format(save_pt))
    word2vec_hj = Word2Vec.load('./w2v/word2vec_hj_{}_{}.model'.format(vocab_size, hj_method))

    # SentencePiece model load
    spm_kr = spm.SentencePieceProcessor()
    spm_kr.Load("./spm/m_korean_{}.model".format(vocab_size))

    # Test data load
    with open('./test_dat.pkl', 'rb') as f:
        test_dat = pickle.load(f)
    test_dataset = CustomDataset(test_dat['test_hanja'], test_dat['test_korean'])
    test_loader = getDataLoader(test_dataset, pad_index=padding_index, shuffle=False,
                                batch_size=batch_size)

    # Model load
    print('Model loading...')
    encoder = Encoder(vocab_size, embed_size, hidden_size, word2vec_hj,
                      n_layers=n_layers, padding_index=padding_index)
    decoder = Decoder(embed_size, hidden_size, vocab_size,
                      n_layers=n_layers, padding_index=padding_index)
    seq2seq = Seq2Seq(encoder, decoder, beam_size).cuda()
    # optimizer = optim.Adam(seq2seq.parameters(), lr=lr, weight_decay=w_decay)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step_size, gamma=lr_decay)
    print(seq2seq)

    print('Testing...')
    start_time = time.time()
    results = test(seq2seq, test_loader, vocab_size, load_pt=save_pt, stop_ix=stop_ix)
    print(time.time() - start_time)
    print('Done!')

    print("Decoding...")
    pred_list = list()
    for result_text in tqdm(results):
        text = torch.Tensor(result_text).squeeze().tolist()
        text = [int(x) for x in text]
        prediction_sentence = spm_kr.decode_ids(text).strip()  # Decode with strip
        pred_list.append(prediction_sentence)

    ref_list = list()
    for ref_text in tqdm(test_dat['test_korean'][:stop_ix]):
        ref_list.append(spm_kr.decode_ids(ref_text).strip())
    print('Done!')

    with open(f'./save/{save_path}/test_result.pkl', 'wb') as f:
        pickle.dump({
            'pred': pred_list,
            'reference': ref_list,
        }, f)
    print(f'Saved file: ./save/{save_path}/test_result.pkl')

    # Calculate BLEU score
    print('Calculate BLEU4, METEOR, Rouge-L...')
    chencherry = SmoothingFunction()
    bleu4 = corpus_bleu(ref_list, pred_list, smoothing_function=chencherry.method4)
    print('BLEU Score is {}'.format(bleu4))

    # Calculate METEOR score
    meteor = meteor_score(ref_list, pred_list)
    print('METEOR Score is {}'.format(meteor))

    # Calculate Rouge-L score
    r = Rouge()
    total_test_length = len(ref_list)
    precision_all = 0
    recall_all = 0
    f_score_all = 0
    for i in range(total_test_length):
        [precision, recall, f_score] = r.rouge_l([ref_list[i]], [pred_list[i]])
        precision_all += precision
        recall_all += recall
        f_score_all += f_score
    print('Precision : {}'.format(round(precision_all / total_test_length, 4)))
    print('Recall : {}'.format(round(recall_all / total_test_length, 4)))
    print('F Score : {}'.format(round(f_score_all / total_test_length, 4)))
def predict(): print("=== Predict ===") args = get_args() print(args) if args.bias_type != "": data_dir = f"./../../data/{args.mutation_tool}/{args.bias_type}/{args.mutant}/" else: data_dir = f"./../../data/{args.mutation_tool}/{args.mutant}/" if args.type == "mutant": test_labels, test_texts = read_test_data(data_dir) elif args.type == "original": generate_original_data(data_dir, mutation_tool=args.mutation_tool) test_labels, test_texts = read_original_data(data_dir) else: raise ValueError("Unknown type that needs to be tested") # test_texts = list(test_texts)[:100] # test_labels = list(test_labels)[:100] test_texts = list(test_texts) test_labels = list(test_labels) model_name = args.model tokenizer = AutoTokenizer.from_pretrained(model_name) if args.task == "imdb" and args.type == "mutant" and ( args.bias_type == "occupation" or args.bias_type == "country"): test_encodings = batch_tokenizer(tokenizer, test_texts, batch_size=10000) else: test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=512) test_dataset = CustomDataset(test_encodings, test_labels) checkpoint_dir = f"./models/{args.task}/{args.model}/" best_checkpoint = find_best_checkpoint(checkpoint_dir) model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint) test_trainer = Trainer(model) test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False) raw_pred, _, _ = test_trainer.prediction_loop(test_loader, description="prediction") # Preprocess raw predictions y_pred = np.argmax(raw_pred, axis=1) fpath = os.path.join(data_dir, f"{args.type}-predictions/{args.model}.pkl") parent_dir = "/".join(str(fpath).split('/')[:-1]) if not os.path.exists(parent_dir): os.makedirs(parent_dir) with open(fpath, 'wb') as f: pickle.dump(y_pred, f)
def fine_tune(): print("=== Fine-tune ===") args = get_args() print(args) if args.task == "imdb": data_dir = "./../../asset/imdb/" train_labels, train_texts = read_imdb_train(data_dir) elif args.task == "twitter_semeval": data_dir = "./../../asset/twitter_semeval/" train_labels, train_texts = read_twitter_train(data_dir) elif args.task == "twitter_s140": data_dir = "./../../asset/twitter_s140/" train_labels, train_texts = read_twitter_train(data_dir) # check_data() train_texts, val_texts, train_labels, val_labels = train_test_split( train_texts, train_labels, test_size=args.test_size) ## IF HAVE MUCH TIME, try to increase test size because the fine-tuning run fast train_texts = list(train_texts) val_texts = list(val_texts) train_labels = list(train_labels) val_labels = list(val_labels) model_name = args.model # model_name = "bert-base-cased" # model_name = "roberta-base" # model_name = "microsoft/deberta-large-mnli" # model_name = "bert-base-uncased" tokenizer = AutoTokenizer.from_pretrained(model_name) # check_data() train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=512) val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=512) train_dataset = CustomDataset(train_encodings, train_labels) val_dataset = CustomDataset(val_encodings, val_labels) model = AutoModelForSequenceClassification.from_pretrained(model_name) training_args = TrainingArguments( # output directory output_dir=f'./models/{args.task}/{model_name}/', num_train_epochs=args.epochs, # total number of training epochs per_device_train_batch_size=args. train_bs, # batch size per device during training per_device_eval_batch_size=64, # batch size for evaluation warmup_steps=args. warmup_steps, # number of warmup steps for learning rate scheduler weight_decay=args.weight_decay, # strength of weight decay # directory for storing logs logging_dir=f'./logs/{args.task}/{model_name}/', logging_steps=args.logging_steps, learning_rate=args.learning_rate, seed=0, evaluation_strategy="steps", eval_steps=args.eval_steps, save_total_limit=5, save_steps=args.save_steps, load_best_model_at_end=True) # trainer = Trainer( # # the instantiated 🤗 Transformers model to be trained # model=model, # args=training_args, # training arguments, defined above # train_dataset=train_dataset, # training dataset # eval_dataset=val_dataset, # evaluation dataset # compute_metrics=compute_metrics, # ) trainer = Trainer( # the instantiated 🤗 Transformers model to be trained model=model, args=training_args, # training arguments, defined above train_dataset=train_dataset, # training dataset eval_dataset=val_dataset, # evaluation dataset compute_metrics=compute_metrics, callbacks=[EarlyStoppingCallback(early_stopping_patience=7)], ) trainer.train()
args = parse_args()

param = {
    'max_depth': 4,
    'eta': 1,
    'silent': 1,
    'objective': 'binary:logistic'
}
param['nthread'] = 12
param['eval_metric'] = 'auc'

name = os.path.join(args.output_root, '{}'.format(args.model_type))
exclude = args.exclude_feature.split(',')

train_dataset = CustomDataset(args.dataset_root, 'train_data.pkl', exclude)
valid_dataset = CustomDataset(args.dataset_root, 'valid_data.pkl', exclude)
test_dataset = CustomDataset(args.dataset_root, 'test_data.pkl', exclude)

dtrain = xgb.DMatrix(np.array(train_dataset.static), label=train_dataset.label)
dvalid = xgb.DMatrix(np.array(valid_dataset.static), label=valid_dataset.label)
dtest = xgb.DMatrix(np.array(test_dataset.static), label=test_dataset.label)

evallist = [(dvalid, 'eval'), (dtrain, 'train')]
bst = xgb.train(param, dtrain, 10, evallist)
pred = bst.predict(dtest)
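# Hedged follow-up (not in the original snippet): score the held-out predictions with ROC AUC,
# matching the 'auc' eval_metric configured above; assumes test_dataset.label holds binary labels.
from sklearn.metrics import roc_auc_score

test_auc = roc_auc_score(np.array(test_dataset.label), pred)
print('Test AUC: {:.4f}'.format(test_auc))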
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(images, labels, random_state=42,
                                                    shuffle=True, train_size=0.75, test_size=0.25)

# Define transforms
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Init DataLoaders
loaders = {}

train_dataset = CustomDataset(X_train, y_train, transform)
train_sampler = torch.utils.data.RandomSampler(train_dataset)
train_loader = DataLoader(train_dataset,
                          batch_size=32,
                          shuffle=True,
                          collate_fn=custom_collate_fn,
                          num_workers=6,
                          drop_last=True)

eval_dataset = CustomDataset(X_test, y_test, transform)
eval_sampler = torch.utils.data.RandomSampler(eval_dataset)
eval_loader = DataLoader(eval_dataset,
                         batch_size=32,
                         shuffle=False,
                         collate_fn=custom_collate_fn,
                         num_workers=4)
def main(root_dir=cfg.root_dir, batch_size=cfg.batch_size, lr=cfg.lr, model_name=cfg.model_name,
         weight_decay=cfg.weight_decay, n_epochs=cfg.n_epochs, log_dir=cfg.log_dir, k_fold=cfg.k_fold,
         patience=cfg.patience, steplr_step_size=cfg.steplr_step_size, steplr_gamma=cfg.steplr_gamma,
         save_dir=cfg.save_dir):
    # Seed
    torch.manual_seed(42)
    np.random.seed(42)

    # Available CUDA
    use_cuda = True if torch.cuda.is_available() else False
    device = torch.device('cuda:0' if use_cuda else 'cpu')  # CPU or GPU

    # Transforms
    train_transforms = A.Compose([
        A.CenterCrop(230, 230),
        A.RandomCrop(224, 224),
        A.ElasticTransform(),
        A.IAAPerspective(),
        A.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    valid_transforms = A.Compose([
        A.CenterCrop(224, 224),
        A.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ToTensorV2()
    ])

    # Dataset
    dataset = CustomDataset(root_dir, transforms=train_transforms)

    # Stratified K-fold cross validation
    kf = StratifiedKFold(n_splits=k_fold, shuffle=True, random_state=42)

    # Tensorboard writer
    writer = SummaryWriter(log_dir)

    for n_fold, (train_indices, test_indices) in enumerate(kf.split(dataset.x_data, dataset.y_data), start=1):
        print(f'=====Stratified {k_fold}-fold : {n_fold}=====')

        # Dataloader
        train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
        valid_sampler = torch.utils.data.SubsetRandomSampler(test_indices)
        train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
        valid_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)

        # Model, criterion, optimizer, scheduler, regularization
        model = Net(model_name).to(device)
        criterion = nn.CrossEntropyLoss().to(device)
        optimizer = AdamP(model.parameters(), lr=lr)
        scheduler_steplr = optim.lr_scheduler.StepLR(optimizer, step_size=steplr_step_size, gamma=steplr_gamma)
        regularization = EarlyStopping(patience=patience)

        # Train
        for epoch in range(n_epochs):
            model.train()  # re-enable train mode each epoch (validation below switches to eval)
            print(f'Learning rate : {optimizer.param_groups[0]["lr"]}')
            train_metric_monitor = MetricMonitor()
            train_stream = tqdm(train_loader)
            for batch_idx, sample in enumerate(train_stream, start=1):
                img, label = sample['img'].to(device), sample['label'].to(device)
                output = model(img)
                optimizer.zero_grad()
                loss = criterion(output, label)
                _, preds = torch.max(output, dim=1)
                correct = torch.sum(preds == label.data)
                train_metric_monitor.update('Loss', loss.item())
                train_metric_monitor.update('Accuracy', 100. * correct / len(img))
                loss.backward()
                optimizer.step()
                train_stream.set_description(
                    f'Train epoch : {epoch} | {train_metric_monitor}'
                )

            # Valid
            valid_metric_monitor = MetricMonitor()
            valid_stream = tqdm(valid_loader)
            model.eval()
            with torch.no_grad():
                for batch_idx, sample in enumerate(valid_stream):
                    img, label = sample['img'].to(device), sample['label'].to(device)
                    output = model(img)
                    loss = criterion(output, label)
                    _, preds = torch.max(output, dim=1)
                    correct = torch.sum(preds == label.data)
                    valid_metric_monitor.update('Loss', loss.item())
                    valid_metric_monitor.update('Accuracy', 100. * correct / len(img))
                    valid_stream.set_description(
                        f'Test epoch : {epoch} | {valid_metric_monitor}'
                    )

            # Tensorboard
            train_loss = train_metric_monitor.metrics['Loss']['avg']
            train_accuracy = train_metric_monitor.metrics['Accuracy']['avg']
            valid_loss = valid_metric_monitor.metrics['Loss']['avg']
            valid_accuracy = valid_metric_monitor.metrics['Accuracy']['avg']
            writer.add_scalars(f'{n_fold}-fold Loss', {'train': train_loss, 'valid': valid_loss}, epoch)
            writer.add_scalars(f'{n_fold}-fold Accuracy', {'train': train_accuracy, 'valid': valid_accuracy}, epoch)

            # Save model / early stopping
            if regularization.early_stopping:
                break
            regularization.path = os.path.join(save_dir, f'{n_fold}_fold_{epoch}_epoch.pt')
            regularization(val_loss=valid_loss, model=model)
            scheduler_steplr.step()

    writer.close()
def train(config):
    if not os.path.exists(config.out):
        os.makedirs(config.out)

    comp_transform = transforms.Compose([
        transforms.CenterCrop(config.crop),
        transforms.Resize(config.resize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    domain_a_train = CustomDataset(os.path.join(config.root, 'trainA.txt'), transform=comp_transform)
    domain_b_train = CustomDataset(os.path.join(config.root, 'trainB.txt'), transform=comp_transform)

    a_label = torch.full((config.bs,), 1)
    b_label = torch.full((config.bs,), 0)
    b_separate = torch.full((config.bs, config.sep,
                             config.resize // (2 ** (config.n_blocks + 1)),
                             config.resize // (2 ** (config.n_blocks + 1))), 0)

    # build networks
    e1 = E1(sep=config.sep, size=config.resize)
    e2 = E2(n_feats=config.n_tot_feats, sep=config.sep)
    decoder = Decoder(n_feats=config.n_tot_feats)
    disc = Disc(size=config.resize, sep=config.sep)
    rho_clipper = RhoClipper(0., 1.)

    mse = nn.MSELoss()
    bce = nn.BCELoss()

    if torch.cuda.is_available():
        e1 = e1.cuda()
        e2 = e2.cuda()
        decoder = decoder.cuda()
        disc = disc.cuda()
        a_label = a_label.cuda()
        b_label = b_label.cuda()
        b_separate = b_separate.cuda()
        mse = mse.cuda()
        bce = bce.cuda()

    ae_params = list(e1.parameters()) + list(e2.parameters()) + list(decoder.parameters())
    ae_optimizer = optim.Adam(ae_params, lr=config.lr, betas=(config.beta1, config.beta2), eps=config.eps)

    disc_params = disc.parameters()
    disc_optimizer = optim.Adam(disc_params, lr=config.d_lr, betas=(config.beta1, config.beta2), eps=config.eps)

    _iter: int = 0
    if config.load != '':
        save_file = os.path.join(config.load, 'checkpoint')
        _iter = load_model(save_file, e1, e2, decoder, ae_optimizer, disc, disc_optimizer)

    e1 = e1.train()
    e2 = e2.train()
    decoder = decoder.train()
    disc = disc.train()

    print('[*] Started training...')
    while True:
        domain_a_loader = torch.utils.data.DataLoader(domain_a_train, batch_size=config.bs,
                                                      shuffle=True, num_workers=config.n_threads)
        domain_b_loader = torch.utils.data.DataLoader(domain_b_train, batch_size=config.bs,
                                                      shuffle=True, num_workers=config.n_threads)
        if _iter >= config.iters:
            break

        for domain_a_img, domain_b_img in zip(domain_a_loader, domain_b_loader):
            if domain_a_img.size(0) != config.bs or domain_b_img.size(0) != config.bs:
                break

            domain_a_img = Variable(domain_a_img)
            domain_b_img = Variable(domain_b_img)
            if torch.cuda.is_available():
                domain_a_img = domain_a_img.cuda()
                domain_b_img = domain_b_img.cuda()

            domain_a_img = domain_a_img.view((-1, 3, config.resize, config.resize))
            domain_b_img = domain_b_img.view((-1, 3, config.resize, config.resize))

            ae_optimizer.zero_grad()

            a_common = e1(domain_a_img)
            a_separate = e2(domain_a_img)
            a_encoding = torch.cat([a_common, a_separate], dim=1)

            b_common = e1(domain_b_img)
            b_encoding = torch.cat([b_common, b_separate], dim=1)

            a_decoding = decoder(a_encoding)
            b_decoding = decoder(b_encoding)

            g_loss = mse(a_decoding, domain_a_img) + mse(b_decoding, domain_b_img)

            preds_a = disc(a_common)
            preds_b = disc(b_common)
            g_loss += config.adv_weight * (bce(preds_a, b_label) + bce(preds_b, b_label))

            g_loss.backward()
            torch.nn.utils.clip_grad_norm_(ae_params, 5.)
            ae_optimizer.step()

            disc_optimizer.zero_grad()

            a_common = e1(domain_a_img)
            b_common = e1(domain_b_img)
            disc_a = disc(a_common)
            disc_b = disc(b_common)

            d_loss = bce(disc_a, a_label) + bce(disc_b, b_label)

            d_loss.backward()
            torch.nn.utils.clip_grad_norm_(disc_params, 5.)
            disc_optimizer.step()

            decoder.apply(rho_clipper)

            if _iter % config.progress_iter == 0:
                print('[*] [%07d/%07d] d_loss : %.4f, g_loss : %.4f' % (_iter, config.iters, d_loss, g_loss))

            if _iter % config.display_iter == 0:
                e1 = e1.eval()
                e2 = e2.eval()
                decoder = decoder.eval()

                save_images(config, e1, e2, decoder, _iter)

                e1 = e1.train()
                e2 = e2.train()
                decoder = decoder.train()

            if _iter % config.save_iter == 0:
                save_file = os.path.join(config.out, 'checkpoint')
                save_model(save_file, e1, e2, decoder, ae_optimizer, disc, disc_optimizer, _iter)

            _iter += 1
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(normMean, normStd)
])

# define the transformation of the val images
val_transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(normMean, normStd)
])

# Define training set using train_df and our defined transformations (train_transform)
training_set = CustomDataset(df_train, transform=train_transform)
train_loader = DataLoader(training_set, batch_size=32, shuffle=True, num_workers=4)

# Same for validation set (using val_transform):
validation_set = CustomDataset(df_val, transform=val_transform)
val_loader = DataLoader(validation_set, batch_size=32, shuffle=False, num_workers=4)

model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss().to(device)
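# A hedged sketch (not from the original source) of the epoch loop that the setup above feeds into.
# It assumes this CustomDataset yields (image, label) pairs and uses a fixed epoch count of 10;
# both are illustrative assumptions, not values from the project.
n_epochs = 10
for epoch in range(n_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

    # validation pass
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    print('Epoch {}: val accuracy {:.4f}'.format(epoch, correct / total))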
def train(args):
    if not os.path.exists(args.out):
        os.makedirs(args.out)

    _iter = 0

    comp_transformA = transforms.Compose([
        transforms.CenterCrop(args.cropA),
        transforms.Resize(args.resize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    comp_transformB = transforms.Compose([
        transforms.CenterCrop(args.cropB),
        transforms.Resize(args.resize),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    domA_train = CustomDataset(os.path.join(args.root, 'trainA.txt'), transform=comp_transformA)
    domB_train = CustomDataset(os.path.join(args.root, 'trainB.txt'), transform=comp_transformB)

    A_label = torch.full((args.bs,), 1)
    B_label = torch.full((args.bs,), 0)
    B_separate = torch.full((args.bs, args.sep * (args.resize // 64) * (args.resize // 64)), 0)

    e1 = E1(args.sep, args.resize // 64)
    e2 = E2(args.sep, args.resize // 64)
    decoder = Decoder(args.resize // 64)
    disc = Disc(args.sep, args.resize // 64)

    mse = nn.MSELoss()
    bce = nn.BCELoss()

    if torch.cuda.is_available():
        e1 = e1.cuda()
        e2 = e2.cuda()
        decoder = decoder.cuda()
        disc = disc.cuda()
        A_label = A_label.cuda()
        B_label = B_label.cuda()
        B_separate = B_separate.cuda()
        mse = mse.cuda()
        bce = bce.cuda()

    ae_params = list(e1.parameters()) + list(e2.parameters()) + list(decoder.parameters())
    ae_optimizer = optim.Adam(ae_params, lr=args.lr, betas=(0.5, 0.999))

    disc_params = disc.parameters()
    disc_optimizer = optim.Adam(disc_params, lr=args.disclr, betas=(0.5, 0.999))

    if args.load != '':
        save_file = os.path.join(args.load, 'checkpoint')
        _iter = load_model(save_file, e1, e2, decoder, ae_optimizer, disc, disc_optimizer)

    e1 = e1.train()
    e2 = e2.train()
    decoder = decoder.train()
    disc = disc.train()

    print('Started training...')
    while True:
        domA_loader = torch.utils.data.DataLoader(domA_train, batch_size=args.bs,
                                                  shuffle=True, num_workers=6)
        domB_loader = torch.utils.data.DataLoader(domB_train, batch_size=args.bs,
                                                  shuffle=True, num_workers=6)
        if _iter >= args.iters:
            break

        for domA_img, domB_img in zip(domA_loader, domB_loader):
            if domA_img.size(0) != args.bs or domB_img.size(0) != args.bs:
                break

            domA_img = Variable(domA_img)
            domB_img = Variable(domB_img)
            if torch.cuda.is_available():
                domA_img = domA_img.cuda()
                domB_img = domB_img.cuda()

            domA_img = domA_img.view((-1, 3, args.resize, args.resize))
            domB_img = domB_img.view((-1, 3, args.resize, args.resize))

            ae_optimizer.zero_grad()

            A_common = e1(domA_img)
            A_separate = e2(domA_img)
            A_encoding = torch.cat([A_common, A_separate], dim=1)

            B_common = e1(domB_img)
            B_encoding = torch.cat([B_common, B_separate], dim=1)

            A_decoding = decoder(A_encoding)
            B_decoding = decoder(B_encoding)

            loss = mse(A_decoding, domA_img) + mse(B_decoding, domB_img)

            if args.discweight > 0:
                preds_A = disc(A_common)
                preds_B = disc(B_common)
                loss += args.discweight * (bce(preds_A, B_label) + bce(preds_B, B_label))

            loss.backward()
            torch.nn.utils.clip_grad_norm_(ae_params, 5)
            ae_optimizer.step()

            if args.discweight > 0:
                disc_optimizer.zero_grad()

                A_common = e1(domA_img)
                B_common = e1(domB_img)

                disc_A = disc(A_common)
                disc_B = disc(B_common)

                loss2 = bce(disc_A, A_label) + bce(disc_B, B_label)

                loss2.backward()
                torch.nn.utils.clip_grad_norm_(disc_params, 5)
                disc_optimizer.step()

            if _iter % args.progress_iter == 0:
                print('Outfile: %s | Iteration %d | loss %.6f | loss1: %.6f | loss2: %.6f' %
                      (args.out, _iter, loss + loss2, loss, loss2))

            if _iter % args.display_iter == 0:
                e1 = e1.eval()
                e2 = e2.eval()
                decoder = decoder.eval()
                save_imgs(args, e1, e2, decoder, _iter)

                e1 = e1.train()
                e2 = e2.train()
                decoder = decoder.train()

            if _iter % args.save_iter == 0:
                save_file = os.path.join(args.out, 'checkpoint_%d' % _iter)
                save_model(save_file, e1, e2, decoder, ae_optimizer, disc, disc_optimizer, _iter)

            _iter += 1
def load_dataset(dataset, data_path):
    print('==> Preparing data..')

    if dataset == 'CIFAR-10':
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        trainset = torchvision.datasets.CIFAR10(root=data_path, train=True, download=True,
                                                transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root=data_path, train=False, download=True,
                                               transform=transform_test)
        total_set = ConcatDataset((trainset, testset))

    elif dataset == 'CIFAR-100':
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        trainset = torchvision.datasets.CIFAR100(root=data_path, train=True, download=True,
                                                 transform=transform_train)
        testset = torchvision.datasets.CIFAR100(root=data_path, train=False, download=True,
                                                transform=transform_test)
        total_set = ConcatDataset((trainset, testset))

    elif dataset == 'MNIST':
        transform_train = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            # transforms.Normalize(mean=[0.5], std=[0.5]),
        ])
        transform_test = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            # transforms.Normalize(mean=[0.5], std=[0.5]),
        ])
        trainset = torchvision.datasets.MNIST(root=data_path, train=True, download=True,
                                              transform=transform_train)
        testset = torchvision.datasets.MNIST(root=data_path, train=False, download=True,
                                             transform=transform_test)
        total_set = ConcatDataset((trainset, testset))

    elif dataset == 'Fashion-MNIST':
        transform_train = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            # transforms.Normalize(mean=[0.5], std=[0.5]),
        ])
        transform_test = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            # transforms.Normalize(mean=[0.5], std=[0.5]),
        ])
        trainset = torchvision.datasets.FashionMNIST(root=data_path, train=True, download=True,
                                                     transform=transform_train)
        testset = torchvision.datasets.FashionMNIST(root=data_path, train=False, download=True,
                                                    transform=transform_test)
        total_set = ConcatDataset((trainset, testset))

    elif dataset == 'SVHN':
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
        trainset = torchvision.datasets.SVHN(root=data_path, split='train', download=True,
                                             transform=transform_train)
        testset = torchvision.datasets.SVHN(root=data_path, split='test', download=True,
                                            transform=transform_test)
        total_set = ConcatDataset((trainset, testset))

    elif dataset == 'adult':
        # todo: import preprocessing code
        dataset = np.load(os.path.join(data_path, 'preprocessed.npy'), allow_pickle=True).item()
        total_set = CustomDataset(torch.FloatTensor(dataset['data']), dataset['label'])

    elif dataset == 'location':
        dataset = np.load(os.path.join(data_path, 'data_complete.npz'))
        # print(np.unique(dataset['y'] - 1))
        total_set = CustomDataset(torch.FloatTensor(dataset['x']), dataset['y'] - 1)

    return total_set
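# Hedged usage sketch (names and sizes are illustrative, not from the original repo): the combined
# train+test pool returned by load_dataset can be re-split and batched with random_split.
total_set = load_dataset('CIFAR-10', './data')
train_size = int(0.8 * len(total_set))
train_part, test_part = torch.utils.data.random_split(total_set, [train_size, len(total_set) - train_size])
train_loader = torch.utils.data.DataLoader(train_part, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_part, batch_size=128, shuffle=False)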